Test Failed
Push — master ( 2eb953...f148c7 )
by Dispositif
08:32
created

ExternConverterTrait::convertDate()   B

Complexity

Conditions 8
Paths 6

Size

Total Lines 29
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 72

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 8
eloc 15
c 4
b 0
f 0
nc 6
nop 1
dl 0
loc 29
ccs 0
cts 0
cp 0
crap 72
rs 8.4444
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
trait ExternConverterTrait
{
    /**
     * Tells whether a document type designates an article.
     *
     * @param string|null $str type value to test
     *
     * @return bool true only for exactly 'article' or 'journalArticle'
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Maps page metadata to the "accès url" value: libre, inscription, limité, payant/abonnement.
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * The keys are inspected in this order: 'og:article:content_tier', 'isAccessibleForFree',
     * 'DC.rights', 'DC.accessRights'. The first one giving a known value wins.
     *
     * @param mixed $data metadata indexed by name (an array is expected)
     *
     * @return string|null 'libre', 'payant', 'limité', or null when nothing can be deduced
     */
    protected function convertURLaccess($data): ?string
    {
        // Only string values can be compared with the known keywords: strtolower() on an
        // array or object would throw a TypeError under strict_types.
        $keyword = static function ($value): ?string {
            return is_string($value) ? strtolower($value) : null;
        };

        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        $tiers = [
            'free' => 'libre',
            'locked' => 'payant',
            'metered' => 'limité',
        ];
        $tier = $keyword($data['og:article:content_tier'] ?? null);
        if (null !== $tier && isset($tiers[$tier])) {
            return $tiers[$tier];
        }

        // NYT, Figaro
        // TODO: when not free => "limité" or "payant"?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        $rights = $keyword($data['DC.rights'] ?? null);
        if (in_array($rights, ['free', 'public domain', 'domaine public'], true)) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // Actual values found in the wild are not specified.
        $accessRights = $keyword($data['DC.accessRights'] ?? null);
        $freeAccessRights = ['free', 'public domain', 'public', 'domaine public', 'available'];
        if (in_array($accessRights, $freeAccessRights, true)) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interprets a loosely typed metadata value as a boolean.
     *
     * Booleans are returned as is. Other scalars are cast to string and compared,
     * case-insensitively, with 'true', '1', 'yes', 'oui' and 'ok'. Null and
     * non-scalar values (arrays, objects) are false.
     *
     * @param mixed $str value to interpret
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // null, arrays and objects cannot express "true"; casting them would fail or warn.
        if (!is_scalar($str)) {
            return false;
        }

        // Explicit cast: int/float values (e.g. JSON 1) would make strtolower() throw
        // a TypeError under strict_types.
        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Shortens an author list having at least four names to its first two names.
     * With $modeEtAl = true, returns instead the value of the "et al." parameter:
     * 'oui' when the list has been (or would be) shortened, null otherwise.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors names separated by ';' or by ','
     * @param bool        $modeEtAl
     *
     * @return string|null null when $authors is empty
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // Keep only the first two authors. TODO : refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Builds the page value from 'citation_firstpage' and, when present,
     * 'citation_lastpage' (joined with an en dash).
     *
     * @param array $meta
     *
     * @return string|null null when 'citation_firstpage' is not set
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string) $page;
    }

    /**
     * Cleans an author name with clean(), rejects values that are a URL and
     * removes a leading "Par " ("By " in French).
     *
     * @param string|null $str
     *
     * @return string|null null for a null input or when the cleaned value is a http(s) URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if (null === $str) {
            return null;
        }

        $str = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }

        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Cleans a scraped text: removes e-mail addresses, line breaks, HTML entities,
     * HTML tags and the characters having a meaning in wikitext ('|', '{{', '}}', '[[', ']]').
     *
     * Note: must be applied BEFORE wikification (it would otherwise destroy the wikilinks).
     *
     * When the resulting string is at least 30 bytes long and the using class has a truthy
     * $titleFromHtmlState property, a "<!-- Vérifiez ce titre -->" comment is appended.
     *
     * @param string|null $str
     *
     * @return string|null null only for a null input
     */
    public function clean(?string $str = null): ?string
    {
        if (null === $str) {
            return null;
        }

        $str = $this->stripEmailAdress($str);

        // Applied in this order (str_replace() processes the pairs sequentially).
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            "\r" => '',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => 'é',
            '©' => '',
            '{{' => '',
            '}}' => '',
            '[[' => '',
            ']]' => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        $str = strip_tags(html_entity_decode($str));

        // Entity decoding can bring back wiki-sensitive characters that were encoded in the
        // input (e.g. "&#124;" => "|", "&#91;&#91;" => "[["): neutralise them a second time.
        $str = str_replace(['|', '{{', '}}', '[[', ']]'], ['/', '', '', '', ''], $str);

        // strlen() counts bytes, not characters.
        if (strlen($str) >= 30 && !empty($this->titleFromHtmlState)) {
            $str .= "<!-- Vérifiez ce titre -->";
        }

        return $str;
    }

    /**
     * Removes e-mail addresses (and the space preceding them) from a text.
     *
     * @param string|null $str
     *
     * @return string|null null only for a null input
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if (null === $str) {
            return null;
        }

        // preg_replace() returns null on a regex engine error: keep the text unchanged then.
        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str) ?? $str;
    }

    /**
     * Converts an Open Graph type into the "format" value.
     * og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' or 'livre', null for any other type
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }

        $formats = ['video' => 'vidéo', 'book' => 'livre'];
        foreach ($formats as $needle => $format) {
            if (false !== strpos($ogType, $needle)) {
                return $format;
            }
        }

        return null;
    }

    /**
     * Converts a language or locale code.
     * A locale such as "en_GB" is reduced to its two-letter language part; any other
     * value is delegated to Language::all2wiki().
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null null when $lang is empty
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts the name of the author at position $indice from the 'author' metadata.
     *
     * Supported shapes of $data['author']:
     *  - "Bob"                                 (plain string)
     *  - ['name' => 'Bob', '@type' => 'Person'] (single author, name may also be a list)
     *  - [0 => ['name' => 'Bob'], 1 => ...]     (list of authors)
     * Entries having an '@type' other than 'Person' are ignored.
     *
     * NOTE(review): the plain string shape is returned for $indice === 1 whereas the two
     * other shapes are 0-based. This looks inconsistent; confirm against the callers
     * before changing it.
     *
     * @param mixed $data   metadata (an array is expected)
     * @param mixed $indice position of the wanted author (an int is expected)
     *
     * @return string|null decoded author name, or null when not found
     */
    protected function convertAuteur($data, $indice)
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // author=Bob
        if (is_string($author)) {
            return (1 === $indice) ? html_entity_decode($author) : null;
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($author['name'])
            && (!isset($author['@type']) || 'Person' === $author['@type'])
        ) {
            if (is_string($author['name'])) {
                return html_entity_decode($author['name']);
            }

            // "name" given as a list: use its first element, if there is a usable one
            // (an empty list would otherwise pass null to html_entity_decode()).
            $firstName = $author['name'][0] ?? null;

            return is_string($firstName) ? html_entity_decode($firstName) : null;
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        $entry = $author[$indice] ?? null;
        if (null !== $entry && (!isset($entry['@type']) || 'Person' === $entry['@type'])) {
            if (isset($entry['name']) && is_string($entry['name'])) {
                return html_entity_decode($entry['name']);
            }

            // "author" => [ "@type" => "Person", "name" => [] ]
            if (isset($entry['name'][0]) && is_string($entry['name'][0])) {
                return html_entity_decode($entry['name'][0]);
            }
        }

        return null;
    }

    /**
     * Extracts the name of an institutional author: the first entry of the 'author'
     * list when it declares an '@type' other than 'Person'.
     *
     * @param mixed $data metadata (an array is expected)
     *
     * @return string|null decoded name, or null when there is no such author or no usable name
     */
    protected function convertInstitutionnel($data)
    {
        $first = $data['author'][0] ?? null;

        if (!isset($first['@type']) || 'Person' === $first['@type']) {
            return null;
        }
        // The name is optional in the metadata: never pass null/array to html_entity_decode().
        if (!isset($first['name']) || !is_string($first['name'])) {
            return null;
        }

        return html_entity_decode($first['name']);
    }

    /**
     * Converts a date string to the "d-m-Y" format.
     * A year ("2012") or a year range ("1775-1783") is returned unchanged. A string that
     * DateTime can not parse is returned wrapped in an HTML comment (no exception is thrown).
     * todo move to generalize as utility
     *
     * @param string|null $str
     *
     * @return string|null null when $str is empty
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }

        // Drop a midnight time, then use '-' as separator so that "23/11/2015" is read
        // as day-month-year by DateTime.
        $str = str_replace([' 00:00:00', '/'], ['', '-'], $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}(-[12][0-9]{3})?$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // The logger is an optional property of the class using this trait.
            if (isset($this->log) && is_object($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikifies the names/acronyms of press agencies.
     * Note: use AFTER clean() and cleanAuthor(), which would otherwise alter the "|"
     * of the generated wikilinks.
     *
     * @param string|null $str
     *
     * @return string|null null when $str is empty; $str unchanged when it contains a '['
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (false !== strpos($str, '[')) {
            return $str;
        }

        // The order matters: full names are linked before the acronyms are looked up.
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);
        $str = preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);

        return $str;
    }

    /**
     * Add "note=" parameter/value for human information.
     * Not implemented yet: always returns null.
     */
    private function addNote()
    {
        return null;
    }
}
394