Passed
Push — master ( dff8a4...2556d0 )
by Dispositif
08:19
created

ExternConverterTrait::cleanSEOTitle()   A

Complexity

Conditions 5
Paths 2

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 9.664

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 5
eloc 7
c 1
b 1
f 0
nc 2
nop 2
dl 0
loc 14
ccs 3
cts 7
cp 0.4286
crap 9.664
rs 9.6111
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Publisher;
11
12
use App\Domain\Enums\Language;
13
use DateTime;
14
use Exception;
15
16
/**
 * Conversion and sanitizing helpers turning raw web page metadata (og:*, DC.*,
 * citation_* and schema.org-style keys) into values for wiki citation parameters.
 */
trait ExternConverterTrait
{
    /**
     * Tells whether the given type string denotes an article.
     *
     * @param string|null $str type label
     *
     * @return bool true only for 'article' or 'journalArticle'
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Maps access metadata to the "accès url" values: libre, limité, payant (or null if unknown).
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Non-string metadata values (e.g. nested arrays) are ignored instead of being
     * passed to strtolower(), which would raise a TypeError under strict_types.
     *
     * @param mixed $data metadata indexed by key name (array or array-like)
     *
     * @return string|null
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        $tier = $data['og:article:content_tier'] ?? null;
        if (is_string($tier)) {
            $tierToAccess = ['free' => 'libre', 'locked' => 'payant', 'metered' => 'limité'];
            $tier = strtolower($tier);
            if (isset($tierToAccess[$tier])) {
                return $tierToAccess[$tier];
            }
        }

        // NYT, Figaro
        // Todo: when not free => "limité" or "payant"?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        $rights = $data['DC.rights'] ?? null;
        if (is_string($rights)
            && in_array(strtolower($rights), ['free', 'public domain', 'domaine public'], true)
        ) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // Values are a mystery...
        $accessRights = $data['DC.accessRights'] ?? null;
        if (is_string($accessRights)
            && in_array(
                strtolower($accessRights),
                ['free', 'public domain', 'public', 'domaine public', 'available'],
                true
            )
        ) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interprets a loosely-typed metadata value as a boolean.
     *
     * Booleans are returned as-is. Other scalars are cast to string and compared
     * case-insensitively with 'true', '1', 'yes', 'oui', 'ok'. Null and non-scalar
     * values (arrays, objects) yield false.
     *
     * @param mixed $str
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // null, arrays and objects cannot be compared with the textual truthy values
        if (!is_scalar($str)) {
            return false;
        }

        // explicit cast: strtolower() rejects int/float under strict_types
        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Shortens the author list when it holds more than 3 authors (keeps the first two).
     * With $modeEtAl=true, returns 'oui' when the list was shortened (for "et al.=oui"),
     * null otherwise.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors authors separated by ';' or ','
     * @param bool        $modeEtAl
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, bool $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // keep only the first 2 authors TODO : refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Builds a page or page range ("12–15") from citation_firstpage / citation_lastpage.
     *
     * @param array $meta
     *
     * @return string|null null when citation_firstpage is not set
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–' . $meta['citation_lastpage'];
        }

        return (string) $page;
    }

    /**
     * Cleans an author string: applies clean(), discards URLs and strips a leading "Par ".
     *
     * @param string|null $str
     *
     * @return string|null null for null input or when the value is a URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        $cleaned = $this->clean($str);
        if ($cleaned === null) {
            return null;
        }

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $cleaned)) {
            return null;
        }

        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $cleaned, $matches)) {
            return $matches[1];
        }

        return $cleaned;
    }

    /**
     * Sanitizes a raw string: strips e-mail addresses, HTML tags/entities, line breaks
     * and the characters that would break wiki syntax ('|', '{{', '}}', '[[', ']]').
     *
     * Note: apply BEFORE wikification (otherwise bug on "|").
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // Applied sequentially, in this order, by str_replace().
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            "\r" => '',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => 'é',
            '©' => '',
            '{{' => '',
            '}}' => '',
            '[[' => '',
            ']]' => '',
        ];

        $str = (string) $this->stripEmailAdress($str);
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);
        $str = strip_tags(html_entity_decode($str));

        // Entity decoding and tag stripping can re-introduce wiki-breaking characters
        // (e.g. "&#124;" => "|", "[<b></b>[" => "[["): neutralize them once more.
        $str = str_replace(['|', '{{', '}}', '[[', ']]'], ['/', '', '', '', ''], $str);

        return trim($str);
    }

    /**
     * Removes e-mail addresses (and one preceding space) from a string.
     *
     * @param string|null $str
     *
     * @return string|null the input itself if the regex engine fails
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // preg_replace() returns null on PCRE error: keep the original text in that case
        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str) ?? $str;
    }

    /**
     * Converts an og:type value into a "format" value ('vidéo', 'livre') or null.
     *
     * @param string|null $ogType
     *
     * @return string|null
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }

        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        $formats = ['video' => 'vidéo', 'book' => 'livre'];
        foreach ($formats as $needle => $format) {
            if (strpos($ogType, $needle) !== false) {
                return $format;
            }
        }

        return null;
    }

    /**
     * Converts a language/locale code: "en_GB"-like locales give their 2-letter
     * prefix, anything else is delegated to Language::all2wiki().
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts one author name from the 'author' metadata entry.
     *
     * Supported shapes: plain string, single ['name' => ..., '@type' => 'Person'] array,
     * or list of such arrays. A 'name' may itself be a list, in which case its first
     * item is used. Entries whose '@type' is set and is not 'Person' are skipped.
     * A missing or non-string name gives null.
     *
     * NOTE(review): a plain string author is returned for $indice === 1 whereas a
     * single author array is returned for $indice === 0 — kept as-is, confirm with callers.
     *
     * @param mixed $data   metadata (array or array-like)
     * @param mixed $indice author position
     *
     * @return string|null
     */
    protected function convertAuteur($data, $indice): ?string
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // author=Bob
        if (is_string($author)) {
            return 1 === $indice ? html_entity_decode($author) : null;
        }

        // Decodes a name given either as a string or as a list (first item used).
        $decodeName = static function ($name): ?string {
            if (is_array($name)) {
                $name = $name[0] ?? null;
            }

            return is_string($name) ? html_entity_decode($name) : null;
        };

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($author['name'])
            && (!isset($author['@type']) || 'Person' === $author['@type'])
        ) {
            return $decodeName($author['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        // also "author" => [ "@type" => "Person", "name" => [] ]
        if (isset($author[$indice])
            && (!isset($author[$indice]['@type']) || 'Person' === $author[$indice]['@type'])
        ) {
            return $decodeName($author[$indice]['name'] ?? null);
        }

        return null;
    }

    /**
     * Returns the name of the first author when it is typed as something other
     * than 'Person' (institutional author), null otherwise or when it has no string name.
     *
     * @param mixed $data metadata (array or array-like)
     *
     * @return string|null
     */
    protected function convertInstitutionnel($data): ?string
    {
        if (!isset($data['author'][0]['@type']) || 'Person' === $data['author'][0]['@type']) {
            return null;
        }

        $name = $data['author'][0]['name'] ?? null;

        return is_string($name) ? html_entity_decode($name) : null;
    }

    /**
     * Normalizes a date string to 'd-m-Y'. A lone year ("2012") or a year range
     * ("1775-1783") is returned unchanged. When the date cannot be parsed, the
     * exception is caught and the value is returned wrapped in an HTML comment.
     *
     * todo move to generalize as utility
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace([' 00:00:00', '/'], ['', '-'], $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12]\d{3}(-[12]\d{3})?$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // 23/11/2015 00:00:00
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- ' . $str . ' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikifies press agency names/acronyms.
     * Note: use AFTER clean() and cleanAuthor(), otherwise bug on "|".
     *
     * The replacements run in a fixed order: 'Associated Press' must be linked
     * before the 'AP' acronym, whose replacement text contains that name.
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }

        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);

        return preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);
    }
}
354