Passed
Push — master ( dff8a4...2556d0 )
by Dispositif
08:19
created

ExternConverterTrait::convertURLaccess()   B

Complexity

Conditions 11
Paths 13

Size

Total Lines 41
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 11.0359

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 21
c 1
b 0
f 0
nc 13
nop 1
dl 0
loc 41
ccs 14
cts 15
cp 0.9333
crap 11.0359
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Publisher;
11
12
use App\Domain\Enums\Language;
13
use DateTime;
14
use Exception;
15
16
trait ExternConverterTrait
{
    /**
     * Tell whether a document-type label designates an article.
     *
     * @param string|null $str type label to test
     *
     * @return bool true when the label is exactly 'article' or 'journalArticle'
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Map page metadata to the French "accès url" value: libre (free), inscription (registration),
     * limité (metered), payant (paywall/subscription).
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Sources are checked in this order, first match wins: 'og:article:content_tier',
     * 'isAccessibleForFree', 'DC.rights', 'DC.accessRights'. Non-string values found in the
     * string-valued keys are ignored instead of being passed to strtolower().
     *
     * @param mixed $data metadata, read with array access
     *
     * @return string|null 'libre', 'payant' or 'limité'; null when nothing conclusive is found
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        $tier = $data['og:article:content_tier'] ?? null;
        if (is_string($tier)) {
            $accessByTier = [
                'free' => 'libre',
                'locked' => 'payant',
                'metered' => 'limité',
            ];
            $tier = strtolower($tier);
            if (isset($accessByTier[$tier])) {
                return $accessByTier[$tier];
            }
            // Unknown tier: fall through to the other metadata sources.
        }

        // NYT, Figaro
        // TODO: when not free, should it be "limité" or "payant"?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        $rights = $data['DC.rights'] ?? null;
        if (is_string($rights)
            && in_array(strtolower($rights), ['free', 'public domain', 'domaine public'], true)
        ) {
            return 'libre';
        }

        // TODO: https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who can access the resource or an indication of its security status."
        // The values actually found in the wild are not standardised.
        $accessRights = $data['DC.accessRights'] ?? null;
        if (is_string($accessRights)
            && in_array(
                strtolower($accessRights),
                ['free', 'public domain', 'public', 'domaine public', 'available'],
                true
            )
        ) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interpret a loosely-typed flag as a boolean.
     *
     * Booleans are returned unchanged. Other scalars are cast to string and compared,
     * case-insensitively, with 'true', '1', 'yes', 'oui' and 'ok'. Null and non-scalar
     * values (arrays, objects) yield false.
     *
     * @param mixed $str value to interpret
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // Null and non-scalars cannot be lower-cased: treat them as "not true".
        if (!is_scalar($str)) {
            return false;
        }

        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Shorten an author list that has four or more entries.
     *
     * The list may be separated by ';' ("Bob, Martin ; Yul, Bar ; ...") or by ','
     * ("Bob Martin, Yul Bar, ..."). In the default mode the first two authors are returned
     * for a long list and the input is returned unchanged otherwise. With $modeEtAl = true
     * the method instead answers the "et al." question: 'oui' for a long list, null otherwise.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors   raw author list
     * @param bool        $modeEtAl  true to get the "et al." flag instead of the names
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, bool $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }
        // Keep only the first two authors. TODO: refactor
        // Bob, Martin ; Yul, Bar ; ... ; ...
        if (preg_match('#([^;]+;[^;]+);[^;]+;.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }
        // Bob Martin, Yul Bar, ..., ...,...
        if (preg_match('#([^,]+,[^,]+),[^,]+,.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }

        return ($modeEtAl) ? null : $authors;
    }

    /**
     * Build a page or page-range string from 'citation_firstpage' / 'citation_lastpage'.
     *
     * @param array $meta metadata
     *
     * @return string|null first page, or "first–last" (en dash) when a last page is set;
     *                     null when there is no first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–' . $meta['citation_lastpage'];
        }

        return (string)$page;
    }

    /**
     * Clean an author string: applies clean(), rejects values that are URLs and strips a
     * leading "Par " (French byline prefix, matched case-insensitively).
     *
     * @param string|null $str raw author value
     *
     * @return string|null cleaned author, or null for null input or a URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);
        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }
        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Sanitize a metadata string before it is inserted into wikitext.
     *
     * Steps: remove e-mail addresses, replace/strip characters that break wiki templates
     * (pipe, braces, brackets, line breaks) and a few entities, decode HTML entities, strip
     * HTML tags, then neutralise wiki syntax a second time and trim.
     *
     * Note: apply BEFORE wikification (otherwise the "|" of wikilinks gets mangled).
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = (string) $this->stripEmailAdress($str);

        // Applied sequentially, in this order, by str_replace().
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            "\r" => '',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => 'é',
            '©' => '',
            '{{' => '',
            '}}' => '',
            '[[' => '',
            ']]' => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        $str = html_entity_decode($str);
        $str = strip_tags($str);

        // Entity decoding ("&#124;" => "|") and tag removal ("{<b></b>{" => "{{") can
        // re-create wiki syntax that the first pass could not see: neutralise it again.
        $str = str_replace(['|', '{{', '}}', '[[', ']]'], ['/', '', '', '', ''], $str);

        return trim($str);
    }

    /**
     * Remove e-mail addresses (with one optional preceding space) from a string.
     *
     * @param string|null $str
     *
     * @return string|null the string without e-mail addresses; the input is returned
     *                     unchanged if the regex engine fails
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // preg_replace() yields null on a PCRE error: keep the original text in that case.
        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str) ?? $str;
    }

    /**
     * Convert an Open Graph "og:type" value to the French "format" value.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' when the type contains "video", 'livre' when it contains
     *                     "book", null otherwise
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Convert a language/locale identifier to a language code.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * A "xx_YY" locale yields its "xx" part; any other non-empty value is delegated to
     * Language::all2wiki().
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extract the name of the author at position $indice from the 'author' metadata.
     *
     * Supported shapes:
     *  - 'author' => 'Bob'                                (returned only for $indice === 1)
     *  - 'author' => ['name' => 'Bob', '@type' => 'Person'] (returned only for $indice === 0)
     *  - 'author' => [0 => ['name' => 'Bob'], 1 => ...]   (entry at $indice)
     * In the array shapes an entry whose '@type' is set and is not 'Person' is skipped, and
     * 'name' may itself be a list, in which case its first element is used.
     *
     * @param mixed $data   metadata, read with array access
     * @param mixed $indice author position (compared strictly with integers 0 and 1)
     *
     * @return string|null entity-decoded name, or null when no usable name is found
     */
    protected function convertAuteur($data, $indice): ?string
    {
        // author=Bob
        if (isset($data['author']) && is_string($data['author']) && $indice === 1) {
            return html_entity_decode($data['author']);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($data['author']['name'])
            && (!isset($data['author']['@type']) || 'Person' === $data['author']['@type'])
        ) {
            return $this->extractAuthorName($data['author']['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($data['author'][$indice])
            && (!isset($data['author'][$indice]['@type'])
                || 'Person' === $data['author'][$indice]['@type'])
        ) {
            // also covers "author" => [ "@type" => "Person", "name" => [] ]
            return $this->extractAuthorName($data['author'][$indice]['name'] ?? null);
        }

        return null;
    }

    /**
     * Extract the name of an institutional (non-'Person') first author.
     *
     * @param mixed $data metadata, read with array access
     *
     * @return string|null entity-decoded name of $data['author'][0] when its '@type' is set
     *                     and differs from 'Person' and a usable name exists; null otherwise
     */
    protected function convertInstitutionnel($data): ?string
    {
        if (isset($data['author'][0]['@type']) && 'Person' !== $data['author'][0]['@type']) {
            return $this->extractAuthorName($data['author'][0]['name'] ?? null);
        }

        return null;
    }

    /**
     * Decode an author "name" value that is either a string or a list of strings.
     *
     * @param mixed $name raw 'name' value
     *
     * @return string|null entity-decoded string (first element for a list); null when the
     *                     value holds no string
     */
    private function extractAuthorName($name): ?string
    {
        if (is_string($name)) {
            return html_entity_decode($name);
        }
        if (is_array($name) && isset($name[0]) && is_string($name[0])) {
            return html_entity_decode($name[0]);
        }

        return null;
    }

    /**
     * Normalise a date string.
     * todo move to generalize as utility
     *
     * A bare year ("2012") or a year range ("1775-1783") is returned as is. Anything else is
     * parsed with DateTime and formatted as 'd-m-Y'. A value DateTime cannot parse is returned
     * wrapped in an HTML comment, and a notice is logged when a logger is available in
     * $this->log.
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace(' 00:00:00', '', $str);
        $str = str_replace('/', '-', $str);

        // "2012"
        if (preg_match('#^[12]\d{3}$#', $str)) {
            return $str;
        }
        // "1775-1783" (Gallica)
        if (preg_match('#^[12]\d{3}-[12]\d{3}$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // 23/11/2015 00:00:00
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            // Keep the raw value visible to editors without rendering it.
            return '<!-- ' . $str . ' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikify press-agency names and acronyms (AFP, Reuters, Associated Press, PA, AP, Xinhua,
     * ATS, PC/CP) by turning them into wikilinks.
     * Note: use AFTER clean() and cleanAuthor(), otherwise the "|" of the wikilinks is altered.
     *
     * @param string|null $str
     *
     * @return string|null null for empty input; the input unchanged when it already contains
     *                     a '['; the wikified string otherwise
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }
        // Order matters: the full name "Associated Press" is linked before the bare "AP".
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);

        return preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);
    }
}
354