Test Failed
Push — master ( 2eb953...f148c7 )
by Dispositif
08:32
created

ExternConverterTrait::clean()   B

Complexity

Conditions 5
Paths 3

Size

Total Lines 55
Code Lines 42

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5.9256

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 5
eloc 42
c 3
b 0
f 0
nc 3
nop 1
dl 0
loc 55
ccs 14
cts 21
cp 0.6667
crap 5.9256
rs 8.9368

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
trait ExternConverterTrait
18
{
19 5
    /**
     * Tell whether a type label designates an article.
     *
     * @param string|null $str type label; only 'article' and 'journalArticle' are recognised
     *
     * @return bool
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }
27
28 5
    /**
     * Map page metadata onto a value for the "accès url" parameter.
     * Target vocabulary: libre, inscription, limité, payant/abonnement
     * (only 'libre', 'limité' and 'payant' are produced here).
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Sources are inspected in this order: 'og:article:content_tier',
     * 'isAccessibleForFree', 'DC.rights', 'DC.accessRights'. Text values that are
     * not strings (e.g. an array) are ignored instead of being passed to strtolower().
     *
     * @param mixed $data metadata indexed by property name
     *
     * @return string|null 'libre', 'limité', 'payant', or null when nothing conclusive is found
     */
    protected function convertURLaccess($data): ?string
    {
        // Lower-cased metadata value, or null when the key is missing or not a string
        // (strtolower() throws a TypeError on non-strings under strict_types).
        $lowered = static function (string $key) use ($data): ?string {
            if (isset($data[$key]) && is_string($data[$key])) {
                return strtolower($data[$key]);
            }

            return null;
        };

        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        $tiers = ['free' => 'libre', 'locked' => 'payant', 'metered' => 'limité'];
        $tier = $lowered('og:article:content_tier');
        if ($tier !== null && isset($tiers[$tier])) {
            return $tiers[$tier];
        }

        // NYT, Figaro
        // TODO: when not free, should the result be 'limité' or 'payant'?
        if (isset($data['isAccessibleForFree'])) {
            return ($this->sameAsTrue($data['isAccessibleForFree'])) ? 'libre' : 'payant';
        }

        if (in_array($lowered('DC.rights'), ['free', 'public domain', 'domaine public'], true)) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // The values actually used in the wild are unclear.
        $freeAccessRights = ['free', 'public domain', 'public', 'domaine public', 'available'];
        if (in_array($lowered('DC.accessRights'), $freeAccessRights, true)) {
            return 'libre';
        }

        return null;
    }
83 3
84
    /**
     * Interpret a loosely-typed metadata value as a boolean.
     *
     * Booleans are returned as-is. Other scalars are cast to string, lower-cased and
     * compared with 'true', '1', 'yes', 'oui', 'ok'. Null and non-scalar values
     * (arrays, objects) yield false.
     *
     * @param mixed $str value to interpret
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if ($str === null) {
            return false;
        }
        if (is_bool($str)) {
            return $str;
        }
        // Arrays/objects cannot be compared with the textual markers below.
        if (!is_scalar($str)) {
            return false;
        }

        // Explicit cast: under strict_types strtolower() rejects int/float (e.g. JSON value 1).
        $normalized = strtolower((string) $str);

        // Loose comparison kept on purpose so numeric spellings such as '01' or '1.0' still equal '1'.
        return in_array($normalized, ['true', '1', 'yes', 'oui', 'ok']);
    }
99
100
    /**
     * Shorten an author list when it holds more than three authors.
     *
     * With $modeEtAl=false the first two authors are returned for a long list,
     * otherwise the list is returned unchanged. With $modeEtAl=true the result is
     * the value for "et al.": 'oui' for a long list, null otherwise.
     * TODO : wikifyPressAgency()
     *
     * ';' takes precedence as separator: the ',' rule is only tried on lists
     * without any ';', so that "Last, First ; Last, First ; Last, First" is not
     * mistaken for four comma-separated authors and cut in the middle of a name.
     *
     * @param string|null $authors
     * @param bool        $modeEtAl
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }
        // keep only the first 2 authors. TODO : refactor
        // Bob, Martin ; Yul, Bar ; ... ; ...
        if (preg_match('#([^;]+;[^;]+);[^;]+;.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }
        // Bob Martin, Yul Bar, ..., ...,...
        if (strpos($authors, ';') === false
            && preg_match('#([^,]+,[^,]+),[^,]+,.+#', $authors, $matches)
        ) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }

        return ($modeEtAl) ? null : $authors;
    }
127
128
    /**
     * Build a page reference from 'citation_firstpage' / 'citation_lastpage'.
     *
     * @param array $meta metadata indexed by property name
     *
     * @return string|null first page, or "first–last" when a last page is set;
     *                     null when there is no first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $bounds = [$meta['citation_firstpage']];
        if (isset($meta['citation_lastpage'])) {
            $bounds[] = $meta['citation_lastpage'];
        }

        // en dash between first and last page
        return implode('–', $bounds);
    }
141 1
142
    /**
     * Clean an author string: runs clean(), rejects values that are URLs and
     * removes a leading "Par " (case-insensitive).
     *
     * @param string|null $str
     *
     * @return string|null null for a null input or a URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if (null === $str) {
            return null;
        }

        $author = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..." is not an author name
        if (1 === preg_match('#^https?://.+#i', $author)) {
            return null;
        }

        // "Par Bob" => "Bob"
        $found = [];

        return (1 === preg_match('#^Par (.+)$#i', $author, $found)) ? $found[1] : $author;
    }
159
160
    /**
     * Sanitize a scraped string before it is used as a wiki template value.
     *
     * Steps, in order: strip e-mail addresses, replace/remove wiki-breaking
     * characters, line breaks and a few quote entities, decode HTML entities,
     * strip HTML tags, then remove again any wiki syntax that the decoding or tag
     * stripping produced (e.g. "&#124;" decodes to "|").
     *
     * When the using class has a truthy titleFromHtmlState property and the result
     * is at least 30 bytes long, a "<!-- Vérifiez ce titre -->" comment is appended.
     *
     * Note: apply BEFORE wikification, because '|' is rewritten here.
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->stripEmailAdress($str);

        // Applied sequentially, in this order, by str_replace().
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            "\r" => '',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => "é",
            '©' => '',
            '{{' => '',
            '}}' => '',
            '[[' => '',
            ']]' => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        $str = html_entity_decode($str);
        $str = strip_tags($str);

        // Entities such as "&#124;" or "&#123;&#123;" only become "|" / "{{" once decoded,
        // and strip_tags() can join split brackets: remove wiki syntax a second time.
        $str = str_replace(['|', '{{', '}}', '[[', ']]'], ['/', '', '', '', ''], $str);

        if (strlen($str) >= 30 && isset($this->titleFromHtmlState) && $this->titleFromHtmlState) {
            $str .= "<!-- Vérifiez ce titre -->";
        }

        return $str;
    }
223
224
    /**
     * Remove e-mail addresses (and one preceding space) from a string.
     *
     * An address may not contain a space, tab or line break, so a name that is
     * separated from the address by a tab or newline is preserved.
     *
     * @param string|null $str
     *
     * @return string|null the input without e-mail addresses; the unchanged input
     *                     if the regex engine reports an error; null for null
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // preg_replace() returns null on a PCRE error: keep the original text in that case.
        return preg_replace('# ?[^ \t\r\n]+@[^ \t\r\n]+\.[A-Z]+#i', '', $str) ?? $str;
    }
232
233 4
    /**
     * Translate an Open Graph "og:type" value into a French format label.
     *
     * og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' when the type contains "video", 'livre' when it
     *                     contains "book", null otherwise
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }

        // Checked in this order: the first substring found wins.
        $formats = ['video' => 'vidéo', 'book' => 'livre'];
        foreach ($formats as $needle => $format) {
            if (false !== strpos($ogType, $needle)) {
                return $format;
            }
        }

        return null;
    }
248
249
    /**
     * Convert a language or locale identifier.
     *
     * A locale written "xx_YY" (e.g. en_GB) is reduced to its two-letter language
     * part; any other non-empty value is handed to Language::all2wiki().
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null null when $lang is empty
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        // en_GB => en
        $parts = [];
        $isLocale = 1 === preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $parts);

        return $isLocale ? $parts[1] : Language::all2wiki($lang);
    }
267
268
    /**
     * Extract the author name at position $indice from the 'author' metadata.
     *
     * Supported shapes of $data['author']:
     *  - a plain string ("Bob");
     *  - a single author ['name' => 'Bob', '@type' => 'Person'], used for $indice 0;
     *  - a list of authors [0 => ['name' => 'Bob'], 1 => ...].
     * An author whose '@type' is set and is not 'Person' is skipped. A 'name' given
     * as a list yields its first element. Names are HTML-entity decoded. A name
     * that is neither a string nor a list starting with a string yields null.
     *
     * @param mixed $data   metadata indexed by property name
     * @param mixed $indice position of the requested author
     *
     * @return string|null
     */
    protected function convertAuteur($data, $indice)
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // Decode a 'name' given as a string or as a list of strings; null for any other shape.
        $decodeName = static function ($name): ?string {
            if (is_string($name)) {
                return html_entity_decode($name);
            }
            if (is_array($name) && isset($name[0]) && is_string($name[0])) {
                return html_entity_decode($name[0]);
            }

            return null;
        };

        // author=Bob
        // NOTE(review): a plain-string author is only returned for $indice 1, whereas the
        // single-author array below answers to $indice 0 — confirm against the callers.
        if (is_string($author) && $indice === 1) {
            return html_entity_decode($author);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($author['name'])
            && (!isset($author['@type']) || 'Person' === $author['@type'])
        ) {
            return $decodeName($author['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($author[$indice])
            && (!isset($author[$indice]['@type']) || 'Person' === $author[$indice]['@type'])
            && isset($author[$indice]['name'])
        ) {
            // also covers "author" => [ "@type" => "Person", "name" => [] ]
            return $decodeName($author[$indice]['name']);
        }

        return null;
    }
306
307
    /**
     * Return the name of the first author when it is not a person.
     *
     * Only $data['author'][0] is inspected, and only when its '@type' is set and
     * differs from 'Person'. The name is HTML-entity decoded; a 'name' given as a
     * list yields its first element. A missing or unusable name yields null.
     *
     * @param mixed $data metadata indexed by property name
     *
     * @return string|null
     */
    protected function convertInstitutionnel($data)
    {
        if (!isset($data['author'][0]['@type']) || 'Person' === $data['author'][0]['@type']) {
            return null;
        }

        // 'name' may be absent: html_entity_decode(null) is a TypeError under strict_types.
        $name = $data['author'][0]['name'] ?? null;
        if (is_string($name)) {
            return html_entity_decode($name);
        }
        if (is_array($name) && isset($name[0]) && is_string($name[0])) {
            return html_entity_decode($name[0]);
        }

        return null;
    }
317
318
    /**
     * Normalize a date string to "d-m-Y".
     *
     * A " 00:00:00" time is dropped and "/" becomes "-" before parsing. A bare year
     * ("2012") or a year range ("1775-1783") is returned as-is. When DateTime cannot
     * parse the value, the normalized text is returned wrapped in an HTML comment
     * and a notice is logged if a logger is available on $this->log.
     * todo move to generalize as utility
     *
     * @param string|null $str
     *
     * @return string|null null when $str is empty
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }

        // "23/11/2015 00:00:00" => "23-11-2015"
        $normalized = str_replace([' 00:00:00', '/'], ['', '-'], $str);

        // "2012", or "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}(?:-[12][0-9]{3})?$#', $normalized)) {
            return $normalized;
        }

        try {
            $parsed = new DateTime($normalized);
        } catch (Exception $e) {
            $canLog = isset($this->log) && method_exists($this->log, 'notice');
            if ($canLog) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- '.$normalized.' -->';
        }

        return $parsed->format('d-m-Y');
    }
356
357
    /**
     * Turn press agency names/acronyms into wikilinks.
     *
     * Note: use AFTER clean() and cleanAuthor(), because clean() rewrites the '|'
     * used by the piped links produced here.
     * A string that already contains '[' is returned untouched.
     *
     * @param string|null $str
     *
     * @return string|null null when $str is empty
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (false !== strpos($str, '[')) {
            return $str;
        }

        // preg_replace() applies the patterns one after the other, in this order.
        $patterns = [
            '#\b(AFP)\b#i',
            '#Reuters#',
            '#Associated Press#',
            '#\b(PA)\b#',
            '#\b(AP)\b#',
            '#Xinhua#',
            '#\b(ATS)\b#',
            '#\b(PC|CP)\b#',
        ];
        $wikilinks = [
            '[[Agence France-Presse|AFP]]',
            '[[Reuters]]',
            '[[Associated Press]]',
            '[[Press Association|PA]]',
            '[[Associated Press|AP]]',
            '[[Xinhua]]',
            '[[Agence télégraphique suisse|ATS]]',
            '[[La Presse canadienne|PC]]',
        ];

        return preg_replace($patterns, $wikilinks, $str);
    }
385
386
    /**
     * Add "note=" parameter/value for human information.
     *
     * Placeholder: the current body does nothing and always returns null.
     */
    private function addNote()
    {
        return null;
    }
393
}
394