Passed
Push — master ( 7c3be8...26e53d )
by Dispositif
08:12
created

ExternConverterTrait::convertURLaccess()   B

Complexity

Conditions 11
Paths 21

Size

Total Lines 36
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 11.0908

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 17
c 1
b 0
f 0
nc 21
nop 1
dl 0
loc 36
ccs 10
cts 11
cp 0.9091
crap 11.0908
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Helpers converting raw web-page metadata (OpenGraph, JSON-LD, Dublin Core,
 * citation_* meta tags) into values usable in French Wikipedia citation templates.
 */
trait ExternConverterTrait
{
    /**
     * Tells whether a document type identifier denotes an article.
     *
     * @param string|null $str type identifier, e.g. 'article' or 'journalArticle'
     *
     * @return bool
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Maps page metadata to the "accès url" template parameter:
     * libre (free), inscription (registration), limité (metered), payant (paywall/subscription).
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Sources are checked in this order: og:article:content_tier, isAccessibleForFree,
     * DC.rights, DC.accessRights. Non-string/non-scalar values are ignored instead of
     * being passed to string functions.
     *
     * @param array $data metadata indexed by property name
     *
     * @return string|null 'libre', 'limité', 'payant', or null when nothing conclusive is found
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        $tiers = ['free' => 'libre', 'locked' => 'payant', 'metered' => 'limité'];
        $tier = $this->lowercasedMeta($data, 'og:article:content_tier');
        if ($tier !== null && isset($tiers[$tier])) {
            return $tiers[$tier];
        }

        // Seen on NYT, Figaro.
        // TODO: when not free, should it be "limité" or "payant"?
        if (isset($data['isAccessibleForFree']) && is_scalar($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        if (in_array($this->lowercasedMeta($data, 'DC.rights'), ['free', 'public domain'], true)) {
            return 'libre';
        }

        // TODO: https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // Actual values found in the wild are not standardized.
        $accessRights = $this->lowercasedMeta($data, 'DC.accessRights');
        if (in_array($accessRights, ['free', 'public domain', 'public', 'available'], true)) {
            return 'libre';
        }

        return null;
    }

    /**
     * Returns the lowercased metadata value stored under $key, or null when the
     * key is absent or its value is not a string.
     *
     * @param array  $data
     * @param string $key
     *
     * @return string|null
     */
    private function lowercasedMeta($data, string $key): ?string
    {
        if (!isset($data[$key]) || !is_string($data[$key])) {
            return null;
        }

        return strtolower($data[$key]);
    }

    /**
     * Interprets a loosely typed flag as a boolean.
     * Booleans are returned as is; other scalars are true when, once trimmed and
     * lowercased, they equal 'true', '1', 'yes', 'oui' or 'ok'.
     * null, arrays and objects are false.
     *
     * @param mixed $str
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // null, arrays and objects cannot be compared to the textual flags below
        if (!is_scalar($str)) {
            return false;
        }

        // explicit cast: strtolower() rejects int/float under strict_types
        $flag = strtolower(trim((string) $str));

        return in_array($flag, ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Shortens an author list containing more than 3 authors to its first two.
     * With $modeEtAl=true, returns instead the value for the "et al." parameter:
     * 'oui' when the list would be shortened, null otherwise.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors authors separated by ';' or by ','
     * @param bool        $modeEtAl
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // Keep only the first two authors. TODO: refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Builds a page or page range from the citation_firstpage / citation_lastpage metadata.
     *
     * @param array $meta
     *
     * @return string|null "first" or "first–last" (en dash), null without a first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string)$page;
    }

    /**
     * Cleans an author name: applies clean(), drops values that are URLs and
     * strips a leading "Par " (French byline prefix).
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }

        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Sanitizes a metadata string: removes e-mail addresses, decodes HTML entities,
     * replaces "|" by "/", newlines and tabs by a space, and removes "©".
     * Note: apply BEFORE wikification (otherwise bug on "|").
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->stripEmailAdress($str);

        // Entities given an explicit replacement before the generic decoding.
        // NOTE(review): presumably listed because html_entity_decode()'s default flags
        // left single-quote entities untouched before PHP 8.1 — confirm.
        $entities = [
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => "é",
        ];
        $str = str_replace(array_keys($entities), array_values($entities), $str);
        $str = html_entity_decode($str);

        // Done after decoding so that entities such as "&#124;", "&#9;" or "&copy;"
        // cannot bring back a character that must not reach the wikicode.
        $characters = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            '©' => '',
        ];

        return str_replace(array_keys($characters), array_values($characters), $str);
    }

    /**
     * Removes e-mail addresses (and one preceding space) from a string.
     *
     * @param string|null $str
     *
     * @return string|null the input unchanged if the regex engine fails
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // preg_replace() returns null on PCRE error: keep the original text in that case
        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str) ?? $str;
    }

    /**
     * Converts an OpenGraph og:type into the template "format" value.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo', 'livre', or null for any other type
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }

        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        $formats = ['video' => 'vidéo', 'book' => 'livre'];
        foreach ($formats as $needle => $format) {
            if (strpos($ogType, $needle) !== false) {
                return $format;
            }
        }

        return null;
    }

    /**
     * Converts a language/locale identifier to a wiki language code.
     * Locales shaped like "en_GB" are reduced to their two-letter language part;
     * anything else is delegated to Language::all2wiki().
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts the name of the author at position $indice from the 'author' metadata.
     * Supported shapes: plain string, single associative author, list of authors.
     * Entries whose '@type' is set to something other than 'Person' are skipped.
     *
     * NOTE(review): a plain-string author is only returned for $indice === 1 whereas
     * the associative form is returned for $indice === 0 — confirm against callers
     * that this asymmetry is intended.
     *
     * @param array $data
     * @param int   $indice
     *
     * @return string|null
     */
    protected function convertAuteur($data, $indice)
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // author=Bob
        if (is_string($author) && $indice === 1) {
            return html_entity_decode($author);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice && isset($author['name']) && $this->isPersonOrUntyped($author)) {
            return $this->decodeAuthorName($author);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($author[$indice]) && $this->isPersonOrUntyped($author[$indice])) {
            return $this->decodeAuthorName($author[$indice]);
        }

        return null;
    }

    /**
     * True when an author entry has no '@type' or is explicitly typed as 'Person'.
     *
     * @param mixed $author
     *
     * @return bool
     */
    private function isPersonOrUntyped($author): bool
    {
        return !isset($author['@type']) || 'Person' === $author['@type'];
    }

    /**
     * Returns the entity-decoded 'name' of an author entry. When 'name' is a list
     * ("author" => ["@type" => "Person", "name" => [...]]) its first element is used.
     * Returns null when no string name is available.
     *
     * @param mixed $author
     *
     * @return string|null
     */
    private function decodeAuthorName($author): ?string
    {
        if (!isset($author['name'])) {
            return null;
        }

        $name = $author['name'];
        if (is_string($name)) {
            return html_entity_decode($name);
        }
        if (isset($name[0]) && is_string($name[0])) {
            return html_entity_decode($name[0]);
        }

        return null;
    }

    /**
     * Returns the name of the first author when it is explicitly typed as something
     * other than a 'Person' (e.g. an organization).
     *
     * @param array $data
     *
     * @return string|null null when there is no such author or it has no string name
     */
    protected function convertInstitutionnel($data)
    {
        if (!isset($data['author'][0]['@type']) || 'Person' === $data['author'][0]['@type']) {
            return null;
        }

        $name = $data['author'][0]['name'] ?? null;

        return is_string($name) ? html_entity_decode($name) : null;
    }

    /**
     * Normalizes a date string to 'd-m-Y'.
     * A bare year ("2012") or a year range ("1775-1783") is returned unchanged.
     * A value DateTime cannot parse is returned wrapped in an HTML comment.
     * todo move to generalize as utility
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace([' 00:00:00', '/'], ['', '-'], $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}(-[12][0-9]{3})?$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // The using class may or may not provide a logger in $this->log.
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wraps press agency names/acronyms into wikilinks.
     * Note: use AFTER clean() and cleanAuthor(), otherwise bug on "|".
     *
     * @param string|null $str
     *
     * @return string|null the input unchanged if it already contains "[" or if the regex engine fails
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }

        // Applied one after the other, in this order.
        $agencies = [
            '#\b(AFP)\b#i' => '[[Agence France-Presse|AFP]]',
            '#Reuters#' => '[[Reuters]]',
            '#Associated Press#' => '[[Associated Press]]',
            '#\b(PA)\b#' => '[[Press Association|PA]]',
            '#\b(AP)\b#' => '[[Associated Press|AP]]',
            '#Xinhua#' => '[[Xinhua]]',
            '#\b(ATS)\b#' => '[[Agence télégraphique suisse|ATS]]',
            '#\b(PC|CP)\b#' => '[[La Presse canadienne|PC]]',
        ];

        // preg_replace() returns null on PCRE error: keep the original text in that case
        return preg_replace(array_keys($agencies), array_values($agencies), $str) ?? $str;
    }
}
348