Test Failed
Push — master ( 766a39...696a12 )
by Dispositif
09:33
created

ExternConverterTrait::cleanAuthor()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 8
c 0
b 0
f 0
nc 4
nop 1
dl 0
loc 16
ccs 5
cts 5
cp 1
crap 4
rs 10
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Helpers converting raw web-page metadata (OpenGraph, JSON-LD, Dublin Core,
 * citation_* meta tags) into values usable in French Wikipedia citation templates.
 *
 * The host class may optionally provide:
 *  - $this->titleFromHtmlState : read by cleanSEOTitle()
 *  - $this->log                : object with a notice() method, used by convertDate()
 */
trait ExternConverterTrait
{
    /**
     * Tells whether a metadata "type" value designates an article.
     *
     * @param string|null $str type value ("article", "journalArticle", ...)
     *
     * @return bool
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Mapping of the "accès url" template parameter: libre, inscription, limité, payant/abonnement.
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Sources are checked in this order: og:article:content_tier, isAccessibleForFree,
     * DC.rights, DC.accessRights. Non-string values (except isAccessibleForFree, which
     * is delegated to sameAsTrue()) are ignored instead of being passed to strtolower().
     *
     * @param array|mixed $data metadata indexed by property name
     *
     * @return string|null 'libre', 'payant', 'limité', or null when nothing is recognised
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        if (isset($data['og:article:content_tier']) && is_string($data['og:article:content_tier'])) {
            $tiers = [
                'free' => 'libre',
                'locked' => 'payant',
                'metered' => 'limité',
            ];
            $tier = strtolower($data['og:article:content_tier']);
            if (isset($tiers[$tier])) {
                return $tiers[$tier];
            }
            // unknown tier: keep looking at the other metadata
        }

        // NYT, Figaro
        // Todo: when not free => "limité" or "payant"?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        if (isset($data['DC.rights'])
            && is_string($data['DC.rights'])
            && in_array(strtolower($data['DC.rights']), ['free', 'public domain', 'domaine public'], true)
        ) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // Values are a mystery...
        $freeAccessRights = ['free', 'public domain', 'public', 'domaine public', 'available'];
        if (isset($data['DC.accessRights'])
            && is_string($data['DC.accessRights'])
            && in_array(strtolower($data['DC.accessRights']), $freeAccessRights, true)
        ) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interprets a loosely-typed metadata value as a boolean.
     *
     * Booleans are returned as-is. Other scalars are cast to string, lower-cased
     * and compared strictly against: 'true', '1', 'yes', 'oui', 'ok'.
     * null and non-scalar values (arrays, objects) yield false.
     *
     * @param mixed $str value to interpret
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if ($str === null) {
            return false;
        }
        if (is_bool($str)) {
            return $str;
        }
        if (!is_scalar($str)) {
            return false;
        }

        // cast first: under strict_types, strtolower() rejects int/float values (e.g. JSON 1)
        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Reduces the author list when it holds more than 3 authors.
     * With $modeEtAl=true, returns the value for the "et al.=oui" parameter instead.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors  author list separated by ";" or ","
     * @param bool        $modeEtAl true to get 'oui'/null instead of the author list
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, bool $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // keep only the first 2 authors. TODO : refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Builds the page (or page range) from citation_firstpage / citation_lastpage.
     *
     * @param array $meta metadata indexed by meta-tag name
     *
     * @return string|null "12" or "12–34", null when citation_firstpage is missing
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string) $page;
    }

    /**
     * Cleans an author name: applies clean(), drops values that are URLs and
     * strips a leading "Par " (French byline prefix, case-insensitive).
     *
     * @param string|null $str
     *
     * @return string|null null for null input or when the value is a URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }

        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Sanitizes a raw metadata string: removes e-mail addresses, replaces or removes
     * characters and markup listed below, decodes HTML entities, strips HTML tags
     * and trims the result.
     *
     * Note: to be applied BEFORE wikification (otherwise bug on "|").
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        // preg_replace() yields null on a PCRE error: keep the unstripped text in that case
        $str = $this->stripEmailAdress($str) ?? $str;

        // Applied sequentially, in this order, by str_replace().
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            "\r" => '',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            '&#10;' => ' ',
            '&eacute;' => 'é',
            '©' => '',
            '{{' => '',
            '}}' => '',
            '[[' => '',
            ']]' => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        $str = html_entity_decode($str);
        $str = strip_tags($str);

        return trim($str);
    }

    /**
     * Cleans a page title and, when it is 30 bytes or longer and the host object has a
     * truthy $titleFromHtmlState property, appends a "<!-- Vérifiez ce titre -->" marker.
     *
     * @param string|null $title raw title
     * @param mixed       $url   unused, kept for backward compatibility
     *
     * @return string|null null when $title is null
     */
    public function cleanSEOTitle(?string $title = null, $url = null): ?string
    {
        $cleanTitle = $this->clean($title);
        if ($cleanTitle === null) {
            // nothing to measure: strlen(null) is a TypeError under strict_types
            return null;
        }

        // TODO {title to check} + checkSEOTitle()
        if (strlen($cleanTitle) >= 30 && !empty($this->titleFromHtmlState)) {
            $cleanTitle .= "<!-- Vérifiez ce titre -->";
        }

        return $cleanTitle;
    }

    /**
     * Removes anything looking like an e-mail address (and one preceding space).
     *
     * @param string|null $str
     *
     * @return string|null null for null input (or if preg_replace() fails)
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str);
    }

    /**
     * Converts an og:type value to the template "format" value.
     * og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo', 'livre' or null
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Converts a language/locale value to a language code.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * A "xx_YY" locale is reduced to "xx"; any other value is delegated to Language::all2wiki().
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts the name of the author at position $indice from the "author" metadata.
     *
     * Supported shapes of $data['author']:
     *  - string                                   : returned only for $indice === 1 (see note)
     *  - ['name' => ..., '@type' => 'Person']     : returned only for $indice === 0
     *  - [0 => ['name' => ...], 1 => [...], ...]  : entry at $indice
     * Entries whose '@type' is set to something other than 'Person' are skipped.
     * A "name" may be a string or a list whose first element is used; anything else gives null.
     *
     * @param array|mixed $data   metadata
     * @param int|mixed   $indice author position
     *
     * @return string|null HTML-entity-decoded name, or null
     */
    protected function convertAuteur($data, $indice): ?string
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // author=Bob
        // NOTE(review): this shape is served on index 1 whereas the associative shape below
        // is served on index 0 — confirm against callers which index is the first author.
        if (is_string($author)) {
            return (1 === $indice) ? html_entity_decode($author) : null;
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($author['name'])
            && (!isset($author['@type']) || 'Person' === $author['@type'])
        ) {
            return $this->decodeAuthorName($author['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        // "author" => [ "@type" => "Person", "name" => [] ] is handled by decodeAuthorName()
        if (isset($author[$indice]['name'])
            && (!isset($author[$indice]['@type']) || 'Person' === $author[$indice]['@type'])
        ) {
            return $this->decodeAuthorName($author[$indice]['name']);
        }

        return null;
    }

    /**
     * Decodes an author "name" value which is either a string or a list of strings
     * (first element used). Returns null for any other shape.
     *
     * @param mixed $name
     *
     * @return string|null
     */
    private function decodeAuthorName($name): ?string
    {
        if (is_string($name)) {
            return html_entity_decode($name);
        }
        if (isset($name[0]) && is_string($name[0])) {
            return html_entity_decode($name[0]);
        }

        return null;
    }

    /**
     * Returns the name of the first author when its '@type' is set and is not 'Person'
     * (institutional author).
     *
     * @param array|mixed $data metadata
     *
     * @return string|null HTML-entity-decoded name, or null
     */
    protected function convertInstitutionnel($data): ?string
    {
        if (isset($data['author'][0]['@type'], $data['author'][0]['name'])
            && 'Person' !== $data['author'][0]['@type']
        ) {
            return $this->decodeAuthorName($data['author'][0]['name']);
        }

        return null;
    }

    /**
     * Normalizes a date string.
     * todo move to generalize as utility
     *
     * " 00:00:00" is dropped and "/" replaced by "-". A lone year ("2012") or a year
     * range ("1775-1783") is returned unchanged; anything else is parsed with DateTime
     * and formatted as "d-m-Y". When parsing fails, the value is returned wrapped in an
     * HTML comment and a notice is logged if the host object provides $this->log.
     *
     * @param string|null $str
     *
     * @return string|null
     * @throws Exception
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace(' 00:00:00', '', $str);
        $str = str_replace('/', '-', $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12]\d{3}$#', $str) || preg_match('#^[12]\d{3}-[12]\d{3}$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // 23/11/2015 00:00:00
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikification of press agency names/acronyms.
     * Note: use AFTER clean() and cleanAuthor(), otherwise bug on "|".
     *
     * @param string|null $str
     *
     * @return string|null unchanged when the string already contains "["
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);

        return preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);
    }

    /**
     * Add "note=" parameter/value for human information.
     * Placeholder: currently always returns null.
     */
    private function addNote()
    {
        return null;
    }
}
376