Passed
Push — master ( f36d22...dff8a4 )
by Dispositif
02:58
created

ExternConverterTrait::addNote()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
c 0
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 0
cts 0
cp 0
crap 2
rs 10
<?php
/*
 * This file is part of dispositif/wikibot application (@github)
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
 * For the full copyright and MIT license information, view the license file.
 */

declare(strict_types=1);

namespace App\Domain\Publisher;

use App\Domain\Enums\Language;
use DateTime;
use Exception;

trait ExternConverterTrait
{
    /**
     * Tells whether a document type label designates an article.
     *
     * @param string|null $str Type label; only "article" and "journalArticle" are accepted.
     *
     * @return bool
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Maps page metadata to the wiki "accès url" value: libre, inscription, limité, payant/abonnement.
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Keys inspected, in order: "og:article:content_tier", "isAccessibleForFree",
     * "DC.rights", "DC.accessRights". The first key giving an answer wins.
     *
     * @param mixed $data Metadata keyed by meta name (array-like).
     *
     * @return string|null 'libre', 'payant', 'limité', or null when nothing can be deduced.
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        if (isset($data['og:article:content_tier']) && is_string($data['og:article:content_tier'])) {
            switch (strtolower($data['og:article:content_tier'])) {
                case 'free':
                    return 'libre';
                case 'locked':
                    return 'payant';
                case 'metered':
                    return 'limité';
            }
        }

        // NYT, Figaro
        // TODO: when not free => 'limité' or 'payant'?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        if (isset($data['DC.rights'])
            && is_string($data['DC.rights'])
            && in_array(strtolower($data['DC.rights']), ['free', 'public domain', 'domaine public'], true)
        ) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // Values are a mystery...
        if (isset($data['DC.accessRights'])
            && is_string($data['DC.accessRights'])
            && in_array(
                strtolower($data['DC.accessRights']),
                [
                    'free',
                    'public domain',
                    'public',
                    'domaine public',
                    'available',
                ],
                true
            )
        ) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interprets a loosely-typed flag as a boolean.
     *
     * Booleans are returned unchanged. Other scalars (string, int, float) are cast to string,
     * lower-cased and compared with 'true', '1', 'yes', 'oui', 'ok'. Null and non-scalar
     * values (arrays, objects) yield false instead of raising a TypeError.
     *
     * @param mixed $str
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        if (!is_scalar($str)) {
            // null, array, object, resource
            return false;
        }

        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Shortens an author list that holds more than 3 names.
     *
     * With $modeEtAl=false: returns the first two authors when the list has 4+ entries,
     * otherwise the list unchanged.
     * With $modeEtAl=true: returns 'oui' (value for "et al.=oui") when the list has 4+ entries,
     * otherwise null.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors  Authors separated by ";" or ",".
     * @param bool        $modeEtAl Switches to the "et al." check.
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, bool $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }
        // keep only the first 2 authors. TODO : refactor
        // Bob, Martin ; Yul, Bar ; ... ; ...
        if (preg_match('#([^;]+;[^;]+);[^;]+;.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }
        // Bob Martin, Yul Bar, ..., ...,...
        if (preg_match('#([^,]+,[^,]+),[^,]+,.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }

        return ($modeEtAl) ? null : $authors;
    }

    /**
     * Builds a page or page-range string from "citation_firstpage" / "citation_lastpage".
     *
     * @param array $meta Metadata keyed by meta name.
     *
     * @return string|null "first" or "first–last" (en dash), null when no first page is set.
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (isset($meta['citation_firstpage'])) {
            $page = $meta['citation_firstpage'];
            if (isset($meta['citation_lastpage'])) {
                $page .= '–' . $meta['citation_lastpage'];
            }

            return (string)$page;
        }

        return null;
    }

    /**
     * Cleans an author string: applies clean(), rejects values that are URLs
     * and drops a leading "Par " (French byline prefix).
     *
     * @param string|null $str
     *
     * @return string|null Null for null input or when the value is a http(s) URL.
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);
        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }
        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Sanitises a raw metadata string: strips e-mail addresses, line breaks, tabs,
     * HTML entities/tags and the characters that would break a wiki template.
     *
     * Note: must be applied BEFORE wikification (otherwise the "|" of wikilinks is altered).
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->stripEmailAdress($str);

        $str = str_replace(
            [
                '|',
                "\n",
                "\t",
                "\r",
                '&#x27;',
                '&#39;',
                '&#039;',
                '&apos;',
                "&#10;",
                "&eacute;",
                '©',
                '{{',
                '}}',
                '[[',
                ']]',
            ],
            [
                '/',
                ' ',
                ' ',
                '',
                "’",
                "'",
                "'",
                "'",
                ' ',
                "é",
                '',
                '',
                '',
                '',
                '',
            ],
            $str
        );

        $str = html_entity_decode($str);
        $str = strip_tags($str);

        // Entity decoding (e.g. "&#124;", "&#123;&#123;") and tag stripping can bring back
        // the template-breaking characters removed above, so neutralise them once more.
        $str = str_replace(
            ['|', '{{', '}}', '[[', ']]'],
            ['/', '', '', '', ''],
            $str
        );

        return trim($str);
    }

    /**
     * Naive check for SEO title.
     *
     * Cleans the title and, when it is at least 30 bytes long and the using class exposes a
     * truthy $titleFromHtmlState property, appends an HTML comment asking for human review.
     *
     * @param string|null $title
     * @param string|null $url Currently unused; kept for interface compatibility.
     *
     * @return string|null
     */
    public function cleanSEOTitle(?string $title, ?string $url = null): ?string
    {
        $cleanTitle = $this->clean($title);

        // TODO {titre à vérifier} + checkSEOTitle()
        if (
            null !== $cleanTitle
            && strlen($cleanTitle) >= 30
            && isset($this->titleFromHtmlState) && $this->titleFromHtmlState
        ) {
            $cleanTitle .= "<!-- Vérifiez ce titre -->";
        }

        return $cleanTitle;
    }

    /**
     * Removes anything looking like an e-mail address (and one preceding space) from a string.
     *
     * @param string|null $str
     *
     * @return string|null
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str);
    }

    /**
     * Converts an Open Graph "og:type" value to a wiki "format" value.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo', 'livre', or null for any other type.
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Converts a language/locale label to a wiki language code.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * A "xx_YY" locale yields "xx"; anything else is delegated to Language::all2wiki().
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts the author at a given 0-based position from the "author" metadata.
     *
     * Supported shapes of $data['author']:
     *  - a plain string (only position 0 exists);
     *  - a single entry ['name' => ..., '@type' => 'Person'] (only position 0 exists);
     *  - a list of such entries.
     * Entries whose '@type' is set and different from 'Person' are ignored.
     *
     * @param mixed $data   Metadata (array-like) possibly holding an 'author' key.
     * @param mixed $indice 0-based author position.
     *
     * @return string|null Entity-decoded author name, or null when unavailable.
     */
    protected function convertAuteur($data, $indice): ?string
    {
        // author=Bob
        // A lone string is the first author: index 0, consistent with the branches below.
        if (isset($data['author']) && is_string($data['author'])) {
            return (0 === $indice) ? html_entity_decode($data['author']) : null;
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($data['author'])
            && isset($data['author']['name'])
            && (!isset($data['author']['@type'])
                || 'Person' === $data['author']['@type'])
        ) {
            if (is_string($data['author']['name'])) {
                return html_entity_decode($data['author']['name']);
            }
            // "name" given as a list: take its first element when it is a usable string.
            if (isset($data['author']['name'][0]) && is_string($data['author']['name'][0])) {
                return html_entity_decode($data['author']['name'][0]);
            }

            return null;
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($data['author']) && isset($data['author'][$indice])
            && (!isset($data['author'][$indice]['@type'])
                || 'Person' === $data['author'][$indice]['@type'])
        ) {
            if (isset($data['author'][$indice]['name']) && is_string($data['author'][$indice]['name'])) {
                return html_entity_decode($data['author'][$indice]['name']);
            }

            // "author" => [ "@type" => "Person", "name" => [] ]
            if (isset($data['author'][$indice]['name'][0]) && is_string($data['author'][$indice]['name'][0])) {
                return html_entity_decode($data['author'][$indice]['name'][0]);
            }
        }

        return null;
    }

    /**
     * Returns the name of the first author entry when it is explicitly typed
     * as something other than 'Person'.
     *
     * @param mixed $data Metadata (array-like) possibly holding an 'author' list.
     *
     * @return string|null Entity-decoded name, or null when absent or not a string.
     */
    protected function convertInstitutionnel($data): ?string
    {
        if (isset($data['author']) && isset($data['author'][0]) && isset($data['author'][0]['@type'])
            && 'Person' !== $data['author'][0]['@type']
            && isset($data['author'][0]['name'])
            && is_string($data['author'][0]['name'])
        ) {
            return html_entity_decode($data['author'][0]['name']);
        }

        return null;
    }

    /**
     * Normalises a date string to "d-m-Y".
     *
     * A bare year ("2012") or a year range ("1775-1783") is returned unchanged.
     * A string DateTime cannot parse is returned wrapped in an HTML comment, and a
     * notice is logged when the using class exposes a $log object with a notice() method.
     *
     * todo move to generalize as utility
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace(' 00:00:00', '', $str);
        $str = str_replace('/', '-', $str);

        // "2012"
        if (preg_match('#^[12]\d{3}$#', $str)) {
            return $str;
        }
        // "1775-1783" (Gallica)
        if (preg_match('#^[12]\d{3}-[12]\d{3}$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // 23/11/2015 00:00:00
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- ' . $str . ' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikifies press agency names/acronyms (AFP, Reuters, Associated Press, PA, AP, Xinhua, ATS, PC/CP).
     * Note: use AFTER clean() and cleanAuthor(), otherwise the "|" of the generated links is altered.
     *
     * @param string|null $str
     *
     * @return string|null The input unchanged when it already contains "[".
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);

        return preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);
    }
}
373