Passed
Push — master ( 7c3be8...26e53d )
by Dispositif
08:12
created

ExternConverterTrait::convertDate()   B

Complexity

Conditions 8
Paths 6

Size

Total Lines 29
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 72

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 8
eloc 15
c 4
b 0
f 0
nc 6
nop 1
dl 0
loc 29
ccs 0
cts 0
cp 0
crap 72
rs 8.4444
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Conversion helpers turning raw web-page metadata (Open Graph, Dublin Core,
 * citation_* tags, JSON-LD style "author" data) into values usable as wiki
 * citation parameters.
 *
 * convertDate() optionally uses a `$log` property of the using class, when that
 * property exists and exposes a notice() method.
 */
trait ExternConverterTrait
{
    /**
     * Tell whether a document type string denotes an article.
     *
     * @param string|null $str type identifier, e.g. 'article' or 'journalArticle'
     *
     * @return bool true only for 'article' and 'journalArticle'
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Map access metadata to the "accès url" parameter value:
     * 'libre' (free), 'limité' (metered), 'payant' (paid/subscription).
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * Non-string values of the textual keys are ignored instead of raising a TypeError.
     *
     * @param array|\ArrayAccess $data metadata indexed by tag name
     *
     * @return string|null null when no known access information is found
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        if (isset($data['og:article:content_tier']) && is_string($data['og:article:content_tier'])) {
            switch (strtolower($data['og:article:content_tier'])) {
                case 'free':
                    return 'libre';
                case 'locked':
                    return 'payant';
                case 'metered':
                    return 'limité';
            }
        }

        // NYT, Figaro
        // TODO: when not free, should the value be 'limité' or 'payant'?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'payant';
        }

        if (isset($data['DC.rights'])
            && is_string($data['DC.rights'])
            && in_array(strtolower($data['DC.rights']), ['free', 'public domain'], true)
        ) {
            return 'libre';
        }

        // TODO: https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."
        // The actual values found in the wild are not well specified.
        if (isset($data['DC.accessRights'])
            && is_string($data['DC.accessRights'])
            && in_array(
                strtolower($data['DC.accessRights']),
                ['free', 'public domain', 'public', 'available'],
                true
            )
        ) {
            return 'libre';
        }

        return null;
    }

    /**
     * Interpret a loosely typed metadata value as a boolean.
     *
     * Booleans are returned as is. Other scalars are cast to string, trimmed and
     * compared case-insensitively with 'true', '1', 'yes', 'oui', 'ok'.
     * null and non-scalar values (arrays, objects) yield false.
     *
     * @param mixed $str
     *
     * @return bool
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // null, arrays and objects cannot be compared with the keyword list.
        if (!is_scalar($str)) {
            return false;
        }

        // Explicit cast: under strict_types, strtolower() rejects int/float arguments.
        $normalized = strtolower(trim((string) $str));

        return in_array($normalized, ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Reduce an author list when it holds more than 3 authors.
     *
     * With $modeEtAl = false: returns the first two authors when the list has
     * 4 or more entries, otherwise the list unchanged.
     * With $modeEtAl = true: returns 'oui' (value for "et al.=oui") when the list
     * has 4 or more entries, otherwise null.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors authors separated by ';' or ','
     * @param bool        $modeEtAl
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }
        // Keep only the first 2 authors. TODO: refactor
        // Bob, Martin ; Yul, Bar ; ... ; ...
        if (preg_match('#([^;]+;[^;]+);[^;]+;.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }
        // Bob Martin, Yul Bar, ..., ...,...
        if (preg_match('#([^,]+,[^,]+),[^,]+,.+#', $authors, $matches)) {
            return ($modeEtAl) ? 'oui' : $matches[1];
        }

        return ($modeEtAl) ? null : $authors;
    }

    /**
     * Build a page or page range from citation_firstpage / citation_lastpage.
     *
     * @param array $meta metadata indexed by tag name
     *
     * @return string|null "first" or "first–last" (en dash), null without a first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string) $page;
    }

    /**
     * Clean an author string: applies clean(), discards values that are URLs and
     * strips a leading "Par " (French byline prefix, case-insensitive).
     *
     * @param string|null $str
     *
     * @return string|null null for null input or when the value is an http(s) URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);
        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }
        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Normalize a raw metadata string: removes e-mail addresses, replaces pipes,
     * line breaks and tabs, converts apostrophe entities, removes '©' and decodes
     * the remaining HTML entities.
     *
     * Note: apply BEFORE wikification (otherwise the "|" handling breaks wikilinks).
     *
     * @param string|null $str
     *
     * @return string|null null only for null input
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        // stripEmailAdress() yields null on a PCRE failure; the cast keeps str_replace() type-safe.
        $str = (string) $this->stripEmailAdress($str);

        // Applied sequentially, in this order.
        $replacements = [
            '|'        => '/',
            "\n"       => ' ',
            "\t"       => ' ',
            '&#x27;'   => "’",
            '&#39;'    => "'",
            '&#039;'   => "'",
            '&apos;'   => "'",
            // Carriage returns are dropped so that "\r\n" becomes a single space.
            "\r"       => '',
            '&#10;'    => ' ',
            '&eacute;' => "é",
            '©'        => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        // Decoding may re-introduce a pipe (e.g. from "&#124;"), so it is replaced again afterwards.
        return str_replace('|', '/', html_entity_decode($str));
    }

    /**
     * Remove e-mail addresses (and one preceding space) from a string.
     *
     * @param string|null $str
     *
     * @return string|null null for null input, or when preg_replace() fails
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str);
    }

    /**
     * Convert an Open Graph og:type into a "format" value.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' or 'livre', null for any other type
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Convert a language or locale identifier into a language code.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * A locale such as "en_GB" yields its two-letter prefix ("en"); any other
     * value is delegated to Language::all2wiki().
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extract the name of the author at position $indice from the 'author' entry.
     *
     * Supported shapes:
     *  - author = 'Bob'                                  (returned for $indice === 1 only)
     *  - author = ['name' => 'Bob', '@type' => 'Person'] (returned for $indice === 0 only)
     *  - author = [0 => ['name' => 'Bob'], 1 => ...]     (entry at $indice)
     * Entries whose '@type' is set and differs from 'Person' are skipped.
     * 'name' may be a string or a list whose first item is a string; any other
     * shape yields null.
     *
     * NOTE(review): the plain-string form answers to $indice === 1 whereas the
     * structured forms are 0-based — looks inconsistent, confirm against callers.
     *
     * @param array|\ArrayAccess $data
     * @param int                $indice
     *
     * @return string|null HTML-entity-decoded author name
     */
    protected function convertAuteur($data, $indice)
    {
        // author=Bob
        if (isset($data['author']) && is_string($data['author']) && $indice === 1) {
            return html_entity_decode($data['author']);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($data['author']['name'])
            && (!isset($data['author']['@type']) || 'Person' === $data['author']['@type'])
        ) {
            return $this->decodeAuthorName($data['author']['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($data['author'][$indice])
            && (!isset($data['author'][$indice]['@type']) || 'Person' === $data['author'][$indice]['@type'])
        ) {
            // also covers "author" => [ "@type" => "Person", "name" => [] ]
            return $this->decodeAuthorName($data['author'][$indice]['name'] ?? null);
        }

        return null;
    }

    /**
     * Extract the name of an institutional author: the first 'author' entry whose
     * '@type' is set and is not 'Person'.
     *
     * @param array|\ArrayAccess $data
     *
     * @return string|null HTML-entity-decoded name, null when absent or not a usable string
     */
    protected function convertInstitutionnel($data)
    {
        if (isset($data['author'][0]['@type']) && 'Person' !== $data['author'][0]['@type']) {
            return $this->decodeAuthorName($data['author'][0]['name'] ?? null);
        }

        return null;
    }

    /**
     * Decode a "name" value that is either a string or a list whose first item is a string.
     *
     * @param mixed $name
     *
     * @return string|null null when no string name can be extracted
     */
    private function decodeAuthorName($name): ?string
    {
        if (is_array($name)) {
            $name = $name[0] ?? null;
        }

        return is_string($name) ? html_entity_decode($name) : null;
    }

    /**
     * Normalize a date string.
     * todo move to generalize as utility
     *
     * - null, '', '0' or whitespace-only input: null
     * - a 4-digit year ("2012") or a year range ("1775-1783"): returned as is
     * - any other value parsable by DateTime: formatted as 'd-m-Y'
     * - unparsable value: returned inside an HTML comment ('<!-- ... -->')
     *
     * Slashes are turned into dashes and a ' 00:00:00' time is dropped before parsing.
     * Parsing failures are caught, never thrown to the caller.
     *
     * @param string|null $str
     *
     * @return string|null
     */
    protected function convertDate(?string $str): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = str_replace(' 00:00:00', '', $str);
        // Trim first: DateTime reads a blank string as "now", and " 2012 " would miss the year pattern.
        $str = trim(str_replace('/', '-', $str));
        if ($str === '' || $str === '0') {
            return null;
        }

        // "2012"
        if (preg_match('#^[12][0-9]{3}$#', $str)) {
            return $str;
        }
        // "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}-[12][0-9]{3}$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // 23/11/2015 00:00:00
            // The logger is optional: it belongs to the class using this trait.
            if (isset($this->log) && method_exists($this->log, 'notice')) {
                $this->log->notice('EXCEPTION DATE');
            }

            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikify press agency names and acronyms (AFP, Reuters, Associated Press, PA,
     * AP, Xinhua, ATS, PC/CP).
     * Note: use AFTER clean() and cleanAuthor(), otherwise the "|" of the wikilinks is altered.
     *
     * Strings already containing '[' are returned untouched, to avoid double wikification.
     *
     * @param string|null $str
     *
     * @return string|null null for empty input
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);
        $str = preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);

        return $str;
    }
}
348