Test Failed
Push — master ( 319840...0bb54c )
by Dispositif
06:21
created

ExternConverterTrait::convertDCpage()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 4.125

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 6
nc 3
nop 1
dl 0
loc 12
ccs 3
cts 6
cp 0.5
crap 4.125
rs 10
c 1
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Helpers converting raw web-page metadata (Open Graph, JSON-LD, Dublin Core,
 * citation_* meta tags) into values usable in French Wikipedia citation templates.
 */
trait ExternConverterTrait
{
    /**
     * Tell whether a metadata type string designates an article.
     *
     * @param string|null $str Type string to test.
     *
     * @return bool True only for 'article' or 'journalArticle'.
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Map access metadata to the "accès url" value: libre, limité or payant.
     * (The template also knows "inscription", which is never produced here.)
     * https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Le_Bistro/25_ao%C3%BBt_2020#Lien_externe_:_paramètre_pour_accessibilité_restreinte_(abonnement,_article_payant)
     *
     * @param mixed $data Metadata indexed by tag name (read through isset()/array access).
     *
     * @return string|null The access value, or null when no known key gives an answer.
     */
    protected function convertURLaccess($data): ?string
    {
        // https://developers.facebook.com/docs/instant-articles/subscriptions/content-tiering/?locale=fr_FR
        if (isset($data['og:article:content_tier']) && is_string($data['og:article:content_tier'])) {
            $tiers = ['free' => 'libre', 'locked' => 'payant', 'metered' => 'limité'];
            $tier = strtolower($data['og:article:content_tier']);
            if (isset($tiers[$tier])) {
                return $tiers[$tier];
            }
        }

        // NYT, Figaro
        // TODO: when not free, should it be "limité" or "payant"?
        if (isset($data['isAccessibleForFree'])) {
            return $this->sameAsTrue($data['isAccessibleForFree']) ? 'libre' : 'limité';
        }

        // Non-string values are ignored instead of crashing strtolower() under strict_types.
        if (isset($data['DC.rights'])
            && is_string($data['DC.rights'])
            && in_array(strtolower($data['DC.rights']), ['free', 'public domain'], true)
        ) {
            return 'libre';
        }

        // TODO : https://terms.tdwg.org/wiki/dcterms:accessRights
        // "Information about who access the resource or an indication of its security status."

        return null;
    }

    /**
     * Interpret a loosely typed metadata value as a boolean.
     *
     * @param mixed $str Bool, scalar or anything else found in the metadata.
     *
     * @return bool Booleans are returned as is; other scalars are true when their
     *              lower-cased string form is 'true', '1', 'yes', 'oui' or 'ok';
     *              null and non-scalar values are false.
     */
    protected function sameAsTrue($str = null): bool
    {
        if (is_bool($str)) {
            return $str;
        }
        // null, arrays and objects cannot be compared to a keyword.
        if (!is_scalar($str)) {
            return false;
        }

        // Explicit cast: strtolower() rejects int/float when strict_types is enabled.
        return in_array(strtolower((string) $str), ['true', '1', 'yes', 'oui', 'ok'], true);
    }

    /**
     * Reduce the author list to its first two entries when it holds four or more.
     * With $modeEtAl=true the method instead answers the "et al.=oui" question.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors  Authors separated by ';' or by ','.
     * @param bool        $modeEtAl When true, return 'oui' if the list was long
     *                              enough to be shortened, null otherwise.
     *
     * @return string|null Null for an empty input; otherwise see $modeEtAl.
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // Keep only the first two authors. TODO: refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Build a page or page range from the citation_firstpage/citation_lastpage keys.
     *
     * @param array $meta Metadata indexed by tag name.
     *
     * @return string|null "first" or "first–last" (en dash); null without a first page.
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string) $page;
    }

    /**
     * Clean an author string and discard values that are not a name.
     *
     * @param string|null $str Raw author value.
     *
     * @return string|null Null for a null input or for a value that is a URL;
     *                     a leading "Par " is removed; otherwise the cleaned string.
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }
        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Strip e-mail addresses, neutralise characters that break wiki templates
     * and decode HTML entities.
     * Note: apply BEFORE wikification (otherwise the '|' of wikilinks is altered).
     *
     * @param string|null $str
     *
     * @return string|null Null only for a null input.
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->stripEmailAdress($str);

        // Applied in order. The original list contained "\n" twice, which made the
        // second entry dead; "\r" takes its place so CRLF input leaves no stray CR.
        $replacements = [
            '|' => '/',
            "\n" => ' ',
            "\t" => ' ',
            '&#x27;' => "’",
            '&#39;' => "'",
            '&#039;' => "'",
            '&apos;' => "'",
            "\r" => '',
            '&#10;' => ' ',
            '&eacute;' => "é",
            '©' => '',
        ];
        $str = str_replace(array_keys($replacements), array_values($replacements), $str);

        return html_entity_decode($str);
    }

    /**
     * Remove anything looking like an e-mail address, with one preceding space if any.
     *
     * @param string|null $str
     *
     * @return string|null Null for a null input (preg_replace() also yields null on a PCRE error).
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str);
    }

    /**
     * Derive a "format" value from an Open Graph og:type.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' or 'livre' when the type contains "video" or "book", else null.
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Convert a language or locale string to a language code.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null Null for an empty input; the two-letter prefix of an
     *                     "xx_YY" locale; otherwise whatever Language::all2wiki() returns.
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extract one author name from the 'author' metadata.
     *
     * NOTE(review): a plain-string author is only returned for $indice === 1 whereas
     * the array forms are 0-based; kept as is because callers are not visible here —
     * confirm the intended indexing.
     *
     * @param mixed $data   Metadata indexed by tag name.
     * @param mixed $indice Requested author position.
     *
     * @return string|null Entity-decoded name, or null when none is available.
     */
    protected function convertAuteur($data, $indice)
    {
        // author=Bob
        if (isset($data['author']) && is_string($data['author']) && $indice === 1) {
            return html_entity_decode($data['author']);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($data['author']['name'])
            && (!isset($data['author']['@type']) || 'Person' === $data['author']['@type'])
        ) {
            return $this->decodeAuthorName($data['author']['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($data['author'][$indice])
            && (!isset($data['author'][$indice]['@type']) || 'Person' === $data['author'][$indice]['@type'])
            && isset($data['author'][$indice]['name'])
        ) {
            // "name" is either a string or a list: "author" => [ "@type" => "Person", "name" => [] ]
            return $this->decodeAuthorName($data['author'][$indice]['name']);
        }

        return null;
    }

    /**
     * Extract the name of the first author when it is explicitly typed as something
     * other than 'Person' (e.g. an organization).
     *
     * @param mixed $data Metadata indexed by tag name.
     *
     * @return string|null Entity-decoded name, or null when absent or not applicable.
     */
    protected function convertInstitutionnel($data)
    {
        if (isset($data['author'][0]['@type']) && 'Person' !== $data['author'][0]['@type']) {
            // The name may be missing: do not pass null to html_entity_decode().
            return $this->decodeAuthorName($data['author'][0]['name'] ?? null);
        }

        return null;
    }

    /**
     * Decode a "name" value that is either a string or a list whose first item is a string.
     *
     * @param mixed $name
     *
     * @return string|null Null when no string can be obtained.
     */
    private function decodeAuthorName($name): ?string
    {
        if (is_array($name)) {
            $name = $name[0] ?? null;
        }

        return is_string($name) ? html_entity_decode($name) : null;
    }

    /**
     * Normalise a date string to 'd-m-Y'.
     *
     * A lone year ("2012") or a year range ("1775-1783", Gallica) is returned unchanged.
     * No exception escapes: an unparsable value is returned inside an HTML comment.
     *
     * @param string|null $str
     *
     * @return string|null Null for an empty input.
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        $str = str_replace(' 00:00:00', '', $str);
        $str = str_replace('/', '-', $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}(-[12][0-9]{3})?$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // e.g. "23/11/2015 00:00:00". The former dump() debug call was removed:
            // it is undefined without symfony/var-dumper and pollutes output otherwise.
            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Wikify press agency names/acronyms.
     * Note: use AFTER clean() and cleanAuthor(), which would alter the '|' of wikilinks.
     *
     * @param string|null $str
     *
     * @return string|null Null for an empty input; the input unchanged when it already contains '['.
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);
        $str = preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);

        return $str;
    }
}
338