Passed
Push — master ( aa70d7...319840 )
by Dispositif
07:47
created

ExternConverterTrait::convertLangue()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 3.1406

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 5
nc 3
nop 1
dl 0
loc 11
ccs 3
cts 4
cp 0.75
crap 3.1406
rs 10
c 1
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Helper methods turning raw values read from a web page's metadata
 * (keys such as "author", "DC.rights", "og:article:content_tier",
 * "citation_firstpage", ...) into cleaned strings.
 *
 * Several helpers return French keywords ("ouvert", "limité", "vidéo",
 * "livre", "oui"); these are runtime values and must not be translated.
 */
trait ExternConverterTrait
{
    /**
     * Tells whether a type string designates an article.
     *
     * @param string|null $str Type name to test.
     *
     * @return bool True only for "article" or "journalArticle".
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Derives the access level of a page from its metadata.
     *
     * The keys are checked in a fixed order and the first one present wins:
     * "isAccessibleForFree", then "DC.rights", then "og:article:content_tier".
     *
     * @param array $data Metadata indexed by key name (assumed to be an array
     *                    or ArrayAccess; the parameter is untyped).
     *
     * @return string|null "ouvert" (free), "limité" (restricted), or null
     *                     when none of the known keys is set.
     */
    protected function convertURLaccess($data): ?string
    {
        // Key seen on NYT and Figaro pages.
        if (isset($data['isAccessibleForFree'])) {
            return $data['isAccessibleForFree'] ? 'ouvert' : 'limité';
        }
        if (isset($data['DC.rights'])) {
            // Strict comparison: a loose one makes 0 == 'free' true on PHP 7.
            return in_array($data['DC.rights'], ['free', 'public domain'], true) ? 'ouvert' : 'limité';
        }
        if (isset($data['og:article:content_tier'])) {
            return ($data['og:article:content_tier'] === 'free') ? 'ouvert' : 'limité';
        }

        return null;
    }

    /**
     * Shortens an author list to its first two names when it holds four
     * names or more (three separators followed by more text).
     *
     * Two list layouts are recognised:
     *  - "Bob, Martin ; Yul, Bar ; ... ; ..." (semicolon separated);
     *  - "Bob Martin, Yul Bar, ..., ..."      (comma separated).
     *
     * With $modeEtAl = true the method does not return names: it answers
     * "oui" when the list would be shortened, and null otherwise.
     *
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors  Raw author list.
     * @param bool        $modeEtAl When true, return the "et al." flag
     *                              instead of the names.
     *
     * @return string|null Null for an empty list; see above otherwise.
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // Keep only the first two authors. TODO : refactor
        $patterns = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return ($modeEtAl) ? 'oui' : $matches[1];
            }
        }

        return ($modeEtAl) ? null : $authors;
    }

    /**
     * Builds a page or page-range string from "citation_firstpage" and
     * "citation_lastpage".
     *
     * @param array $meta Metadata indexed by key name.
     *
     * @return string|null First page, or "first–last" (en dash) when a last
     *                     page is present; null when there is no first page.
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string)$page;
    }

    /**
     * Cleans an author name.
     *
     * Applies clean(), discards values that are URLs, and strips a leading
     * "Par " (French "By ") prefix, case-insensitively.
     *
     * @param string|null $str Raw author value.
     *
     * @return string|null Cleaned name; null for null input or a URL.
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->clean($str);
        // e.g. "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $str)) {
            return null;
        }
        // e.g. "Par Bob"
        if (preg_match('#^Par (.+)$#i', $str, $matches)) {
            return $matches[1];
        }

        return $str;
    }

    /**
     * Normalises a raw text value: removes e-mail addresses, replaces "|",
     * newlines and tabs, converts a few apostrophe entities, drops "©",
     * then decodes the remaining HTML entities.
     *
     * Note: apply BEFORE wikification, otherwise the "|" of wikilinks
     * would be replaced too.
     *
     * @param string|null $str Raw text.
     *
     * @return string|null Cleaned text, or null for null input.
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }
        $str = $this->stripEmailAdress($str);

        // Pairs are applied in order. The original list contained "\n" a
        // second time (mapped to ''); that entry could never match because
        // the first pair already replaced every newline, so it was dropped.
        $str = str_replace(
            ['|', "\n", "\t", '&#x27;', '&#39;', '&#039;', '&apos;', "&#10;", "&eacute;", '©'],
            ['/', ' ', ' ', "’", "'", "'", "'", ' ', "é", ''],
            $str
        );

        return html_entity_decode($str);
    }

    /**
     * Removes anything that looks like an e-mail address, together with the
     * single space preceding it.
     *
     * @param string|null $str Raw text.
     *
     * @return string|null Text without e-mail addresses; null for null input.
     */
    public function stripEmailAdress(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // preg_replace() returns null on a PCRE error; keep the input then,
        // so that callers such as clean() always receive a string.
        return preg_replace('# ?[^ ]+@[^ ]+\.[A-Z]+#i', '', $str) ?? $str;
    }

    /**
     * Maps an Open Graph "og:type" value to a format keyword.
     *
     * Known og:type values: website (default), video.movie, video.tv_show,
     * video.other, article, book, profile.
     *
     * @param string|null $ogType Value of og:type.
     *
     * @return string|null "vidéo" when the type contains "video", "livre"
     *                     when it contains "book", null otherwise.
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Converts a language or locale code.
     *
     * A locale of the form "xx_YY" (e.g. "en_GB") is reduced to its two
     * lower-case letters; any other non-empty value is delegated to
     * Language::all2wiki().
     *
     * @see https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang Language or locale code.
     *
     * @return string|null Converted code, or null for empty input.
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts one author name from the "author" entry of the metadata.
     *
     * Three layouts of $data['author'] are handled:
     *  - a plain string, returned only when $indice === 1;
     *  - a single description ['name' => ..., '@type' => ...], returned only
     *    when $indice === 0;
     *  - a list of descriptions, where entry $indice is used.
     * Descriptions carrying an '@type' other than "Person" are skipped.
     * A "name" may itself be a list, in which case its first element is used.
     *
     * NOTE(review): the plain-string layout answers index 1 while the
     * single-description layout answers index 0 — confirm against callers.
     *
     * @param array $data   Metadata indexed by key name (untyped parameter).
     * @param int   $indice Position of the wanted author.
     *
     * @return string|null Entity-decoded name, or null when none is found.
     */
    protected function convertAuteur($data, $indice)
    {
        // author=Bob
        if (isset($data['author']) && is_string($data['author']) && $indice === 1) {
            return html_entity_decode($data['author']);
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($data['author'])
            && isset($data['author']['name'])
            && (!isset($data['author']['@type'])
                || 'Person' === $data['author']['@type'])
        ) {
            if (is_string($data['author']['name'])) {
                return html_entity_decode($data['author']['name']);
            }

            // "name" is a list: it may be empty or hold non-string items, so
            // guard the access instead of passing null to html_entity_decode().
            if (isset($data['author']['name'][0]) && is_string($data['author']['name'][0])) {
                return html_entity_decode($data['author']['name'][0]);
            }

            return null;
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($data['author']) && isset($data['author'][$indice])
            && (!isset($data['author'][$indice]['@type'])
                || 'Person' === $data['author'][$indice]['@type'])
        ) {
            if (isset($data['author'][$indice]['name']) && is_string($data['author'][$indice]['name'])) {
                return html_entity_decode($data['author'][$indice]['name']);
            }

            // "author" => [ "@type" => "Person", "name" => [] ]
            if (isset($data['author'][$indice]['name'][0])
                && is_string($data['author'][$indice]['name'][0])
            ) {
                return html_entity_decode($data['author'][$indice]['name'][0]);
            }
        }

        return null;
    }

    /**
     * Returns the name of the first author when that author is explicitly
     * typed as something other than a "Person".
     *
     * @param array $data Metadata indexed by key name (untyped parameter).
     *
     * @return string|null Entity-decoded name, or null when the first author
     *                     is missing, untyped, a Person, or has no string name.
     */
    protected function convertInstitutionnel($data)
    {
        if (isset($data['author']) && isset($data['author'][0]) && isset($data['author'][0]['@type'])
            && 'Person' !== $data['author'][0]['@type']
            // A typed entry without a usable name must not reach html_entity_decode().
            && isset($data['author'][0]['name'])
            && is_string($data['author'][0]['name'])
        ) {
            return html_entity_decode($data['author'][0]['name']);
        }

        return null;
    }

    /**
     * Normalises a date string.
     *
     * A bare year ("2012") or a year range ("1775-1783", as found on Gallica)
     * is returned as is. Anything else is parsed with DateTime and formatted
     * as "d-m-Y". A value that DateTime cannot parse is returned wrapped in
     * an HTML comment rather than raising an exception.
     *
     * @param string|null $str Raw date.
     *
     * @return string|null Normalised date, or null for empty input.
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // Drop a midnight time part, then turn "23/11/2015" into "23-11-2015".
        $str = str_replace([' 00:00:00', '/'], ['', '-'], $str);

        // "2012" or "1775-1783" (Gallica)
        if (preg_match('#^[12][0-9]{3}$#', $str)
            || preg_match('#^[12][0-9]{3}-[12][0-9]{3}$#', $str)
        ) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // Unparseable value: keep it visible as an HTML comment.
            // (A leftover dump() debug call was removed here; the function is
            // not guaranteed to exist and would turn this into a fatal error.)
            return '<!-- '.$str.' -->';
        }

        return $date->format('d-m-Y');
    }

    /**
     * Turns names/acronyms of press agencies into wikilinks.
     *
     * Note: use AFTER clean() and cleanAuthor(), which would otherwise
     * replace the "|" of the generated wikilinks.
     *
     * @param string|null $str Text possibly naming a press agency.
     *
     * @return string|null Wikified text; the input unchanged when it already
     *                     contains "["; null for empty input.
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // Skip potential wikilinks.
        if (strpos($str, '[') !== false) {
            return $str;
        }
        $str = preg_replace('#\b(AFP)\b#i', '[[Agence France-Presse|AFP]]', $str);
        $str = str_replace('Reuters', '[[Reuters]]', $str);
        $str = str_replace('Associated Press', '[[Associated Press]]', $str);
        $str = preg_replace('#\b(PA)\b#', '[[Press Association|PA]]', $str);
        $str = preg_replace('#\b(AP)\b#', '[[Associated Press|AP]]', $str);
        $str = str_replace('Xinhua', '[[Xinhua]]', $str);
        $str = preg_replace('#\b(ATS)\b#', '[[Agence télégraphique suisse|ATS]]', $str);
        $str = preg_replace('#\b(PC|CP)\b#', '[[La Presse canadienne|PC]]', $str);

        return $str;
    }
}
298