Passed
Push — master ( b3f7dd...feb615 )
by Dispositif
06:47
created

ExternConverterTrait::convertDCpage()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 6
nc 3
nop 1
dl 0
loc 12
ccs 7
cts 7
cp 1
crap 3
rs 10
c 1
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
trait ExternConverterTrait
{
    /**
     * Tells whether a document type identifier designates an article.
     *
     * @param string|null $str type identifier
     *
     * @return bool true only for the exact values "article" and "journalArticle"
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Derives the access level of a URL from page metadata.
     *
     * The keys are probed in this order: "isAccessibleForFree", "DC.rights",
     * "og:article:content_tier". The first key that is set decides the result.
     *
     * @param mixed $data metadata, read with array access
     *
     * @return string|null 'ouvert' or 'limité', null when none of the keys is set
     */
    protected function convertURLaccess($data): ?string
    {
        // NYT, Figaro
        if (isset($data['isAccessibleForFree'])) {
            $free = $data['isAccessibleForFree'];
            // The flag may arrive as text ("False"); a non-empty string is truthy in PHP,
            // so the textual form of false has to be recognised explicitly.
            if (is_string($free) && 'false' === strtolower(trim($free))) {
                $free = false;
            }

            return $free ? 'ouvert' : 'limité';
        }

        if (isset($data['DC.rights'])) {
            // Strict comparison: a non-string value must not loosely match 'free'.
            return in_array($data['DC.rights'], ['free', 'public domain'], true) ? 'ouvert' : 'limité';
        }

        if (isset($data['og:article:content_tier'])) {
            return ('free' === $data['og:article:content_tier']) ? 'ouvert' : 'limité';
        }

        return null;
    }

    /**
     * Shortens an author list that holds at least four entries to its first two.
     * With $modeEtAl = true, the method instead answers whether "et al." applies.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors authors separated by ";" or by ","
     * @param bool        $modeEtAl when true, return 'oui' (list was long) or null
     *
     * @return string|null
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // keep only the first 2 authors TODO : refactor
        $patterns = [
            // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^;]+;[^;]+);[^;]+;.+#',
            // Bob Martin, Yul Bar, ..., ...,...
            '#([^,]+,[^,]+),[^,]+,.+#',
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $authors, $matches)) {
                return $modeEtAl ? 'oui' : $matches[1];
            }
        }

        return $modeEtAl ? null : $authors;
    }

    /**
     * Builds a page or page-range string from "citation_firstpage" / "citation_lastpage".
     *
     * @param array $meta metadata
     *
     * @return string|null "first" or "first–last" (en dash); null without a first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string)$page;
    }

    /**
     * Cleans an author string: applies clean(), discards values that are URLs
     * and strips a leading "Par ".
     *
     * @param string|null $str
     *
     * @return string|null null for a null input or a URL
     */
    public function cleanAuthor(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        $cleaned = $this->clean($str);

        // "https://www.facebook.com/search/top/?q=..."
        if (preg_match('#^https?://.+#i', $cleaned)) {
            return null;
        }

        // "Par Bob"
        if (preg_match('#^Par (.+)$#i', $cleaned, $matches)) {
            return $matches[1];
        }

        return $cleaned;
    }

    /**
     * Normalises whitespace and a few HTML entities, then decodes remaining entities.
     * TODO encoding + normalizer
     *
     * @param string|null $str
     *
     * @return string|null null only for a null input
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // Replacements are applied in order. "\n" becomes a space first, then "\r" is
        // dropped so that "\r\n" yields a single space. (The list previously held "\n"
        // twice, which left the second entry without effect.)
        $str = str_replace(
            ["\n", "\t", '&#x27;', '&#39;', '&#039;', '&apos;', "\r", '&#10;', '&eacute;'],
            [' ', ' ', '’', "'", "'", "'", '', ' ', 'é'],
            $str
        );

        return html_entity_decode($str);
    }

    /**
     * Maps an Open Graph "og:type" value to a format label.
     *
     * @param string|null $ogType
     *
     * @return string|null 'vidéo' or 'livre'; null for any other or empty value
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }

        // og:type = default: website / video.movie / video.tv_show video.other / article, book, profile
        if (strpos($ogType, 'video') !== false) {
            return 'vidéo';
        }
        if (strpos($ogType, 'book') !== false) {
            return 'livre';
        }

        return null;
    }

    /**
     * Converts a language or locale identifier.
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null the two-letter prefix of an "xx_YY" locale; otherwise the
     *                     result of Language::all2wiki(); null for an empty input
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts the name of the author at position $indice from the "author" entry.
     *
     * Supported shapes of $data['author']:
     *  - a plain string,
     *  - a single author: ['name' => 'Bob', '@type' => 'Person'],
     *  - a list of authors: [0 => ['name' => 'Bob'], 1 => ...].
     * A "name" may itself be a list, in which case its first element is used.
     * Entries whose "@type" is set and differs from 'Person' are ignored.
     *
     * @param mixed $data   metadata, read with array access
     * @param mixed $indice author position
     *
     * @return string|null entity-decoded name, or null when none is found
     */
    protected function convertAuteur($data, $indice)
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // Returns the decoded name whether "name" is a string or a list of strings.
        // Anything else (empty list, non-string element) yields null instead of
        // passing a non-string to html_entity_decode().
        $decodeName = static function ($name): ?string {
            if (is_string($name)) {
                return html_entity_decode($name);
            }
            if (isset($name[0]) && is_string($name[0])) {
                return html_entity_decode($name[0]);
            }

            return null;
        };

        // author=Bob
        // NOTE(review): the plain-string form is only returned for index 1, whereas the
        // other shapes below treat 0 as the first author — confirm against the callers.
        if (is_string($author)) {
            return (1 === $indice) ? html_entity_decode($author) : null;
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice
            && isset($author['name'])
            && (!isset($author['@type']) || 'Person' === $author['@type'])
        ) {
            return $decodeName($author['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($author[$indice])
            && (!isset($author[$indice]['@type']) || 'Person' === $author[$indice]['@type'])
            && isset($author[$indice]['name'])
        ) {
            // also covers "author" => [ "@type" => "Person", "name" => [] ]
            return $decodeName($author[$indice]['name']);
        }

        return null;
    }

    /**
     * Returns the name of the first author when that author is not a person.
     *
     * @param mixed $data metadata, read with array access
     *
     * @return string|null entity-decoded name of $data['author'][0] when its "@type"
     *                     is set and differs from 'Person'; null otherwise, including
     *                     when no string "name" is present
     */
    protected function convertInstitutionnel($data)
    {
        if (isset($data['author'][0]['@type'], $data['author'][0]['name'])
            && 'Person' !== $data['author'][0]['@type']
            && is_string($data['author'][0]['name'])
        ) {
            return html_entity_decode($data['author'][0]['name']);
        }

        return null;
    }

    /**
     * Converts a date string to the "d-m-Y" format.
     *
     * @param string|null $str
     *
     * @return string|null null for an empty input; a bare four-digit year is returned
     *                     unchanged; a string DateTime cannot parse is returned unchanged
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }

        // "2012"
        if (preg_match('#^[12][0-9]{3}$#', $str)) {
            return $str;
        }

        try {
            return (new DateTime($str))->format('d-m-Y');
        } catch (Exception $e) {
            // Unparseable input such as "23/11/2015 00:00:00": best effort, keep the raw value.
            return $str;
        }
    }

    /**
     * Wikifies press agency names and acronyms.
     *
     * @param string|null $str
     *
     * @return string|null null for an empty input; the input unchanged when it already
     *                     contains "[" (potential wikilink)
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }

        // skip potential wikilinks
        if (strpos($str, '[') !== false) {
            return $str;
        }

        // Applied sequentially, in this order.
        $agencies = [
            '#\b(AFP)\b#i' => '[[Agence France-Presse|AFP]]',
            '#Reuters#' => '[[Reuters]]',
            '#Associated Press#' => '[[Associated Press]]',
            '#\b(PA)\b#' => '[[Press Association|PA]]',
            '#\b(AP)\b#' => '[[Associated Press|AP]]',
            '#Xinhua#' => '[[Xinhua]]',
            '#\b(ATS)\b#' => '[[Agence télégraphique suisse|ATS]]',
            '#\b(PC|CP)\b#' => '[[La Presse canadienne|PC]]',
        ];

        return preg_replace(array_keys($agencies), array_values($agencies), $str);
    }
}
272