Passed
Push — dev ( b7aeac...61ab03 )
by Dispositif
03:20
created

ExternConverterTrait::clean()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 20
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 13
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 20
rs 9.8333
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain\Publisher;
12
13
use App\Domain\Enums\Language;
14
use DateTime;
15
use Exception;
16
17
/**
 * Helpers converting metadata scraped from external web pages
 * (JSON-LD, OpenGraph, Dublin Core, citation_* meta tags) into the values
 * used by French-language wiki citation templates.
 *
 * All methods are stateless: they only read their arguments.
 */
trait ExternConverterTrait
{
    /**
     * Tells whether a scraped type value designates an article.
     *
     * @param string|null $str type value ('article', 'journalArticle', ...)
     *
     * @return bool true only for 'article' or 'journalArticle'
     */
    protected function isAnArticle(?string $str): bool
    {
        return in_array($str, ['article', 'journalArticle'], true);
    }

    /**
     * Maps the access information found in the metadata to 'ouvert' (free)
     * or 'limité' (restricted).
     *
     * Keys are checked in this order: 'isAccessibleForFree', 'DC.rights',
     * 'og:article:content_tier'. The first one present decides.
     *
     * @param array|mixed $data metadata indexed by key
     *
     * @return string|null 'ouvert', 'limité', or null when no known key is set
     */
    protected function convertURLaccess($data): ?string
    {
        // NYT, Figaro
        if (isset($data['isAccessibleForFree'])) {
            $free = $data['isAccessibleForFree'];
            if (is_string($free)) {
                // The flag is often serialised as text ("False", "true"...);
                // a plain truthiness test would read "False" as true.
                $free = filter_var($free, FILTER_VALIDATE_BOOLEAN);
            }

            return $free ? 'ouvert' : 'limité';
        }
        if (isset($data['DC.rights'])) {
            // strict comparison: only the exact strings mean "open"
            return in_array($data['DC.rights'], ['free', 'public domain'], true) ? 'ouvert' : 'limité';
        }
        if (isset($data['og:article:content_tier'])) {
            return ('free' === $data['og:article:content_tier']) ? 'ouvert' : 'limité';
        }

        return null;
    }

    /**
     * Shortens an author list that holds four or more names.
     *
     * Two list layouts are recognised: names separated by ';' and names
     * separated by ','. When at least four items are found, only the first
     * two (with their separator) are kept.
     * With $modeEtAl = true the method instead answers whether the
     * "et al." flag must be set: 'oui' when the list was too long, null otherwise.
     * TODO : wikifyPressAgency()
     *
     * @param string|null $authors raw author list
     * @param bool        $modeEtAl true to get the "et al." flag instead of the names
     *
     * @return string|null shortened/unchanged list, 'oui', or null
     */
    protected function authorsEtAl(?string $authors, $modeEtAl = false): ?string
    {
        if (empty($authors)) {
            return null;
        }

        // TODO : refactor
        $layouts = [
            '#([^;]+;[^;]+);[^;]+;.+#', // Bob, Martin ; Yul, Bar ; ... ; ...
            '#([^,]+,[^,]+),[^,]+,.+#', // Bob Martin, Yul Bar, ..., ...,...
        ];
        foreach ($layouts as $layout) {
            if (preg_match($layout, $authors, $matches)) {
                return ($modeEtAl) ? 'oui' : $matches[1];
            }
        }

        return ($modeEtAl) ? null : $authors;
    }

    /**
     * Builds a page or page-range string from citation_* meta tags.
     *
     * @param array $meta metadata; reads 'citation_firstpage' and 'citation_lastpage'
     *
     * @return string|null "first", "first–last" (en dash), or null without a first page
     */
    protected function convertDCpage(array $meta): ?string
    {
        if (!isset($meta['citation_firstpage'])) {
            return null;
        }

        $page = $meta['citation_firstpage'];
        if (isset($meta['citation_lastpage'])) {
            $page .= '–'.$meta['citation_lastpage'];
        }

        return (string)$page;
    }

    /**
     * Decodes HTML entities and removes characters that must not appear
     * in the resulting value (line feeds, pipes).
     * TODO encoding + normalizer
     *
     * @param string|null $str raw text
     *
     * @return string|null cleaned text, or null when $str is null
     */
    public function clean(?string $str = null): ?string
    {
        if ($str === null) {
            return null;
        }

        // Before decoding: an encoded line feed becomes a space (a literal one
        // is dropped below), and apostrophe entities are normalised explicitly.
        $str = str_replace(
            ['&#39;', '&#039;', '&apos;', '&#10;'],
            ["'", "'", "'", ' '],
            $str
        );

        // Explicit flags/charset so the result does not depend on the PHP
        // version's defaults (ENT_COMPAT before 8.1).
        $str = html_entity_decode($str, ENT_QUOTES | ENT_HTML401, 'UTF-8');

        // After decoding, so that entity-encoded pipes or line feeds
        // (&#124;, &#x0A;...) cannot slip through.
        return str_replace(["\n", '|'], ['', '/'], $str);
    }

    /**
     * Maps an OpenGraph og:type value to a format label.
     *
     * @param string|null $ogType og:type value (website, video.movie, video.tv_show,
     *                            video.other, article, book, profile...)
     *
     * @return string|null 'vidéo', 'livre', or null for any other type
     */
    protected function convertOGtype2format(?string $ogType): ?string
    {
        if (empty($ogType)) {
            return null;
        }
        if (false !== strpos($ogType, 'video')) {
            return 'vidéo';
        }
        if (false !== strpos($ogType, 'book')) {
            return 'livre';
        }

        return null;
    }

    /**
     * Converts a language/locale identifier to a language code.
     *
     * A locale shaped like "en_GB" yields its two-letter language part;
     * anything else is delegated to Language::all2wiki().
     *
     * https://developers.facebook.com/docs/internationalization#locales
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    protected function convertLangue(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }
        // en_GB
        if (preg_match('#^([a-z]{2})_[A-Z]{2}$#', $lang, $matches)) {
            return $matches[1];
        }

        return Language::all2wiki($lang);
    }

    /**
     * Extracts one author name from the 'author' entry of the metadata.
     *
     * Supported shapes of $data['author']:
     *  - a plain string ('Bob'), returned for $indice === 1 only;
     *  - a single record (['name' => 'Bob', '@type' => 'Person']),
     *    returned for $indice === 0 only;
     *  - a list of records ([0 => ['name' => 'Bob'], 1 => ...]), indexed by $indice.
     * Records whose '@type' is set and is not 'Person' are ignored.
     * A 'name' may be a string or a list whose first item is used.
     * Any unsupported or incomplete shape yields null.
     *
     * @param array|mixed $data   metadata holding an optional 'author' entry
     * @param int         $indice position of the wanted author
     *
     * @return string|null entity-decoded author name, or null
     */
    protected function convertAuteur($data, $indice)
    {
        if (!isset($data['author'])) {
            return null;
        }
        $author = $data['author'];

        // author=Bob
        if (is_string($author)) {
            // NOTE(review): the string form answers to index 1 while the
            // record forms below are 0-based — confirm against the callers.
            // Other indices return null instead of reading string offsets.
            return (1 === $indice) ? html_entity_decode($author) : null;
        }
        if (!is_array($author)) {
            return null;
        }

        // author ['name'=>'Bob','@type'=>'Person']
        if (0 === $indice && isset($author['name']) && $this->isPersonRecord($author)) {
            return $this->decodeAuthorName($author['name']);
        }

        // author [ 0 => ['name'=>'Bob'], 1=> ...]
        if (isset($author[$indice])
            && is_array($author[$indice])
            && $this->isPersonRecord($author[$indice])
        ) {
            // "author" => [ "@type" => "Person", "name" => [] ] is handled by the helper
            return $this->decodeAuthorName($author[$indice]['name'] ?? null);
        }

        return null;
    }

    /**
     * Returns the name of the first author when it is explicitly typed
     * as something other than a 'Person' (organisation, agency...).
     *
     * @param array|mixed $data metadata holding an optional 'author' list
     *
     * @return string|null entity-decoded name, or null
     */
    protected function convertInstitutionnel($data)
    {
        if (!isset($data['author'][0]['@type']) || 'Person' === $data['author'][0]['@type']) {
            return null;
        }
        // a record without a usable name must not reach html_entity_decode()
        if (!isset($data['author'][0]['name']) || !is_string($data['author'][0]['name'])) {
            return null;
        }

        return html_entity_decode($data['author'][0]['name']);
    }

    /**
     * Normalises a date string to the "d-m-Y" format.
     *
     * A bare four-digit year is returned as is. A string that DateTime
     * cannot parse is returned unchanged (no exception leaves this method).
     *
     * @param string|null $str
     *
     * @return string|null formatted date, the original string, or null when empty
     */
    protected function convertDate(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }

        // "2012"
        if (preg_match('#^[12][0-9]{3}$#', $str)) {
            return $str;
        }

        try {
            $date = new DateTime($str);
        } catch (Exception $e) {
            // best effort: keep the raw value rather than losing it
            return $str;
        }

        return $date->format('d-m-Y');
    }

    /**
     * Turns names/acronyms of press agencies into wikilinks.
     *
     * A string already containing '[' is returned untouched, to avoid
     * nesting links.
     *
     * @param string|null $str
     *
     * @return string|null wikified text, or null when $str is empty
     */
    protected function wikifyPressAgency(?string $str): ?string
    {
        if (empty($str)) {
            return null;
        }
        // skip potential wikilinks
        if (false !== strpos($str, '[')) {
            return $str;
        }

        // Applied sequentially, in this order.
        $agencies = [
            '#\b(AFP)\b#i' => '[[Agence France-Presse|AFP]]',
            '#Reuters#' => '[[Reuters]]',
            '#Associated Press#' => '[[Associated Press]]',
            '#\b(PA)\b#' => '[[Press Association|PA]]',
            '#\b(AP)\b#' => '[[Associated Press|AP]]',
            '#Xinhua#' => '[[Xinhua]]',
            '#\b(ATS)\b#' => '[[Agence télégraphique suisse|ATS]]',
            '#\b(PC|CP)\b#' => '[[La Presse canadienne|PC]]',
        ];
        foreach ($agencies as $pattern => $wikilink) {
            // preg_replace() yields null on a PCRE error: keep the current text then
            $str = preg_replace($pattern, $wikilink, $str) ?? $str;
        }

        return $str;
    }

    /**
     * Tells whether an author record may describe a person:
     * no '@type' at all, or '@type' equal to 'Person'.
     *
     * @param array $record
     *
     * @return bool
     */
    private function isPersonRecord(array $record): bool
    {
        return !isset($record['@type']) || 'Person' === $record['@type'];
    }

    /**
     * Decodes an author name given either as a string or as a list
     * whose first item is the name.
     *
     * @param mixed $name
     *
     * @return string|null null when no string name can be found
     */
    private function decodeAuthorName($name): ?string
    {
        if (is_string($name)) {
            return html_entity_decode($name);
        }
        if (is_array($name) && isset($name[0]) && is_string($name[0])) {
            return html_entity_decode($name[0]);
        }

        return null;
    }
}
249