Passed
Push — dev ( 4ef148...eba0dc )
by Dispositif
07:17
created

GoogleLivresTemplate::arrayKeysToLower()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
c 0
b 0
f 0
nc 2
nop 1
dl 0
loc 8
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Models\Wiki;
11
12
use App\Domain\Utils\ArrayProcessTrait;
13
use DomainException;
14
use Exception;
15
16
/**
17
 * https://fr.wikipedia.org/wiki/Mod%C3%A8le:Google_Livres
18
 * Le premier paramètre (ou id) est obligatoire.
19
 * Le deuxième (ou titre) est requis si on ne veut pas fabriquer le lien brut (inclusion {{ouvrage}} 'Lire en ligne')
20
 * Class GoogleLivresTemplate.
21
 */
22
class GoogleLivresTemplate extends AbstractWikiTemplate
23
{
24
    use ArrayProcessTrait;
25
26
    // Base URL of the rebuilt (simplified) link; simplifyGoogleUrl() swaps '.com' for the domain of the source URL.
    const DEFAULT_GOOGLEBOOK_URL = 'https://books.google.com/books';
27
28
    // Name of the wiki template handled by this class.
    const MODEL_NAME = 'Google Livres';
29
30
    // NOTE(review): not read in this class; presumably consumed by AbstractWikiTemplate to enforce
    // mandatory parameters (name => default value) — confirm against the parent.
    const REQUIRED_PARAMETERS = ['id' => ''];
31
32
    // Alternative parameter names (positional '1'/'2', accented or foreign spellings) => canonical name.
    // NOTE(review): not read in this class; presumably resolved by the parent when hydrating — confirm.
    const PARAM_ALIAS
33
        = [
34
            '1' => 'id',
35
            '2' => 'titre',
36
            'surligné' => 'surligne',
37
            'BuchID' => 'id',
38
        ];
39
40
    // Regex fragment (no delimiters, no anchors) matching a books/play Google URL up to and including "id=".
    // isGoogleBookURL() wraps it with '#^' ... '#i' and appends the character class for the id value.
    const GOOGLEBOOK_URL_PATTERN = 'https?://(?:books|play)\.google\.[a-z\.]{2,5}/(?:books)?(?:books/[^\?]+\.html)?(?:/reader)?\?(?:[a-zA-Z=&]+&)?id=';
41
42
    // Canonical parameter names, in their preferred order.
    // NOTE(review): presumably used by the parent to order parameters on serialization — confirm.
    protected $parametersByOrder
43
        = ['id', 'titre', 'couv', 'page', 'romain', 'page autre', 'surligne'];
44
45
    /**
     * Build a {Google Livres} template from a Google Books URL.
     *
     * The URL is validated, its query and fragment arguments are parsed, and the
     * relevant ones (id, cover, page, highlight) are mapped to template parameters
     * before hydrating a new instance.
     *
     * See also https://fr.wikipedia.org/wiki/Utilisateur:Jack_ma/GB
     * https://stackoverflow.com/questions/11584551/need-information-on-query-parameters-for-google-books-e-g-difference-between-d.
     *
     * @param string $url
     *
     * @return GoogleLivresTemplate|null
     * @throws DomainException when $url is not a Google Books URL, or its 'id' is missing or malformed
     * @throws Exception
     */
    public static function createFromURL(string $url): ?self
    {
        if (!self::isGoogleBookURL($url)) {
            throw new DomainException('not a Google Book URL');
        }
        $gooDat = self::parseGoogleBookQuery($url);

        if (empty($gooDat['id'])) {
            throw new DomainException("no GoogleBook 'id' in URL");
        }
        // parse_str() produces an array for "id[]=..." : reject it here instead of letting
        // preg_match() raise a TypeError. The pattern is anchored (with D, so that '$' does not
        // tolerate a trailing newline) : the WHOLE id must be 12 allowed characters.
        if (!is_string($gooDat['id']) || 1 !== preg_match('#^[0-9A-Za-z_\-]{12}$#D', $gooDat['id'])) {
            throw new DomainException("GoogleBook 'id' malformed [0-9A-Za-z_\-]{12}");
        }

        $templ = new self();
        $templ->hydrate(self::mapGooData($gooDat));

        return $templ;
    }
76
77
    /**
     * Tell whether the whole text is a Google Books URL carrying an "id=" argument.
     *
     * The match is case-insensitive; the value following "id=" may not contain
     * '>', ']', '}', a space or a newline.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isGoogleBookURL(string $text): bool
    {
        $regex = '#^'.self::GOOGLEBOOK_URL_PATTERN.'[^>\]} \n]+$#i';

        return 1 === preg_match($regex, $text);
    }
92
93
    /**
     * Extract the URL arguments found in both the ?query and the #fragment.
     *
     * @param string $url
     *
     * @return array arguments, after their keys went through arrayKeysToLower()
     */
    public static function parseGoogleBookQuery(string $url): array
    {
        // Google also stores arguments after the '#' (URL fragment)
        $parts = [
            parse_url($url, PHP_URL_FRAGMENT), // after #
            parse_url($url, PHP_URL_QUERY), // after ?
        ];

        // fragment first, query last : for a repeated key, parse_str() keeps the later (query) value
        $arguments = [];
        parse_str(implode('&', $parts), $arguments);

        return self::arrayKeysToLower($arguments);
    }
110
111
    /**
     * Map parsed Google URL arguments to {Google Livres} template parameters.
     *
     * - id                   => id
     * - printsec=frontcover  => couv=1
     * - pg=PAx               => page=x
     * - pg=PRx               => page=x, romain=1
     * - any other pg         => page autre=pg
     * - q (else dq)          => surligne
     *
     * @param array $gooData arguments as returned by parseGoogleBookQuery(); 'id' must be set
     *
     * @return array
     */
    private static function mapGooData(array $gooData): array
    {
        $data = ['id' => $gooData['id']];

        // show cover ?
        if (isset($gooData['printsec']) && 'frontcover' === $gooData['printsec']) {
            $data['couv'] = '1';
        }

        // page number
        if (!empty($gooData['pg'])) {
            if (preg_match('/^P([AR])([0-9]+)$/', $gooData['pg'], $matches) > 0) {
                // pg=PAx => "page=x" ; pg=PRx => "page=x|romain=1"
                $data['page'] = $matches[2];
                if ('R' === $matches[1]) {
                    $data['romain'] = '1';
                }
            } else {
                $data['page autre'] = $gooData['pg'];
            }
        }

        // q : keywords search / dq : quoted phrase search.
        // Google ignores dq when q exists, so q prevails. The test must be on emptiness,
        // not on null : with "q=&dq=foo" the set-but-empty q must not hide the usable dq.
        $highlight = !empty($gooData['q']) ? $gooData['q'] : ($gooData['dq'] ?? '');
        if (!empty($highlight)) {
            $data['surligne'] = self::googleUrlEncode($highlight);
        }

        return $data;
    }
153
154
    /**
     * Light replacement for urlencode() : the value is URL-decoded and trimmed,
     * then only its spaces are re-encoded (as '+'). Other characters, UTF-8
     * included, are left unencoded.
     *
     * @param string $str
     *
     * @return string
     */
    public static function googleUrlEncode(string $str): string
    {
        $decoded = urldecode($str);
        $trimmed = trim($decoded);

        return strtr($trimmed, ' ', '+');
    }
165
166
    /**
     * Rebuild a Google Books URL keeping only the meaningful arguments
     * (id, pg, printsec, q, dq) and dropping optional/tracking data.
     *
     * @param string $url
     *
     * @return string URL
     * @throws DomainException when the 'id' argument is missing or malformed
     * @throws Exception when $url is not a Google Books URL
     */
    public static function simplifyGoogleUrl(string $url): string
    {
        if (!self::isGoogleBookURL($url)) {
            // not DomainException for live testing with OuvrageOptimize
            throw new Exception('not a Google Book URL');
        }

        $gooDat = self::parseGoogleBookQuery($url);
        if (empty($gooDat['id'])) {
            throw new DomainException("no GoogleBook 'id' in URL");
        }
        // parse_str() produces an array for "id[]=..." : reject it here instead of letting
        // preg_match() raise a TypeError. The pattern is anchored (with D, so that '$' does not
        // tolerate a trailing newline) : the WHOLE id must be 12 allowed characters.
        if (!is_string($gooDat['id']) || 1 !== preg_match('#^[0-9A-Za-z_\-]{12}$#D', $gooDat['id'])) {
            throw new DomainException("GoogleBook 'id' malformed");
        }

        // keep only a few parameters
        // q : keywords search / dq : quoted phrase search
        $dat = [];
        foreach (['id', 'pg', 'printsec', 'q', 'dq'] as $keep) {
            if (!empty($gooDat[$keep])) {
                $dat[$keep] = $gooDat[$keep];
            }
        }

        // Example : https://fr.wikipedia.org/w/index.php?title=Foudre_de_Catatumbo&diff=next&oldid=168721836&diffmode=source
        // 1. URL with &dq= : keep dq in the final URL.
        // 2. If q != dq (search form changed afterwards), q= prevails : keep q in the final URL.
        // 3. Global search on http://books.google.fr gives pg= dq= (#q= with q == dq) ;
        //    in that case a final URL with only dq= gives the right result.
        // 4. If you use a url without &redir_esc=y#v=onepage for a book with "Preview" available,
        //    usually &dq shows the highlighted text in full page view whereas &q shows the snippet view
        //    (so you have to click on the snippet to see the full page).
        //    &dq allows highlighting in books where there is "Preview" available and &pg=PTx is in the URL.
        //
        // #v=onepage or #v=snippet
        if (isset($dat['q'], $dat['dq'])) {
            if ($dat['q'] === $dat['dq']) {
                // q == dq : dq prevails (a URL with only q= is displayed differently)
                unset($dat['q']);
            } else {
                // q != dq (e.g. new keywords typed in the search form) : q prevails
                unset($dat['dq']);
            }
        }

        // domain .com .fr
        $googleURL = self::DEFAULT_GOOGLEBOOK_URL;
        $gooDomain = self::parseGoogleDomain($url);
        if ($gooDomain) {
            $googleURL = str_replace('.com', $gooDomain, $googleURL);
        }

        // Explicit '&' separator : the default comes from the arg_separator.output ini setting,
        // which some configurations set to '&amp;'.
        // todo http_build_query process an urlencode, but a not encoded q= value ("fu+bar") is beautiful
        return $googleURL.'?'.http_build_query($dat, '', '&');
    }
243
244
    /**
     * Extract the domain suffix of the URL host : '.fr', '.com', '.co.uk', '.co.in'...
     *
     * The suffix is the last host label (2 or 3 letters), together with a
     * preceding '.co' / '.com' label when there is one, so that Google's
     * country domains under a second-level label (google.co.ma, google.co.in,
     * google.com.br) are returned whole. The host is matched case-insensitively
     * and the suffix is returned in lower case.
     *
     * @param string $url
     *
     * @return string|null null when the URL has no host or no recognizable suffix
     */
    private static function parseGoogleDomain(string $url): ?string
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (empty($host)) {
            return null;
        }

        if (1 !== preg_match('#(?:\.com?)?\.[a-z]{2,3}$#', strtolower($host), $matches)) {
            return null;
        }

        // Kept for backward compatibility : a bare '.ma' / '.uk' suffix is completed
        // with the '.co' label (Morocco : google.co.ma, not google.ma).
        $completed = ['.ma' => '.co.ma', '.uk' => '.co.uk'];

        return $completed[$matches[0]] ?? $matches[0];
    }
261
262
    /**
     * Tell whether the text is either a Google Books URL or a complete
     * {{Google Livres}} / {{Google Books}} wiki-template call.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isGoogleBookValue(string $text): bool
    {
        return self::isGoogleBookURL($text)
            || 1 === preg_match('#^{{[ \n]*Google (Livres|Books)[^}]+}}$#i', $text);
    }
280
281
    /**
     * Serialize the wiki-template, with 'id' and 'titre' written as unnamed
     * (positional) parameters.
     *
     * Only the "id=" / "titre=" that directly follow a parameter separator
     * ('|' and optional whitespace) are removed, so the same character
     * sequences inside a value or at the end of a longer parameter name are
     * left untouched.
     *
     * NOTE(review): $cleanOrder is accepted but not forwarded to
     * parent::serialize(); the parent signature is not visible here —
     * confirm whether it should be passed on.
     *
     * @param bool|null $cleanOrder currently unused
     *
     * @return string
     */
    public function serialize(?bool $cleanOrder = true): string
    {
        $text = parent::serialize();

        // The documentation suggests not displaying the names of these 2 parameters.
        // preg_replace() returns null on a PCRE error : fall back to the untouched text.
        return preg_replace('#(\|\s*)(?:id|titre)=#', '$1', $text) ?? $text;
    }
296
}
297