Passed
Push — dev ( eba0dc...b8ced8 )
by Dispositif
02:52
created

GoogleLivresTemplate::simplifyGoogleUrl()   B

Complexity

Conditions 10
Paths 21

Size

Total Lines 68
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 10
eloc 22
c 2
b 0
f 0
nc 21
nop 1
dl 0
loc 68
rs 7.6666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Models\Wiki;
11
12
use App\Domain\Utils\ArrayProcessTrait;
13
use DomainException;
14
use Exception;
15
16
/**
17
 * https://fr.wikipedia.org/wiki/Mod%C3%A8le:Google_Livres
18
 * Le premier paramètre (ou id) est obligatoire.
19
 * Le deuxième (ou titre) est requis si on ne veut pas fabriquer le lien brut (inclusion {{ouvrage}} 'Lire en ligne')
20
 * Class GoogleLivresTemplate.
21
 */
22
class GoogleLivresTemplate extends AbstractWikiTemplate
23
{
24
    use ArrayProcessTrait;
25
26
    /** Base address used when rebuilding a simplified Google Books URL. */
    public const DEFAULT_GOOGLEBOOK_URL = 'https://books.google.com/books';

    /** Name of the wiki template (see https://fr.wikipedia.org/wiki/Mod%C3%A8le:Google_Livres). */
    public const MODEL_NAME = 'Google Livres';

    /** Presumably the parameters the template cannot do without: only 'id' is listed. */
    public const REQUIRED_PARAMETERS = ['id' => ''];

    /** Alternative parameter names, each mapped to the parameter name used by this class. */
    public const PARAM_ALIAS = [
        '1' => 'id',
        '2' => 'titre',
        'surligné' => 'surligne',
        'BuchID' => 'id',
    ];

    /**
     * Regex fragment (no delimiters) matching the start of a Google Books/Play URL,
     * up to and including the "id=" of the query string.
     */
    public const GOOGLEBOOK_URL_PATTERN = 'https?://(?:books|play)\.google\.[a-z\.]{2,6}/(?:books)?(?:books/[^\?]+\.html)?(?:/reader)?\?(?:[a-zA-Z=&]+&)?id=';

    /** Query parameter names whose presence marks a URL as carrying tracking data. */
    public const TRACKING_PARAMETERS = [
        'xtor',
        'ved',
        'ots',
        'sig',
        'source',
        'utm_source',
        'utm_medium',
        'utm_campaign',
        'utm_term',
        'utm_content',
    ];

    /**
     * Parameter names in their preferred order (id first, then titre, ...).
     *
     * @var array
     */
    protected $parametersByOrder = ['id', 'titre', 'couv', 'page', 'romain', 'page autre', 'surligne'];
61
62
    /**
     * Create a {Google Livres} template from a Google Books URL.
     *
     * The URL is validated, its query/fragment data is parsed, and the resulting
     * values are mapped to template parameters before hydrating a new instance.
     *
     * See also https://fr.wikipedia.org/wiki/Utilisateur:Jack_ma/GB
     * https://stackoverflow.com/questions/11584551/need-information-on-query-parameters-for-google-books-e-g-difference-between-d.
     *
     * @param string $url
     *
     * @return GoogleLivresTemplate|null
     * @throws DomainException when the URL is not a Google Books URL, or when its 'id' is missing or malformed
     * @throws Exception presumably raised by hydrate() — confirm against AbstractWikiTemplate
     */
    public static function createFromURL(string $url): ?self
    {
        if (!self::isGoogleBookURL($url)) {
            throw new DomainException('not a Google Book URL');
        }

        $gooDat = self::parseGoogleBookQuery($url);
        $id = $gooDat['id'] ?? null;

        if (empty($id)) {
            throw new DomainException("no GoogleBook 'id' in URL");
        }
        // "id[]=x" is parsed as an array: reject it here instead of letting preg_match() raise a TypeError.
        // The pattern is anchored so that the WHOLE id must be 12 valid characters, not just a part of it.
        if (!is_string($id) || !preg_match('#^[0-9A-Za-z_\-]{12}\z#', $id)) {
            throw new DomainException("GoogleBook 'id' malformed [0-9A-Za-z_\-]{12}");
        }

        $templ = new self();
        $templ->hydrate(self::mapGooData($gooDat));

        return $templ;
    }
93
94
    /**
     * Tell whether the whole text is a Google Books URL.
     *
     * The text must start with GOOGLEBOOK_URL_PATTERN and continue, up to its end,
     * with characters other than '>', ']', '}', space and newline. Matching is case-insensitive.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isGoogleBookURL(string $text): bool
    {
        $regex = '#^'.self::GOOGLEBOOK_URL_PATTERN.'[^>\]} \n]+$#i';

        return preg_match($regex, $text) > 0;
    }
109
110
    /**
     * Extract the URL arguments found in both the ?query and the #fragment.
     *
     * Google Books also stores data after the '#'. Both parts are parsed together,
     * the query string last, so a query value overrides a fragment value of the same name.
     * Keys are then passed through arrayKeysToLower().
     *
     * @param string $url
     *
     * @return array
     */
    public static function parseGoogleBookQuery(string $url): array
    {
        $fragment = (string) parse_url($url, PHP_URL_FRAGMENT); // after #
        $query = (string) parse_url($url, PHP_URL_QUERY); // after ?

        // fragment first, query second : the query takes precedence
        $parsed = [];
        parse_str($fragment.'&'.$query, $parsed);

        return self::arrayKeysToLower($parsed);
    }
127
128
    /**
     * Tell whether the URL (query or fragment) contains at least one parameter
     * listed in TRACKING_PARAMETERS.
     *
     * @param string $url
     *
     * @return bool
     */
    public static function isTrackingUrl(string $url): bool
    {
        foreach (array_keys(self::parseGoogleBookQuery($url)) as $param) {
            // Numeric parameter names ("?0=x") become integer array keys. A loose in_array() would then
            // evaluate 0 == 'xtor' as true on PHP < 8 : compare strings, strictly.
            if (in_array((string) $param, self::TRACKING_PARAMETERS, true)) {
                return true;
            }
        }

        return false;
    }
139
140
    /**
     * Map Google URL data to {Google Livres} data.
     *
     * - 'id' is copied as is.
     * - printsec=frontcover gives couv=1.
     * - pg=PAx gives page=x ; pg=PRx gives page=x and romain=1 ; any other pg value goes to 'page autre'.
     * - q (keywords search) or dq (quoted phrase search) gives 'surligne', q being preferred.
     *
     * @param array $gooData parsed URL data; must contain the 'id' key
     *
     * @return array
     */
    private static function mapGooData(array $gooData): array
    {
        $data = ['id' => $gooData['id']];

        // show cover ?
        if ('frontcover' === ($gooData['printsec'] ?? null)) {
            $data['couv'] = '1';
        }

        // page number
        if (!empty($gooData['pg'])) {
            if (preg_match('/^(PA|PR)([0-9]+)$/', $gooData['pg'], $matches) > 0) {
                //  pg=PAx => "page=x"
                $data['page'] = $matches[2];
                //  pg=PRx => "page=x|romain=1"
                if ('PR' === $matches[1]) {
                    $data['romain'] = '1';
                }
            } else {
                $data['page autre'] = $gooData['pg'];
            }
        }

        // q : keywords search / dq : quoted phrase search
        // Google display : dq is ignored when q exists
        $q = $gooData['q'] ?? '';
        $dq = $gooData['dq'] ?? '';
        if (!empty($dq) || !empty($q)) {
            // q prevails, but an empty "q=" must not hide a real dq value
            // ("??" only tests for null, so it kept the empty string).
            $search = ('' !== $q) ? $q : $dq;
            $data['surligne'] = self::googleUrlEncode($search);
        }

        return $data;
    }
182
183
    /**
     * Used instead of urlencode() : the value is URL-decoded and trimmed, then only
     * spaces are re-encoded (as '+'). Other characters, including UTF-8 ones, stay readable.
     *
     * @param string $str
     *
     * @return string
     */
    public static function googleUrlEncode(string $str): string
    {
        $decoded = trim(urldecode($str));

        return strtr($decoded, ' ', '+');
    }
194
195
    /**
     * Clean the Google Books URL from optional and tracking data.
     *
     * Only the parameters id, pg, printsec and one of q/dq are kept. The URL is rebuilt
     * on DEFAULT_GOOGLEBOOK_URL, with the Google domain of the original URL when it can be parsed.
     *
     * @param string $url
     *
     * @return string URL
     * @throws DomainException when the 'id' is missing or malformed
     * @throws Exception when the URL is not a Google Books URL
     */
    public static function simplifyGoogleUrl(string $url): string
    {
        if (!self::isGoogleBookURL($url)) {
            // not DomainException for live testing with OuvrageOptimize
            throw new Exception('not a Google Book URL');
        }

        $gooDat = self::parseGoogleBookQuery($url);
        $id = $gooDat['id'] ?? null;
        if (empty($id)) {
            throw new DomainException("no GoogleBook 'id' in URL");
        }
        // "id[]=x" is parsed as an array: reject it here instead of letting preg_match() raise a TypeError.
        // The pattern is anchored so that the WHOLE id must be 12 valid characters.
        if (!is_string($id) || !preg_match('#^[0-9A-Za-z_\-]{12}\z#', $id)) {
            throw new DomainException("GoogleBook 'id' malformed");
        }

        $dat = self::keepSimplifiedUrlParameters($gooDat);

        // domain .com .fr
        $googleURL = self::DEFAULT_GOOGLEBOOK_URL;
        $gooDomain = self::parseGoogleDomain($url);
        if ($gooDomain) {
            $googleURL = str_replace('.com', $gooDomain, $googleURL);
        }

        // todo http_build_query process an urlencode, but a not encoded q= value ("fu+bar") is beautiful
        return $googleURL.'?'.http_build_query($dat);
    }

    /**
     * Select the parameters kept in a simplified URL and settle the q/dq duplication.
     *
     * q : keywords search / dq : quoted phrase search.
     *
     * Example : https://fr.wikipedia.org/w/index.php?title=Foudre_de_Catatumbo&diff=next&oldid=168721836&diffmode=source
     * 1. Put &dq= in the final URL.
     * 2. If q != dq (search form changed afterwards), q prevails for the final result : put &q= in the final URL.
     * 3. A global search on http://books.google.fr gives pg= dq= (#q= with q == dq). In that case (q == dq),
     *    a final URL with only dq= gives the right result.
     * 4. If you use a url without &redir_esc=y#v=onepage for a book with "Preview" available,
     *    usually &dq shows the highlighted text in full page view whereas &q shows the snippet view (so you
     *    have to click on the snippet to see the full page). &dq allows highlighting in books where there is
     *    "Preview" available and &pg=PTx is in the URL.
     *
     * #v=onepage or #v=snippet
     *
     * @param array $gooDat parsed URL data
     *
     * @return array the non-empty values among id, pg, printsec, q, dq (never both q and dq)
     */
    private static function keepSimplifiedUrlParameters(array $gooDat): array
    {
        $dat = [];
        foreach (['id', 'pg', 'printsec', 'q', 'dq'] as $keep) {
            if (!empty($gooDat[$keep])) {
                $dat[$keep] = $gooDat[$keep];
            }
        }

        if (isset($dat['q'], $dat['dq'])) {
            if ($dat['q'] === $dat['dq']) {
                // q == dq : dq prevails for display (a URL with only q= is displayed differently)
                unset($dat['q']);
            } else {
                // q != dq (example : new keywords typed in the search form) : q prevails
                unset($dat['dq']);
            }
        }

        return $dat;
    }
272
273
    /**
     * Return the domain suffix of the Google host : '.fr', '.com', '.co.uk', '.com.br'...
     *
     * Compound suffixes ("co.xx" / "com.xx") found in the host are returned whole.
     * Otherwise the last label is returned, the bare '.ma', '.uk' and '.au' being
     * expanded to '.co.ma', '.co.uk' and '.com.au' as before.
     *
     * @param string $url
     *
     * @return string|null null when the URL has no host or no recognizable suffix
     */
    private static function parseGoogleDomain(string $url): ?string
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (empty($host)) {
            return null;
        }

        // Second-level country domains : google.co.jp, google.com.br, google.co.ma...
        // Keeping only the last label would produce another (or non-existent) host.
        if (preg_match('#\.google\.(com?\.[a-z]{2,3})$#', $host, $matches) > 0) {
            return '.'.$matches[1];
        }

        if (preg_match('#\.[a-z]{2,3}$#', $host, $matches) > 0) {
            // Morocco : google.co.ma (sub-domain!!)
            return str_replace(['.ma', '.uk', '.au'], ['.co.ma', '.co.uk', '.com.au'], $matches[0]); // .fr
        }

        return null;
    }
290
291
    /**
     * Tell whether the text is either a Google Books URL or a whole
     * {{Google Livres ...}} / {{Google Books ...}} wiki template (case-insensitive).
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isGoogleBookValue(string $text): bool
    {
        return self::isGoogleBookURL($text)
            || preg_match('#^{{[ \n]*Google (Livres|Books)[^}]+}}$#i', $text) > 0;
    }
309
310
    /**
     * Serialize the wiki-template.
     * Improvement : force param order : id/titre/...
     *
     * The parent serialization is used, then every "id=" and "titre=" occurrence is
     * removed from it (the template documentation suggests not displaying these 2 parameter names).
     *
     * NOTE(review): $cleanOrder is not forwarded to parent::serialize() — confirm this is intended.
     *
     * @param bool|null $cleanOrder
     *
     * @return string
     */
    public function serialize(?bool $cleanOrder = true): string
    {
        $hiddenNames = ['id=', 'titre='];

        return str_replace($hiddenNames, '', parent::serialize());
    }
325
}
326