Passed
Push — dev ( 4ef148...eba0dc )
by Dispositif
07:17
created

WstatImport::getPageTitles()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 0

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 0
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 2
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Infrastructure;
11
12
use Exception;
13
use GuzzleHttp\Client;
14
15
/**
16
 * Data import from https://wstat.fr (frwiki daily dump parsing).
17
 * https://wstat.fr/template/index.php?title=Ouvrage&query=inclusions&param=isbn&start=50000&limit=50&format=json
18
 * Class WstatImport.
19
 */
20
class WstatImport implements PageListInterface
21
{
22
    const MAX_IMPORT = 50000;
23
24
    private $params = [];
25
26
    private $max = 100;
27
28
    private $client;
29
30
    /**
     * @param Client     $client HTTP client used to request the wstat.fr pages
     * @param array|null $params query-string parameters sent to wstat.fr (title, query, start, limit, ...);
     *                           when null or empty, a default "Ouvrage" / "inclusions-title" query is used
     * @param int|null   $max    number of lines to import, capped at self::MAX_IMPORT;
     *                           null falls back to the default of 500
     */
    public function __construct(Client $client, ?array $params = null, ?int $max = 500)
    {
        $this->client = $client;
        // min() with a null operand returns null, and a null counter makes getData()
        // stop right after the first page: normalise null to the declared default first.
        $this->max = min(self::MAX_IMPORT, $max ?? 500);

        // Examples:
        // "page name"     : https://wstat.fr/template/index.php?title=Ouvrage&query=inclusions-title&start=105000&limit=5000
        // "full template" : https://wstat.fr/template/index.php?title=Ouvrage&query=inclusions&start=105000&limit=5000
        if (!$params) {
            // An optional 'param' => 'isbn' entry appears in the example URL of the class docblock.
            $params = [
                'title' => 'Ouvrage',
                'query' => 'inclusions-title',
                'start' => 50000,
                'limit' => 5000,
            ];
        }
        $this->params = $params;
    }
49
50
    /**
     * Build the wstat.fr request URL from the current parameters.
     *
     * The 'format' parameter is forced to 'json' and kept in the instance parameters.
     *
     * @return string
     */
    public function getUrl()
    {
        $this->params['format'] = 'json';
        $queryString = http_build_query($this->params);

        return 'https://wstat.fr/template/index.php?'.$queryString;
    }
56
57
    /**
     * Download wstat pages one after another and collect the parsed lines.
     *
     * Paging stops when the remaining-lines counter ($this->max) is exhausted or when
     * a page comes back empty. Between two requests the 'start' parameter is advanced
     * by 'limit' and the process sleeps 3 seconds.
     *
     * Side effect: prints the running number of collected titles after each page.
     *
     * @return array [ ['title' => ..., 'template' => ...] ]
     *
     * @throws Exception
     */
    public function getData(): array
    {
        $data = [];
        while (true) {
            $json = $this->import($this->getUrl());
            $raw = json_decode($json, true);
            // Empty (or non-array) payload: nothing more to read. Break instead of
            // returning [] so the pages collected so far are not thrown away.
            if (empty($raw) || !is_array($raw)) {
                break;
            }
            $data = array_merge($data, $this->parsingWstatData($raw));
            echo count($data)." titles\n";
            // parsingWstatData() decrements $this->max for every line it consumes.
            if ($this->max <= 0) {
                break;
            }

            // next page initialisation
            $this->params['start'] = intval($this->params['start']) + intval($this->params['limit']);
            sleep(3);
        }

        return $data;
    }
87
88
    /**
     * Split each raw wstat line into a page title and a template string.
     *
     * Expected line shape: "Alexandre S. Giffard|{{Ouvrage|langue=|auteur1=|prénom...".
     * The '<!-- + -->' marker line is ignored. Every other line decrements $this->max,
     * including lines later rejected because they have no '|' or start with one.
     *
     * @param array $raw
     *
     * @return array [['title' => ..., 'template' => ...]]
     */
    private function parsingWstatData(array $raw): array
    {
        $rows = [];
        foreach ($raw as $entry) {
            // the end-of-page marker does not count as an imported line
            if ('<!-- + -->' !== $entry) {
                $this->max -= 1;

                // the first '|' separates the page title from the template
                $separator = mb_strpos($entry, '|', 0);
                if (false !== $separator && 0 !== $separator) {
                    $rows[] = [
                        'title' => trim(mb_substr($entry, 0, $separator)),
                        'template' => trim(mb_substr($entry, $separator + 1)),
                    ];
                }
            }
        }

        return $rows;
    }
119
120
    /**
     * Perform a GET request and return the response body.
     *
     * @param string $url
     *
     * @return string full response body
     *
     * @throws Exception when the response status code is not 200
     */
    private function import(string $url): string
    {
        $response = $this->client->get($url);
        $status = $response->getStatusCode();
        if (200 !== $status) {
            throw new Exception(sprintf('Error code: %s reason: %s', $status, $response->getReasonPhrase()));
        }

        // Casting the PSR-7 stream to string rewinds it and reads the whole body,
        // whereas getContents() only returns what is left after the current position.
        return (string) $response->getBody();
    }
136
137
    /**
     * Titles of the pages found by the wstat import.
     *
     * Runs the full import (see getData()) and keeps the 'title' value of each row.
     * A title appears once per imported row, so it may be repeated.
     *
     * NOTE(review): assumes PageListInterface expects a flat list of title strings — confirm.
     *
     * @return array list of page titles
     *
     * @throws Exception
     */
    public function getPageTitles(): array
    {
        return array_column($this->getData(), 'title');
    }
1 ignored issue
show
Bug Best Practice introduced by
In this branch, the function will implicitly return null which is incompatible with the type-hinted return array. Consider adding a return statement or allowing null as return value.

For hinted functions/methods where all return statements with the correct type are only reachable via conditions, "null" gets implicitly returned, which may be incompatible with the hinted type. Let's take a look at an example:

interface ReturnsInt {
    public function returnsIntHinted(): int;
}

class MyClass implements ReturnsInt {
    public function returnsIntHinted(): int
    {
        if (foo()) {
            return 123;
        }
        // here: null is implicitly returned
    }
}
Loading history...
141
}
142