Passed
Push — master ( 75e8d8...1927ca )
by Dispositif
13:16
created

CirrusSearch::saveOffsetInFile()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 1
Metric Value
eloc 3
c 1
b 0
f 1
dl 0
loc 6
ccs 0
cts 0
cp 0
rs 10
cc 1
nc 1
nop 1
crap 2
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Infrastructure;
12
13
use App\Application\InfrastructurePorts\HttpClientInterface;
14
use App\Application\InfrastructurePorts\PageListForAppInterface;
15
use App\Domain\Exceptions\ConfigException;
16
use App\Domain\InfrastructurePorts\PageListInterface;
17
use Exception;
18
use GuzzleHttp\Psr7\Response;
19
use InvalidArgumentException;
20
use Throwable;
21
22
/**
23
 * https://fr.wikipedia.org/w/api.php?action=help&modules=query%2Bsearch
24
 * https://www.mediawiki.org/wiki/Help:CirrusSearch#Insource
25
 * raw https://fr.wikipedia.org/w/api.php?action=query&list=search&srsearch=%22https://books.google%22%20insource:/\%3Cref\%3Ehttps\:\/\/books\.google/&formatversion=2&format=json
26
 * Dirty.
27
 * Class CirrusSearch
28
 *
29
 * @package App\Infrastructure
30
 */
31
class CirrusSearch implements PageListInterface, PageListForAppInterface
32
{
33
    final public const BASE_URL = 'https://fr.wikipedia.org/w/api.php';
34
    public const NAMESPACE_MAIN = 0;
35
    private const SEARCH_CONTINUE_FILENAME = __DIR__ . '/../../resources/cirrusSearch-HASH.txt'; // move to config
36
    /**
37
     * @var array|string[]
38
     */
39
    protected array $requestParams = [];
40
41
    private array $defaultParams
42
        = [
43
            'action' => 'query',
44
            'list' => 'search',
45
            'formatversion' => '2',
46
            'format' => 'json',
47
            'srnamespace' => '0',
48
            'srlimit' => '100',
49
        ];
50
    private readonly HttpClientInterface $client;
51
52
    /**
     * Stores the search parameters and options and obtains the HTTP client.
     *
     * @param array      $params  Search parameters; they are merged over the default
     *                            parameters when the request URL is built.
     * @param array|null $options Option flags read by this class: "continue" => true to
     *                            continue a previous search, "reverse" => true to return
     *                            the titles in reverse order.
     */
    public function __construct(
        private readonly array $params,
        private ?array $options = [],
    ) {
        // The client is taken from the service factory rather than injected.
        $this->client = ServiceFactory::getHttpClient();
    }
59
60
    /**
     * Runs the search and returns the titles of the matching pages.
     *
     * todo move to ApiSearch
     *
     * When the "continue" option is truthy and the response contains a non-empty
     * continue/sroffset value, that offset is saved to the offset file before the
     * results are processed.
     *
     * @return string[] List of trimmed page titles (keys 0..n-1), in API order, or in
     *                  reverse order when the "reverse" option is strictly true. Empty
     *                  when the response has no search results.
     * @throws ConfigException
     * @throws Exception
     */
    public function getPageTitles(): array
    {
        $arrayResp = $this->httpRequest();

        $nextOffset = $arrayResp['continue']['sroffset'] ?? null;
        if (($this->options['continue'] ?? false) && !empty($nextOffset)) {
            $this->saveOffsetInFile((int) $nextOffset);
        }

        // empty() also covers a missing "query" entry.
        if (empty($arrayResp['query']['search'])) {
            return [];
        }

        $titles = [];
        foreach ($arrayResp['query']['search'] as $res) {
            if (!empty($res['title'])) {
                $titles[] = trim((string) $res['title']); // is trim useful?
            }
        }

        if (($this->options['reverse'] ?? null) === true) {
            // array_reverse() re-indexes from 0, so the result stays a real list.
            // krsort() kept the original keys: only the iteration order was reversed.
            return array_reverse($titles);
        }

        return $titles;
    }
92
93
    /**
     * Replaces the whole options array (it is not merged with the previous one).
     *
     * @param array|null $options New options, e.g. "continue" and "reverse" flags; may be null.
     */
    public function setOptions(?array $options): void
    {
        $this->options = $options;
    }
97
98
    /**
     * Builds the complete API URL for the current search.
     *
     * Side effects: stores the merged parameters in $this->requestParams and, when the
     * "continue" option is truthy, adds the offset read from the offset file as
     * "sroffset" and prints it.
     *
     * @throws InvalidArgumentException when params has no non-empty "srsearch" entry
     */
    private function getURL(): string
    {
        if (empty($this->params['srsearch'])) {
            throw new InvalidArgumentException('No "srsearch" argument in params.');
        }

        // Caller-supplied parameters take precedence over the defaults.
        $this->requestParams = array_merge($this->defaultParams, $this->params);

        $resume = $this->options['continue'] ?? false;
        if ($resume) {
            $offset = $this->getOffsetFromFile($this->requestParams);
            $this->requestParams['sroffset'] = $offset;
            printf("Extract offset %s from file \n", $offset);
        }

        // RFC3986 : space => %20
        $queryString = http_build_query($this->requestParams, 'bla', '&', PHP_QUERY_RFC3986);

        return self::BASE_URL . '?' . $queryString;
    }
114
115
    /**
     * Sends the search request and decodes the JSON response body.
     *
     * todo Wiki API ?
     *
     * @return array Decoded response, or an empty array when the response body is empty.
     * @throws ConfigException when the built URL is empty
     * @throws Exception on a non-200 status, on invalid JSON, or when the JSON document
     *                   does not decode to an array
     */
    private function httpRequest(): array
    {
        $url = $this->getURL();
        if ($url === '' || $url === '0') {
            throw new ConfigException('CirrusSearch null URL');
        }

        // improve with curl options ?
        /** @var Response $response */
        $response = $this->client->get($url);
        if ($response->getStatusCode() !== 200) {
            throw new Exception(
                'CirrusSearch error : '.$response->getStatusCode().' '.$response->getReasonPhrase()
            );
        }

        $json = $response->getBody()->getContents();
        if (empty($json)) {
            return [];
        }

        try {
            $decoded = json_decode((string) $json, true, 512, JSON_THROW_ON_ERROR);
        } catch (Throwable $e) {
            // Re-thrown as a plain Exception, keeping the original as "previous".
            throw new Exception($e->getMessage(), $e->getCode(), $e);
        }

        // Valid JSON can still be a scalar or null; returning that would violate the
        // array return type with a TypeError instead of the documented Exception.
        if (!is_array($decoded)) {
            throw new Exception('CirrusSearch error : unexpected JSON response (not an array)');
        }

        return $decoded;
    }
151
152
    /**
     * Reads the stored continuation offset for the given search parameters.
     *
     * The file name is the SEARCH_CONTINUE_FILENAME template with "HASH" replaced by the
     * hash of $allParams.
     *
     * @param array $allParams Request parameters identifying the search.
     *
     * @return int Stored offset, or 0 when no offset file exists for this search.
     * @throws ConfigException when the offset file exists but cannot be read
     * @throws InvalidArgumentException when $allParams is empty
     */
    private function getOffsetFromFile(array $allParams): int
    {
        $hash = $this->hashSearchParams($allParams);
        $file = str_replace('HASH', $hash, self::SEARCH_CONTINUE_FILENAME);
        if (!is_file($file)) {
            return 0;
        }

        // file_get_contents() returns false on failure; passing that to trim() would
        // raise a TypeError under strict_types.
        $content = file_get_contents($file);
        if ($content === false) {
            throw new ConfigException('CirrusSearch offset file is not readable: '.$file);
        }

        return (int) trim($content);
    }
162
163
    /**
     * Writes the continuation offset to the offset file of the current search.
     *
     * The file name is the SEARCH_CONTINUE_FILENAME template with "HASH" replaced by the
     * hash of $this->requestParams. The result of the write is not checked.
     *
     * @param int $continueOffset Offset to store.
     */
    private function saveOffsetInFile(int $continueOffset = 0): void
    {
        $path = str_replace(
            'HASH',
            $this->hashSearchParams($this->requestParams),
            self::SEARCH_CONTINUE_FILENAME
        );

        file_put_contents($path, $continueOffset);
    }
170
171
    /**
     * Computes the hash used in the offset file name for a set of search parameters.
     *
     * The "sroffset" entry is left out; the remaining values (keys are not part of the
     * hash) are concatenated in array order and hashed with md5.
     *
     * @param array $params Search parameters.
     *
     * @return string md5 hex digest.
     * @throws InvalidArgumentException when $params is empty
     */
    private function hashSearchParams(array $params): string
    {
        if ($params === []) {
            throw new InvalidArgumentException('No search argument in params.');
        }

        // Drop the offset so every page of the same search maps to the same hash.
        $hashable = array_diff_key($params, ['sroffset' => true]);

        return md5(implode('', $hashable));
    }
182
}
183