|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file contains only the ApiHelper class. |
|
4
|
|
|
*/ |
|
5
|
|
|
|
|
6
|
|
|
namespace AppBundle\Helper; |
|
7
|
|
|
|
|
8
|
|
|
use Mediawiki\Api\MediawikiApi; |
|
9
|
|
|
use Mediawiki\Api\SimpleRequest; |
|
10
|
|
|
use Mediawiki\Api\FluentRequest; |
|
11
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
|
12
|
|
|
use Symfony\Component\Config\Definition\Exception\Exception; |
|
13
|
|
|
use Symfony\Component\DependencyInjection\ContainerInterface; |
|
14
|
|
|
use Xtools\ProjectRepository; |
|
15
|
|
|
|
|
16
|
|
|
/** |
|
17
|
|
|
* This is a helper for calling the MediaWiki API. |
|
18
|
|
|
*/ |
|
19
|
|
|
class ApiHelper extends HelperBase
{
    /** @var MediawikiApi|null The API object; created lazily by setUp(). */
    private $api;

    /** @var mixed The $project value that $api was created for (null until setUp() has run). */
    private $apiProject;

    /** @var CacheItemPoolInterface The cache (the container's 'cache.app' service). */
    protected $cache;

    /** @var ContainerInterface The DI container. */
    protected $container;

    /**
     * ApiHelper constructor.
     * @param ContainerInterface $container The DI container; also used to look up 'cache.app'.
     */
    public function __construct(ContainerInterface $container)
    {
        $this->container = $container;
        $this->cache = $container->get('cache.app');
    }

    /**
     * Set up the MediawikiApi object for the given project.
     *
     * The API object is reused across calls as long as the same project is requested.
     * When a different project is requested, a new API object is fetched so that
     * requests are never sent to the API of a previously used project.
     *
     * @param string $project
     */
    private function setUp($project)
    {
        if ($this->api instanceof MediawikiApi && $this->apiProject === $project) {
            return;
        }

        $projectObject = ProjectRepository::getProject($project, $this->container);
        $this->api = $projectObject->getApi();
        $this->apiProject = $project;
    }

    /**
     * Get HTML display titles of a set of pages (or the normal title if there's no display title).
     * This will send ceil(t/50) API requests where t is the number of titles supplied.
     * @param string $project The project.
     * @param string[] $pageTitles The titles to fetch.
     * @return string[] Keys are the original supplied title, and values are the display titles.
     */
    public function displayTitles($project, $pageTitles)
    {
        $this->setUp($project);

        // Titles are requested in batches of this size.
        $batchSize = 50;
        $displayTitles = [];
        $numPages = count($pageTitles);

        for ($n = 0; $n < $numPages; $n += $batchSize) {
            $titleSlice = array_slice($pageTitles, $n, $batchSize);
            $params = [
                'prop' => 'info|pageprops',
                'inprop' => 'displaytitle',
                'titles' => join('|', $titleSlice),
            ];
            $query = new SimpleRequest('query', $params);
            $result = $this->api->postRequest($query);

            // Nothing usable came back for this batch; skip it instead of iterating over a missing key.
            if (!isset($result['query']['pages']) || !is_array($result['query']['pages'])) {
                continue;
            }

            // Extract normalization info: map each normalized title back to the title as supplied.
            $normalized = [];
            if (isset($result['query']['normalized'])) {
                foreach ($result['query']['normalized'] as $entry) {
                    $normalized[$entry['to']] = $entry['from'];
                }
            }

            // Match up the normalized titles with the display titles and the original titles.
            foreach ($result['query']['pages'] as $pageInfo) {
                $displayTitle = isset($pageInfo['pageprops']['displaytitle'])
                    ? $pageInfo['pageprops']['displaytitle']
                    : $pageInfo['title'];
                $origTitle = isset($normalized[$pageInfo['title']])
                    ? $normalized[$pageInfo['title']]
                    : $pageInfo['title'];
                $displayTitles[$origTitle] = $displayTitle;
            }
        }

        return $displayTitles;
    }

    /**
     * Make mass API requests to MediaWiki API
     * The API normally limits to 500 pages, but gives you a 'continue' value
     *   to finish iterating through the resource.
     * Adapted from https://github.com/MusikAnimal/pageviews
     * @param array $params Associative array of params to pass to API
     * @param string $project Project to query, e.g. en.wikipedia.org
     * @param string|callable $dataKey The key for the main chunk of data, in the query hash
     *                                 (e.g. 'categorymembers' for API:Categorymembers).
     *                                 If this is a function it is given the response data,
     *                                 and expected to return the data we want to concatenate.
     * @param string $continueKey The key to look in the continue hash, if present
     *                            (e.g. 'cmcontinue' for API:Categorymembers)
     * @param int $limit Max number of pages to fetch
     * @return array Associative array with data. Always has a 'pages' key; has a $dataKey
     *               key when a string $dataKey was found in a response; has 'continue' => true
     *               when fetching stopped while the last response still offered a continuation.
     */
    public function massApi($params, $project, $dataKey, $continueKey = 'continue', $limit = 5000)
    {
        $this->setUp($project);

        // Passed by reference to massApiInternal so we can keep track of
        // everything we need during the recursive calls.
        $data = [
            'params' => $params,
            'project' => $project,
            'continueKey' => $continueKey,
            'dataKey' => $dataKey,
            'limit' => $limit,
            'resolveData' => [
                'pages' => []
            ],
            'continueValue' => null,
            'promise' => new \GuzzleHttp\Promise\Promise(),
        ];

        // wait for all promises to complete, even if some of them fail
        // NOTE(review): massApiInternal() has no return statement of its own, so settle()
        // receives null here. Confirm that wait() still drives the pending request(s) to
        // completion before 'resolveData' is read below.
        \GuzzleHttp\Promise\settle($this->massApiInternal($data))->wait();

        return $data['resolveData'];
    }

    /**
     * Internal function used by massApi() to make recursive calls
     *
     * Sends one asynchronous query. When the response arrives, its data is appended to
     * $data['resolveData']; then either another request is issued with the continue value
     * from the response, or $data['promise'] is resolved.
     *
     * @param array &$data Everything we need to keep track of, as defined in massApi()
     * @return null Nothing. $data['promise']->then is used to continue flow of
     *   execution after all recursive calls are complete
     */
    private function massApiInternal(&$data)
    {
        // Caller-supplied params take precedence over these defaults.
        $requestData = array_merge([
            'action' => 'query',
            'format' => 'json',
            'formatversion' => '2',
        ], $data['params']);

        if ($data['continueValue']) {
            $requestData[$data['continueKey']] = $data['continueValue'];
        }

        $query = FluentRequest::factory()->setAction('query')->setParams($requestData);
        $innerPromise = $this->api->getRequestAsync($query);

        $innerPromise->then(function ($result) use (&$data) {
            // some failures come back as 200s, so we still resolve and let the outer function handle it
            if (isset($result['error']) || !isset($result['query'])) {
                return $data['promise']->resolve($data);
            }

            $dataKey = $data['dataKey'];

            if (is_callable($dataKey)) {
                // allow custom function to parse the data we want, if provided
                $data['resolveData']['pages'] = array_merge(
                    $data['resolveData']['pages'],
                    $dataKey($result['query'])
                );
                $isFinished = count($data['resolveData']['pages']) >= $data['limit'];
            } else {
                // append new data to data from last request. We might want both 'pages' and dataKey
                if (isset($result['query']['pages'])) {
                    $data['resolveData']['pages'] = array_merge(
                        $data['resolveData']['pages'],
                        $result['query']['pages']
                    );
                }
                // The key may be absent from a response, so test it without reading a missing index.
                if (!empty($result['query'][$dataKey])) {
                    $previousValues = isset($data['resolveData'][$dataKey])
                        ? $data['resolveData'][$dataKey]
                        : [];
                    $data['resolveData'][$dataKey] = array_merge($previousValues, $result['query'][$dataKey]);
                }

                // If pages is not the collection we want, it will be either an empty array or one entry with
                // basic page info depending on what API we're hitting. So resolveData[dataKey] will hit the limit.
                // Nothing may have been collected under $dataKey yet; count an empty list in that case
                // rather than calling count() on a missing value.
                $collected = isset($data['resolveData'][$dataKey])
                    ? $data['resolveData'][$dataKey]
                    : [];
                $isFinished = count($data['resolveData']['pages']) >= $data['limit']
                    || count($collected) >= $data['limit'];
            }

            // make recursive call if needed, waiting 100ms
            if (!$isFinished && isset($result['continue'][$data['continueKey']])) {
                usleep(100000);
                $data['continueValue'] = $result['continue'][$data['continueKey']];
                return $this->massApiInternal($data);
            }

            // indicate there were more entries than the limit
            if (isset($result['continue'])) {
                $data['resolveData']['continue'] = true;
            }
            $data['promise']->resolve($data);
        });
    }
}
|
214
|
|
|
|
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.