This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Wikibase\Repo\Maintenance; |
||
4 | |||
5 | use DataValues\DecimalMath; |
||
6 | use DataValues\DecimalValue; |
||
7 | use Maintenance; |
||
8 | use MediaWiki\MediaWikiServices; |
||
9 | use MediaWiki\Sparql\SparqlClient; |
||
10 | use Wikibase\Lib\WikibaseSettings; |
||
11 | use Wikibase\Repo\WikibaseRepo; |
||
12 | |||
// Resolve the MediaWiki root: use MW_INSTALL_PATH from the environment when it is
// set, otherwise fall back to the directory four levels above this script.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}
require_once $basePath . '/maintenance/Maintenance.php';
||
16 | |||
/**
 * Update the conversion table for units.
 *
 * Base unit types for Wikidata:
 * Q223662,Q208469
 * SI base unit,SI derived unit
 * TODO: add support to non-SI units
 *
 * Example run:
 * mwscript extensions/WikidataBuildResources/extensions/Wikibase/repo/maintenance/updateUnits.php
 *   --wiki wikidatawiki --base-unit-types Q223662,Q208469 --base-uri http://www.wikidata.org/entity/
 *   --unit-class Q1978718 > unitConversion.json
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class UpdateUnits extends Maintenance {

	/**
	 * Prefix that is cut off entity URIs to obtain bare entity IDs.
	 * @var string
	 */
	private $baseUri;

	/**
	 * Length of the base URI.
	 * Helper variable to speed up cutting it out.
	 * @var int
	 */
	private $baseLen;

	/**
	 * Client used for every SPARQL query; created in execute().
	 * @var SparqlClient
	 */
	private $client;

	/**
	 * Should we silence the error output for tests?
	 * @var bool
	 */
	public $silent;

	/**
	 * Declare the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Update unit conversion table." );

		$this->addOption( 'base-unit-types', 'Types of base units.', true, true );
		$this->addOption( 'base-uri', 'Base URI for the data.', false, true );
		$this->addOption( 'unit-class', 'Class for units.', false, true );
		$this->addOption( 'format', 'Output format "json" (default) or "csv".', false, true );
		$this->addOption( 'sparql', 'SPARQL endpoint URL.', false, true );
		$this->addOption( 'check-usage', 'Check whether unit is in use?', false );
	}

	/**
	 * Query the SPARQL endpoint for base and convertable units, build the
	 * conversion table and print it in the requested format.
	 */
	public function execute() {
		if ( !WikibaseSettings::isRepoEnabled() ) {
			$this->fatalError( "You need to have Wikibase enabled in order to use this maintenance script!" );
		}
		$format = $this->getOption( 'format', 'json' );
		$checkUsage = $this->hasOption( 'check-usage' );

		// Command line options win over the repo settings.
		$repo = WikibaseRepo::getDefaultInstance();
		$endPoint = $this->getOption( 'sparql',
			$repo->getSettings()->getSetting( 'sparqlEndpoint' ) );
		if ( !$endPoint ) {
			$this->fatalError( 'SPARQL endpoint not defined' );
		}
		$this->setBaseUri( $this->getOption( 'base-uri',
			$repo->getSettings()->getSetting( 'conceptBaseUri' ) ) );
		$this->client = new SparqlClient( $endPoint, MediaWikiServices::getInstance()->getHttpRequestFactory() );
		$this->client->appendUserAgent( __CLASS__ );

		// Optional restriction to instances of (subclasses of) the given unit class.
		$unitClass = $this->getOption( 'unit-class' );
		if ( $unitClass ) {
			$filter = "FILTER EXISTS { ?unit wdt:P31/wdt:P279* wd:$unitClass }\n";
		} else {
			$filter = '';
		}

		// Get units usage stats. We don't care about units
		// that have been used less than 10 times, for now.
		if ( $checkUsage ) {
			$unitUsage = $this->getUnitUsage( 10 );
		} else {
			$unitUsage = null;
		}
		$baseUnits = $this->getBaseUnits( $filter );

		$convertUnits = [];
		$reconvert = [];

		// The usage restriction only applies to convertable units; the base
		// units above were fetched before it was appended to the filter.
		if ( $checkUsage ) {
			$filter .= "FILTER EXISTS { [] wikibase:quantityUnit ?unit }\n";
		}

		$convertableUnits = $this->getConvertableUnits( $filter );
		foreach ( $convertableUnits as $unit ) {
			$converted =
				$this->convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, $reconvert );
			if ( $converted ) {
				$unitName = substr( $unit['unit'], $this->baseLen );
				$convertUnits[$unitName] = $converted;
			}
		}

		// Units whose target is not a base unit were parked in $reconvert.
		$this->reduceUnits( $reconvert, $convertUnits );

		// Add base units
		foreach ( $baseUnits as $base => $baseData ) {
			$convertUnits[$base] = [
				'factor' => "1",
				'unit' => $base,
				'label' => $baseData['unitLabel'],
				'siLabel' => $baseData['unitLabel']
			];
		}

		// Sort units by Q-id, as number, to have predictable order
		uksort( $convertUnits,
			static function ( $x, $y ) {
				return (int)substr( $x, 1 ) - (int)substr( $y, 1 );
			}
		);

		switch ( strtolower( $format ) ) {
			case 'csv':
				echo $this->formatCSV( $convertUnits );
				break;
			case 'json':
				echo $this->formatJSON( $convertUnits );
				break;
			default:
				$this->fatalError( 'Invalid format' );
		}
	}

	/**
	 * Reduce units that are not in term of base units into base units.
	 * If some units are not reducible to base units, warning will be issued.
	 *
	 * @param array $reconvert List of units to be reduced
	 * @param array &$convertUnits List of unit conversion configs, will be modified if
	 *        it is possible to reduce the unit to base units.
	 */
	private function reduceUnits( $reconvert, &$convertUnits ) {
		// Every productive pass removes at least one entry from $reconvert, so
		// this terminates; a pass without any conversion means we are stuck.
		// Units converted early in a pass are visible to later units of that pass.
		do {
			$progress = false;
			foreach ( $reconvert as $name => $unit ) {
				$convertedUnit = $this->convertDerivedUnit( $unit, $convertUnits );
				if ( $convertedUnit ) {
					$convertUnits[$name] = $convertedUnit;
					unset( $reconvert[$name] );
					$progress = true;
				}
			}
		} while ( $progress );

		// Whatever is left could not be reduced to a base unit.
		foreach ( $reconvert as $unit ) {
			$this->error( "Weird base unit: {$unit['unit']} reduces to {$unit['siUnit']} which is not base!" );
		}
	}

	/**
	 * Set the base URI and cache its length for prefix stripping.
	 *
	 * @param string $uri
	 */
	public function setBaseUri( $uri ) {
		$this->baseUri = $uri;
		$this->baseLen = strlen( $uri );
	}

	/**
	 * Convert unit that does not reduce to a basic unit.
	 *
	 * The unit's own factor is multiplied by the factor of its (already
	 * converted) target unit, so the result is expressed in that target's unit.
	 *
	 * @param string[] $unit
	 * @param array[] $convertUnits List of units already converted
	 *
	 * @return string[]|null Converted data for the unit or null if no conversion possible.
	 */
	public function convertDerivedUnit( $unit, $convertUnits ) {
		if ( !isset( $convertUnits[$unit['siUnit']] ) ) {
			// Target unit has no conversion (yet).
			return null;
		}

		$target = $convertUnits[$unit['siUnit']];
		$math = new DecimalMath();
		$newFactor = $math->product(
			new DecimalValue( $unit['si'] ),
			new DecimalValue( $target['factor'] )
		);

		return [
			// Drop the explicit "+" sign from the decimal value string.
			'factor' => trim( $newFactor->getValue(), '+' ),
			'unit' => $target['unit'],
			'label' => $unit['unitLabel'],
			'siLabel' => $target['siLabel']
		];
	}

	/**
	 * Create conversion data for a single unit.
	 *
	 * @param string[] $unit Unit data
	 * @param array[] $convertUnits Already converted data, keyed by unit ID
	 * @param array[] $baseUnits Base unit list, keyed by unit ID
	 * @param int[]|null $unitUsage Unit usage data keyed by unit ID, or null to skip the usage check
	 * @param string[][] &$reconvert Array collecting units that require re-conversion later,
	 *        due to their target unit not being base.
	 * @return string[]|null Produces conversion data for the unit or null if not possible.
	 */
	public function convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, &$reconvert ) {
		$unit['unit'] = substr( $unit['unit'], $this->baseLen );
		$unit['siUnit'] = substr( $unit['siUnit'], $this->baseLen );

		// A unit without an English label shows up with its entity ID as label.
		// Match the whole label against the ID pattern: testing only the first
		// character would also throw away real labels such as "quart", and
		// would read offset 0 of an empty label.
		if ( preg_match( '/^Q\d+\z/', $unit['unitLabel'] ) ) {
			// Skip exotic units that have no English name for now.
			// TODO: drop this
			$this->error( "Exotic unit: {$unit['unit']} has no English label, skipping for now." );
			return null;
		}

		if ( isset( $convertUnits[$unit['unit']] ) ) {
			// done already
			return null;
		}
		if ( $unit['unit'] == $unit['siUnit'] ) {
			// Self-referring unit: must be a base unit with factor 1.
			if ( $unit['si'] != 1 ) {
				$this->error( "Weird unit: {$unit['unit']} is {$unit['si']} of itself!" );
				return null;
			}
			if ( !isset( $baseUnits[$unit['siUnit']] ) ) {
				$this->error( "Weird unit: {$unit['unit']} is self-referring but not base!" );
				return null;
			}
		}

		// Base units are always kept, other units only when they are used enough.
		if ( $unitUsage && !isset( $baseUnits[$unit['unit']] ) && !isset( $unitUsage[$unit['unit']] ) ) {
			$this->error( "Low usage unit {$unit['unit']}, skipping..." );
			return null;
		}

		if ( !isset( $baseUnits[$unit['siUnit']] ) ) {
			// target unit is not actually base, retry once more units are known
			$reconvert[$unit['unit']] = $unit;
			return null;
		}

		return [
			'factor' => $unit['si'],
			'unit' => $unit['siUnit'],
			// These two are just for humans, not used by actual converter
			'label' => $unit['unitLabel'],
			'siLabel' => $unit['siUnitLabel']
		];
	}

	/**
	 * Format units as JSON
	 *
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatJSON( array $convertUnits ) {
		return json_encode( $convertUnits, JSON_PRETTY_PRINT );
	}

	/**
	 * Get units that are used at least $min times.
	 * We don't care about units that have been used less than 10 times, for now.
	 * Only top 200 will be returned (though so far we don't have that many).
	 *
	 * @param int $min Minimal usage for the unit.
	 * @return int[] Map of unit ID => position in the result list (most used first),
	 *         meant for isset() lookups. The usage counts themselves are not returned.
	 */
	private function getUnitUsage( $min ) {
		$usageQuery = <<<UQUERY
SELECT ?unit (COUNT(DISTINCT ?v) as ?c) WHERE {
?v wikibase:quantityUnit ?unit .
?s ?p ?v .
FILTER(?unit != wd:Q199)
# Exclude currencies
FILTER NOT EXISTS { ?unit wdt:P31+ wd:Q8142 }
} GROUP BY ?unit
HAVING(?c >= $min)
ORDER BY DESC(?c)
LIMIT 200
UQUERY;
		// Flip the ID list so that membership can be tested by key.
		return array_flip( $this->getIDs( $usageQuery, 'unit' ) );
	}

	/**
	 * Get list of IDs from SPARQL.
	 *
	 * @param string $sparql Query
	 * @param string $item Variable name where IDs are stored
	 * @return string[] List of entity ID strings
	 */
	private function getIDs( $sparql, $item ) {
		$data = $this->client->query( $sparql );
		if ( !$data ) {
			return [];
		}

		return array_map( function ( $row ) use ( $item ) {
			return str_replace( $this->baseUri, '', $row[$item] );
		}, $data );
	}

	/**
	 * Get base units
	 *
	 * @param string $filter Unit filter
	 * @return array[] Result rows keyed by unit ID
	 */
	private function getBaseUnits( $filter ) {
		// Turn "Q1,Q2" from the command line into "wd:Q1 wd:Q2" for the VALUES clause.
		$types =
			str_replace( [ ',', 'Q' ], [ ' ', 'wd:Q' ], $this->getOption( 'base-unit-types' ) );

		$baseQuery = <<<QUERY
SELECT ?unit ?unitLabel WHERE {
VALUES ?class { $types }
?unit wdt:P31 ?class .
$filter
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
QUERY;
		$baseUnitsData = $this->client->query( $baseQuery );
		'@phan-var array[] $baseUnitsData';
		$baseUnits = [];
		// arrange better lookup
		foreach ( $baseUnitsData as $base ) {
			$item = substr( $base['unit'], $this->baseLen );
			$baseUnits[$item] = $base;
		}
		return $baseUnits;
	}

	/**
	 * Retrieve the list of convertable units.
	 *
	 * @param string $filter
	 * @return array[]|false List of units that can be converted
	 */
	private function getConvertableUnits( $filter ) {
		$unitsQuery = <<<QUERY
SELECT REDUCED ?unit ?si ?siUnit ?unitLabel ?siUnitLabel WHERE {
?unit wdt:P31 ?type .
?type wdt:P279* wd:Q47574 .
# Not a currency
FILTER (?type != wd:Q8142)
# Not a cardinal number
FILTER NOT EXISTS { ?unit wdt:P31 wd:Q163875 }
$filter
# Has conversion to SI Units
?unit p:P2370/psv:P2370 [ wikibase:quantityAmount ?si; wikibase:quantityUnit ?siUnit ] .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
# Enable this to select only units that are actually used
}
QUERY;
		return $this->client->query( $unitsQuery );
	}

	/**
	 * Format units as CSV
	 *
	 * One "name,unit,factor" line per unit; the labels are not included.
	 *
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatCSV( array $convertUnits ) {
		$str = '';
		foreach ( $convertUnits as $name => $data ) {
			$str .= "$name,{$data['unit']},{$data['factor']}\n";
		}
		return $str;
	}

	/**
	 * Report an error unless the script runs in silent mode.
	 *
	 * In silent mode nothing is printed, but a positive $die still ends the script.
	 *
	 * @param string $err
	 * @param int $die If > 0, go ahead and die out using this int as the code
	 */
	protected function error( $err, $die = 0 ) {
		if ( !$this->silent ) {
			parent::error( $err, $die );
		} elseif ( $die > 0 ) {
			die( $die );
		}
	}

}
||
408 | |||
// Name the class to run, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (a constant defined outside this file; presumably
// MediaWiki's maintenance bootstrap, confirm against Maintenance.php).
$maintClass = UpdateUnits::class;
require_once RUN_MAINTENANCE_IF_MAIN;
||
411 |
There are different options of fixing this problem.
If you want to be on the safe side, you can add an additional type-check:
If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:
Mark the issue as a false-positive: Just hover over the remove button in the top-right corner of this issue for more options.