1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Wikibase\Repo\Maintenance; |
4
|
|
|
|
5
|
|
|
use DataValues\DecimalMath; |
6
|
|
|
use DataValues\DecimalValue; |
7
|
|
|
use Maintenance; |
8
|
|
|
use MediaWiki\MediaWikiServices; |
9
|
|
|
use MediaWiki\Sparql\SparqlClient; |
10
|
|
|
use Wikibase\Lib\WikibaseSettings; |
11
|
|
|
use Wikibase\Repo\WikibaseRepo; |
12
|
|
|
|
13
|
|
|
// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable and fall back to the conventional location relative to this file.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';
16
|
|
|
|
17
|
|
|
/**
 * Update the conversion table for units.
 *
 * Base unit types for Wikidata:
 * Q223662,Q208469
 * SI base unit,SI derived unit
 * TODO: add support for non-SI units
 *
 * Example run:
 * mwscript extensions/WikidataBuildResources/extensions/Wikibase/repo/maintenance/updateUnits.php
 *   --wiki wikidatawiki --base-unit-types Q223662,Q208469 --base-uri http://www.wikidata.org/entity/
 *   --unit-class Q1978718 > unitConversion.json
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class UpdateUnits extends Maintenance {

	/**
	 * Base URI of the entities, stripped from the URIs returned by SPARQL.
	 * @var string
	 */
	private $baseUri;

	/**
	 * Length of the base URI.
	 * Helper variable to speed up cutting it out.
	 * @var int
	 */
	private $baseLen;

	/**
	 * @var SparqlClient
	 */
	private $client;

	/**
	 * Should we silence the error output for tests?
	 * @var bool
	 */
	public $silent = false;

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Update unit conversion table." );

		$this->addOption( 'base-unit-types', 'Types of base units.', true, true );
		$this->addOption( 'base-uri', 'Base URI for the data.', false, true );
		$this->addOption( 'unit-class', 'Class for units.', false, true );
		$this->addOption( 'format', 'Output format "json" (default) or "csv".', false, true );
		$this->addOption( 'sparql', 'SPARQL endpoint URL.', false, true );
		$this->addOption( 'check-usage', 'Check whether unit is in use?', false );
	}

	/**
	 * Query the SPARQL endpoint for base and convertable units, build the
	 * conversion table and print it in the requested format.
	 */
	public function execute() {
		if ( !WikibaseSettings::isRepoEnabled() ) {
			$this->fatalError( "You need to have Wikibase enabled in order to use this maintenance script!" );
		}

		// Validate the output format up front, so that a typo does not cost
		// a full round of SPARQL queries before being reported.
		$format = strtolower( $this->getOption( 'format', 'json' ) );
		if ( !in_array( $format, [ 'json', 'csv' ], true ) ) {
			$this->fatalError( 'Invalid format' );
		}
		$checkUsage = $this->hasOption( 'check-usage' );

		$repo = WikibaseRepo::getDefaultInstance();
		$endPoint = $this->getOption( 'sparql',
			$repo->getSettings()->getSetting( 'sparqlEndpoint' ) );
		if ( !$endPoint ) {
			$this->fatalError( 'SPARQL endpoint not defined' );
		}
		$this->setBaseUri( $this->getOption( 'base-uri',
			$repo->getSettings()->getSetting( 'conceptBaseUri' ) ) );
		$this->client = new SparqlClient( $endPoint, MediaWikiServices::getInstance()->getHttpRequestFactory() );
		$this->client->appendUserAgent( __CLASS__ );

		$unitClass = $this->getOption( 'unit-class' );
		if ( $unitClass ) {
			$filter = "FILTER EXISTS { ?unit wdt:P31/wdt:P279* wd:$unitClass }\n";
		} else {
			$filter = '';
		}

		// Get units usage stats. We don't care about units
		// that have been used less than 10 times, for now
		if ( $checkUsage ) {
			$unitUsage = $this->getUnitUsage( 10 );
		} else {
			$unitUsage = null;
		}
		$baseUnits = $this->getBaseUnits( $filter );

		$convertUnits = [];
		$reconvert = [];

		if ( $checkUsage ) {
			$filter .= "FILTER EXISTS { [] wikibase:quantityUnit ?unit }\n";
		}

		$convertableUnits = $this->getConvertableUnits( $filter );
		if ( !is_array( $convertableUnits ) ) {
			// The query result is documented as array[]|false; iterating over
			// a non-array would only produce a warning and a bogus table.
			$this->fatalError( 'Could not retrieve the list of convertable units from the SPARQL endpoint' );
		}

		foreach ( $convertableUnits as $unit ) {
			$converted =
				$this->convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, $reconvert );
			if ( $converted ) {
				$unitName = substr( $unit['unit'], $this->baseLen );
				$convertUnits[$unitName] = $converted;
			}
		}

		$this->reduceUnits( $reconvert, $convertUnits );

		// Add base units
		foreach ( $baseUnits as $base => $baseData ) {
			$convertUnits[$base] = [
				'factor' => "1",
				'unit' => $base,
				'label' => $baseData['unitLabel'],
				'siLabel' => $baseData['unitLabel']
			];
		}

		// Sort units by Q-id, as number, to have predictable order
		uksort( $convertUnits,
			function ( $x, $y ) {
				return (int)substr( $x, 1 ) - (int)substr( $y, 1 );
			}
		);

		// $format has been validated above, so only these two values remain.
		if ( $format === 'csv' ) {
			echo $this->formatCSV( $convertUnits );
		} else {
			echo $this->formatJSON( $convertUnits );
		}
	}

	/**
	 * Reduce units that are not in term of base units into base units.
	 * If some units are not reducible to base units, warning will be issued.
	 * @param array $reconvert List of units to be reduced
	 * @param array &$convertUnits List of unit conversion configs, will be modified if
	 *                             it is possible to reduce the unit to base units.
	 */
	private function reduceUnits( $reconvert, &$convertUnits ) {
		while ( $reconvert !== [] ) {
			$converted = false;
			foreach ( $reconvert as $name => $unit ) {
				$convertedUnit = $this->convertDerivedUnit( $unit, $convertUnits );
				if ( $convertedUnit ) {
					$convertUnits[$name] = $convertedUnit;
					unset( $reconvert[$name] );
					$converted = true;
				}
			}
			// we didn't convert any on this step, no use to continue
			// This loop will converge since on each step we will reduce
			// the length of $reconvert until we can't do it anymore.
			if ( !$converted ) {
				break;
			}
		}

		// Whatever is left could not be traced back to a converted unit.
		foreach ( $reconvert as $unit ) {
			$this->error( "Weird base unit: {$unit['unit']} reduces to {$unit['siUnit']} which is not base!" );
		}
	}

	/**
	 * Set the base URI of the entities and cache its length.
	 *
	 * @param string $uri
	 */
	public function setBaseUri( $uri ) {
		$this->baseUri = $uri;
		$this->baseLen = strlen( $uri );
	}

	/**
	 * Convert unit that does not reduce to a basic unit.
	 *
	 * If the unit's target ('siUnit') already has conversion data, the two
	 * factors are multiplied and the target's own target unit is used.
	 *
	 * @param string[] $unit
	 * @param array[] $convertUnits List of units already converted
	 *
	 * @return string[]|null Converted data for the unit or null if no conversion possible.
	 */
	public function convertDerivedUnit( $unit, $convertUnits ) {
		if ( isset( $convertUnits[$unit['siUnit']] ) ) {
			// we have conversion now
			$math = new DecimalMath();
			$newUnit = $convertUnits[$unit['siUnit']];
			$newFactor =
				$math->product( new DecimalValue( $unit['si'] ),
					new DecimalValue( $newUnit['factor'] ) );
			return [
				'factor' => trim( $newFactor->getValue(), '+' ),
				'unit' => $newUnit['unit'],
				'label' => $unit['unitLabel'],
				'siLabel' => $newUnit['siLabel']
			];
		}
		return null;
	}

	/**
	 * Create conversion data for a single unit.
	 * @param string[] $unit Unit data
	 * @param array[] $convertUnits Already converted data
	 * @param array[] $baseUnits Base unit list
	 * @param array|null $unitUsage Unit usage data, keyed by unit ID; null to skip the usage check
	 * @param string[][] &$reconvert Array collecting units that require re-conversion later,
	 *                 due to their target unit not being base.
	 * @return string[]|null Produces conversion data for the unit or null if not possible.
	 */
	public function convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, &$reconvert ) {
		$unit['unit'] = substr( $unit['unit'], $this->baseLen );
		$unit['siUnit'] = substr( $unit['siUnit'], $this->baseLen );

		// Only a label that looks like a bare Q-id is treated as a missing
		// English label. Checking just the first character would also reject
		// real labels starting with "Q" and would raise a notice on an empty label.
		// NOTE(review): assumes the label service falls back to the bare Q-id — confirm.
		if ( preg_match( '/^Q\d+$/', $unit['unitLabel'] ) ) {
			// Skip exotic units that have no English name for now.
			// TODO: drop this
			$this->error( "Exotic unit: {$unit['unit']} has no English label, skipping for now." );
			return null;
		}

		if ( isset( $convertUnits[$unit['unit']] ) ) {
			// done already
			return null;
		}
		if ( $unit['unit'] == $unit['siUnit'] ) {
			// base unit
			if ( $unit['si'] != 1 ) {
				$this->error( "Weird unit: {$unit['unit']} is {$unit['si']} of itself!" );
				return null;
			}
			if ( !isset( $baseUnits[$unit['siUnit']] ) ) {
				$this->error( "Weird unit: {$unit['unit']} is self-referring but not base!" );
				return null;
			}
		}

		if ( $unitUsage && !isset( $baseUnits[$unit['unit']] ) && !isset( $unitUsage[$unit['unit']] ) ) {
			$this->error( "Low usage unit {$unit['unit']}, skipping..." );
			return null;
		}

		if ( !isset( $baseUnits[$unit['siUnit']] ) ) {
			// target unit is not actually base, remember it for reduceUnits()
			$reconvert[$unit['unit']] = $unit;
			return null;
		}

		return [
			'factor' => $unit['si'],
			'unit' => $unit['siUnit'],
			// These two are just for humans, not used by actual converter
			'label' => $unit['unitLabel'],
			'siLabel' => $unit['siUnitLabel']
		];
	}

	/**
	 * Format units as JSON
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatJSON( array $convertUnits ) {
		return json_encode( $convertUnits, JSON_PRETTY_PRINT );
	}

	/**
	 * Get units that are used at least $min times.
	 * We don't care about units that have been used less than 10 times, for now.
	 * Only top 200 will be returned (though so far we don't have that many).
	 * @param int $min Minimal usage for the unit.
	 * @return int[] Map of unit ID => position of that unit in the query result,
	 *               intended for isset() lookups by unit ID.
	 */
	private function getUnitUsage( $min ) {
		$usageQuery = <<<UQUERY
SELECT ?unit (COUNT(DISTINCT ?v) as ?c) WHERE {
  ?v wikibase:quantityUnit ?unit .
  ?s ?p ?v .
  FILTER(?unit != wd:Q199)
# Exclude currencies
  FILTER NOT EXISTS { ?unit wdt:P31+ wd:Q8142 }
} GROUP BY ?unit
  HAVING(?c >= $min)
  ORDER BY DESC(?c)
  LIMIT 200
UQUERY;
		$unitUsage = $this->getIDs( $usageQuery, 'unit' );
		// Flip the ID list so that the IDs become keys.
		$unitUsage = array_flip( $unitUsage );
		return $unitUsage;
	}

	/**
	 * Get list of IDs from SPARQL.
	 * @param string $sparql Query
	 * @param string $item Variable name where IDs are stored
	 * @return string[] List of entity ID strings
	 */
	private function getIDs( $sparql, $item ) {
		$data = $this->client->query( $sparql );
		if ( $data ) {
			return array_map( function ( $row ) use ( $item ) {
				return str_replace( $this->baseUri, '', $row[$item] );
			}, $data );
		}
		return [];
	}

	/**
	 * Get base units
	 * @param string $filter Unit filter
	 * @return array[] Query rows keyed by unit ID (the unit URI without the base URI)
	 */
	private function getBaseUnits( $filter ) {
		// Turn "Q1,Q2" into "wd:Q1 wd:Q2" for the VALUES clause.
		$types =
			str_replace( [ ',', 'Q' ], [ ' ', 'wd:Q' ], $this->getOption( 'base-unit-types' ) );

		$baseQuery = <<<QUERY
SELECT ?unit ?unitLabel WHERE {
  VALUES ?class { $types }
  ?unit wdt:P31 ?class .
  $filter
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
QUERY;
		$baseUnitsData = $this->client->query( $baseQuery );
		if ( !is_array( $baseUnitsData ) ) {
			// Without base units no conversion can be resolved; fail loudly
			// instead of iterating over a non-array result.
			$this->fatalError( 'Could not retrieve the list of base units from the SPARQL endpoint' );
		}
		'@phan-var array[] $baseUnitsData';
		$baseUnits = [];
		// arrange better lookup
		foreach ( $baseUnitsData as $base ) {
			$item = substr( $base['unit'], $this->baseLen );
			$baseUnits[$item] = $base;
		}
		return $baseUnits;
	}

	/**
	 * Retrieve the list of convertable units.
	 * @param string $filter
	 * @return array[]|false List of units that can be converted
	 */
	private function getConvertableUnits( $filter ) {
		$unitsQuery = <<<QUERY
SELECT REDUCED ?unit ?si ?siUnit ?unitLabel ?siUnitLabel WHERE {
  ?unit wdt:P31 ?type .
  ?type wdt:P279* wd:Q47574 .
  # Not a currency
  FILTER (?type != wd:Q8142)
  # Not a cardinal number
  FILTER NOT EXISTS { ?unit wdt:P31 wd:Q163875 }
  $filter
  # Has conversion to SI Units
  ?unit p:P2370/psv:P2370 [ wikibase:quantityAmount ?si; wikibase:quantityUnit ?siUnit ] .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
# Enable this to select only units that are actually used
}
QUERY;
		return $this->client->query( $unitsQuery );
	}

	/**
	 * Format units as CSV
	 *
	 * Each line has the form "unit ID,target unit ID,factor".
	 *
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatCSV( array $convertUnits ) {
		$str = '';
		foreach ( $convertUnits as $name => $data ) {
			$str .= "$name,{$data['unit']},{$data['factor']}\n";
		}
		return $str;
	}

	/**
	 * Report an error unless the script is in silent mode.
	 *
	 * In silent mode the message is dropped, but a positive $die still
	 * terminates the script with that exit code.
	 *
	 * @param string $err
	 * @param int $die If > 0, go ahead and die out using this int as the code
	 */
	protected function error( $err, $die = 0 ) {
		if ( !$this->silent ) {
			parent::error( $err, $die );
		} elseif ( $die > 0 ) {
			die( $die );
		}
	}

}
408
|
|
|
|
409
|
|
|
// Name the maintenance class defined in this file, then include the
// RUN_MAINTENANCE_IF_MAIN bootstrap file.
$maintClass = UpdateUnits::class;
require_once RUN_MAINTENANCE_IF_MAIN;
411
|
|
|
|
There are different options for fixing this problem.
If you want to be on the safe side, you can add an additional type-check:
If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:
Mark the issue as a false positive: just hover over the remove button in the top-right corner of this issue for more options.