UpdateUnits::execute()   F
last analyzed

Complexity

Conditions 11
Paths 576

Size

Total Lines 81

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 81
rs 3.4056
c 0
b 0
f 0
cc 11
nc 576
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace Wikibase\Repo\Maintenance;
4
5
use DataValues\DecimalMath;
6
use DataValues\DecimalValue;
7
use Maintenance;
8
use MediaWiki\MediaWikiServices;
9
use MediaWiki\Sparql\SparqlClient;
10
use Wikibase\Lib\WikibaseSettings;
11
use Wikibase\Repo\WikibaseRepo;
12
13
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set,
// otherwise fall back to the path relative to this script.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}
require_once $basePath . '/maintenance/Maintenance.php';
16
17
/**
18
 * Update the conversion table for units.
19
 * Base unit types for Wikidata:
20
 * Q223662,Q208469
21
 * SI base unit,SI derived unit
22
 * TODO: add support for non-SI units
23
 * Example run:
24
 * mwscript extensions/WikidataBuildResources/extensions/Wikibase/repo/maintenance/updateUnits.php
25
 *   --wiki wikidatawiki  --base-unit-types Q223662,Q208469 --base-uri http://www.wikidata.org/entity/
26
 *   --unit-class Q1978718 > unitConversion.json
27
 *
28
 * @license GPL-2.0-or-later
29
 * @author Stas Malyshev
30
 */
31
class UpdateUnits extends Maintenance {
32
33
	/**
	 * Base URI of the entities; it is cut off from full entity URIs to get bare IDs.
	 * @var string
	 */
	private $baseUri;

	/**
	 * Length of the base URI.
	 * Helper variable to speed up cutting it out.
	 * @var int
	 */
	private $baseLen;

	/**
	 * Client used to run the queries against the SPARQL endpoint.
	 * @var SparqlClient
	 */
	private $client;

	/**
	 * Should we silence the error output for tests?
	 * Defaults to false, i.e. errors are reported.
	 * @var bool
	 */
	public $silent = false;
55
56
	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Update unit conversion table.' );

		// Option name => [ description, required, takes an argument ].
		// Registration order is kept as listed here.
		$options = [
			'base-unit-types' => [ 'Types of base units.', true, true ],
			'base-uri' => [ 'Base URI for the data.', false, true ],
			'unit-class' => [ 'Class for units.', false, true ],
			'format' => [ 'Output format "json" (default) or "csv".', false, true ],
			'sparql' => [ 'SPARQL endpoint URL.', false, true ],
			'check-usage' => [ 'Check whether unit is in use?', false, false ],
		];
		foreach ( $options as $name => $spec ) {
			$this->addOption( $name, $spec[0], $spec[1], $spec[2] );
		}
	}
67
68
	/**
	 * Script entry point.
	 *
	 * Queries the SPARQL endpoint for base units and for units that have a
	 * conversion to SI units, builds the conversion table out of them and
	 * prints it to the standard output as JSON or CSV.
	 */
	public function execute() {
		if ( !WikibaseSettings::isRepoEnabled() ) {
			$this->fatalError( "You need to have Wikibase enabled in order to use this maintenance script!" );
		}

		// Validate the format up front, so that a typo in --format is reported
		// before any query is sent rather than after all the work has been done.
		$format = strtolower( $this->getOption( 'format', 'json' ) );
		if ( !in_array( $format, [ 'csv', 'json' ], true ) ) {
			$this->fatalError( 'Invalid format' );
		}
		$checkUsage = $this->hasOption( 'check-usage' );

		$repo = WikibaseRepo::getDefaultInstance();
		$endPoint = $this->getOption( 'sparql',
			$repo->getSettings()->getSetting( 'sparqlEndpoint' ) );
		if ( !$endPoint ) {
			$this->fatalError( 'SPARQL endpoint not defined' );
		}
		$this->setBaseUri( $this->getOption( 'base-uri',
			$repo->getSettings()->getSetting( 'conceptBaseUri' ) ) );
		$this->client = new SparqlClient( $endPoint, MediaWikiServices::getInstance()->getHttpRequestFactory() );
		$this->client->appendUserAgent( __CLASS__ );

		// Optionally restrict all queries to instances of the given unit class.
		$unitClass = $this->getOption( 'unit-class' );
		$filter = $unitClass
			? "FILTER EXISTS { ?unit wdt:P31/wdt:P279* wd:$unitClass }\n"
			: '';

		// Get units usage stats. We don't care about units
		// that have been used less than 10 times, for now.
		$unitUsage = $checkUsage ? $this->getUnitUsage( 10 ) : null;
		$baseUnits = $this->getBaseUnits( $filter );

		if ( $checkUsage ) {
			// Only the convertable units query is limited to units that are in use.
			$filter .= "FILTER EXISTS { [] wikibase:quantityUnit ?unit }\n";
		}

		$convertableUnits = $this->getConvertableUnits( $filter );
		if ( !is_array( $convertableUnits ) ) {
			// getConvertableUnits() is documented to return false as well; without
			// the unit list the output would be misleading, so stop here.
			$this->fatalError( 'Failed to retrieve convertable units from the SPARQL endpoint' );
		}

		$convertUnits = $this->buildConversionTable( $convertableUnits, $baseUnits, $unitUsage );

		switch ( $format ) {
			case 'csv':
				echo $this->formatCSV( $convertUnits );
				break;
			case 'json':
				echo $this->formatJSON( $convertUnits );
				break;
			default:
				$this->fatalError( 'Invalid format' );
		}
	}

	/**
	 * Build the conversion table from the query results.
	 *
	 * @param array[] $convertableUnits Rows describing units that have a conversion to SI units
	 * @param array[] $baseUnits Base units, keyed by unit ID
	 * @param string[]|null $unitUsage Unit usage data, or null when usage is not checked
	 * @return array[] Conversion data keyed by unit ID, sorted by the numeric part of the ID
	 */
	private function buildConversionTable( array $convertableUnits, array $baseUnits, $unitUsage ) {
		$convertUnits = [];
		$reconvert = [];

		foreach ( $convertableUnits as $unit ) {
			$converted =
				$this->convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, $reconvert );
			if ( $converted ) {
				$unitName = substr( $unit['unit'], $this->baseLen );
				$convertUnits[$unitName] = $converted;
			}
		}

		// Units whose target is not a base unit were collected in $reconvert;
		// try to express them through the units converted above.
		$this->reduceUnits( $reconvert, $convertUnits );

		// Add base units
		foreach ( $baseUnits as $base => $baseData ) {
			$convertUnits[$base] = [
				'factor' => "1",
				'unit' => $base,
				'label' => $baseData['unitLabel'],
				'siLabel' => $baseData['unitLabel']
			];
		}

		// Sort units by Q-id, as number, to have predictable order
		uksort( $convertUnits,
			function ( $x, $y ) {
				return (int)substr( $x, 1 ) - (int)substr( $y, 1 );
			}
		);

		return $convertUnits;
	}
149
150
	/**
	 * Express units whose conversion target is not a base unit in terms of base units.
	 * Units that can not be reduced are reported through error().
	 *
	 * @param array $reconvert List of units to be reduced, keyed by unit ID
	 * @param array &$convertUnits List of unit conversion configs; an entry is added
	 *                             for every unit that could be reduced.
	 */
	private function reduceUnits( $reconvert, &$convertUnits ) {
		// Repeat the pass as long as the previous one reduced at least one unit:
		// a unit reduced in one pass may be the target of another pending unit.
		// Every successful pass shortens $reconvert, so the loop terminates.
		$progress = true;
		while ( $progress && !empty( $reconvert ) ) {
			$progress = false;
			foreach ( $reconvert as $id => $pending ) {
				$reduced = $this->convertDerivedUnit( $pending, $convertUnits );
				if ( !$reduced ) {
					continue;
				}
				$convertUnits[$id] = $reduced;
				unset( $reconvert[$id] );
				$progress = true;
			}
		}

		// Whatever is left could not be reduced to a known unit.
		if ( !empty( $reconvert ) ) {
			foreach ( $reconvert as $unit ) {
				$this->error( "Weird base unit: {$unit['unit']} reduces to {$unit['siUnit']} which is not base!" );
			}
		}
	}
183
184
	/**
	 * Set the base URI of the entities and cache its length.
	 *
	 * @param string $uri
	 */
	public function setBaseUri( $uri ) {
		$this->baseLen = strlen( $uri );
		$this->baseUri = $uri;
	}
191
192
	/**
	 * Convert a unit whose conversion target is itself a converted (non-base) unit.
	 *
	 * The resulting factor is the product of the unit's own factor and the
	 * factor of its target; the resulting unit is the target's unit.
	 *
	 * @param string[] $unit
	 * @param array[] $convertUnits List of units already converted
	 *
	 * @return string[]|null Converted data for the unit or null if no conversion possible.
	 */
	public function convertDerivedUnit( $unit, $convertUnits ) {
		if ( !isset( $convertUnits[$unit['siUnit']] ) ) {
			// The target has not been converted (yet).
			return null;
		}

		$target = $convertUnits[$unit['siUnit']];
		$math = new DecimalMath();
		$factor = $math->product(
			new DecimalValue( $unit['si'] ),
			new DecimalValue( $target['factor'] )
		);

		return [
			// Strip the leading plus sign from the decimal value.
			'factor' => trim( $factor->getValue(), '+' ),
			'unit' => $target['unit'],
			'label' => $unit['unitLabel'],
			'siLabel' => $target['siLabel']
		];
	}
217
218
	/**
	 * Create conversion data for a single unit.
	 *
	 * The unit is skipped (null is returned) when it has no English label, has
	 * been converted already, refers to itself without being a proper base
	 * unit, or is not found in the usage data. A unit whose target is not a
	 * base unit is stored in $reconvert and null is returned as well.
	 *
	 * @param string[] $unit Unit data
	 * @param array[] $convertUnits Already converted data, keyed by unit ID
	 * @param array[] $baseUnits Base unit list
	 * @param string[]|null $unitUsage Unit usage data; the usage check is skipped
	 *                 when this is null or empty.
	 * @param string[][] &$reconvert Array collecting units that require re-conversion later,
	 *                 due to their target unit not being base.
	 * @return string[]|null Produces conversion data for the unit or null if not possible.
	 */
	public function convertUnit( $unit, $convertUnits, $baseUnits, $unitUsage, &$reconvert ) {
		// Cut the base URI off, leaving bare entity IDs.
		$unit['unit'] = substr( $unit['unit'], $this->baseLen );
		$unit['siUnit'] = substr( $unit['siUnit'], $this->baseLen );

		// A label that is nothing but a Q-id is taken to mean that the unit has
		// no English label. Matching the whole label (rather than only its first
		// character) keeps units whose real label merely starts with "Q", and
		// does not read offset 0 of an empty label.
		if ( preg_match( '/^Q\d+\z/', $unit['unitLabel'] ) ) {
			// Skip exotic units that have no English name for now.
			// TODO: drop this
			$this->error( "Exotic unit: {$unit['unit']} has no English label, skipping for now." );
			return null;
		}

		if ( isset( $convertUnits[$unit['unit']] ) ) {
			// done already
			return null;
		}

		if ( $unit['unit'] == $unit['siUnit'] ) {
			// Self-referring unit: only valid for a base unit with factor 1.
			// The factor is compared loosely on purpose, it arrives as a string.
			if ( $unit['si'] != 1 ) {
				$this->error( "Weird unit: {$unit['unit']} is {$unit['si']} of itself!" );
				return null;
			}
			if ( !isset( $baseUnits[$unit['siUnit']] ) ) {
				$this->error( "Weird unit: {$unit['unit']} is self-referring but not base!" );
				return null;
			}
		}

		if ( $unitUsage && !isset( $baseUnits[$unit['unit']] ) && !isset( $unitUsage[$unit['unit']] ) ) {
			$this->error( "Low usage unit {$unit['unit']}, skipping..." );
			return null;
		}

		if ( isset( $baseUnits[$unit['siUnit']] ) ) {
			return [
				'factor' => $unit['si'],
				'unit' => $unit['siUnit'],
				// These two are just for humans, not used by actual converter
				'label' => $unit['unitLabel'],
				'siLabel' => $unit['siUnitLabel']
			];
		}

		// target unit is not actually base, it has to be reduced later
		$reconvert[$unit['unit']] = $unit;
		return null;
	}
275
276
	/**
	 * Render the conversion table as pretty-printed JSON.
	 *
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatJSON( array $convertUnits ) {
		$json = json_encode( $convertUnits, JSON_PRETTY_PRINT );
		return $json;
	}
284
285
	/**
	 * Get units that are used at least $min times.
	 * The query excludes Q199 and currencies and is limited to the 200 most
	 * used units (though so far we don't have that many).
	 *
	 * @param int $min Minimal usage for the unit.
	 * @return int[] Lookup table keyed by unit ID. The values are the keys of
	 *  the ID list returned by getIDs(); only the keys are meant to be checked.
	 */
	private function getUnitUsage( $min ) {
		$usageQuery = <<<UQUERY
SELECT ?unit (COUNT(DISTINCT ?v) as ?c) WHERE {
  ?v wikibase:quantityUnit ?unit .
  ?s ?p ?v .
  FILTER(?unit != wd:Q199)
# Exclude currencies
  FILTER NOT EXISTS { ?unit wdt:P31+ wd:Q8142 }
} GROUP BY ?unit
  HAVING(?c >= $min)
  ORDER BY DESC(?c)
  LIMIT 200
UQUERY;
		// Flip the ID list so that a unit can be looked up with isset().
		return array_flip( $this->getIDs( $usageQuery, 'unit' ) );
	}
309
310
	/**
	 * Run a SPARQL query and collect the entity IDs found in one of its variables.
	 * The base URI is removed from every value to get the bare ID.
	 *
	 * @param string $sparql Query
	 * @param string $item Variable name where IDs are stored
	 * @return string[] List of entity ID strings, empty if the query gave no data
	 */
	private function getIDs( $sparql, $item ) {
		$rows = $this->client->query( $sparql );
		if ( !$rows ) {
			return [];
		}

		$ids = [];
		foreach ( $rows as $key => $row ) {
			$ids[$key] = str_replace( $this->baseUri, '', $row[$item] );
		}
		return $ids;
	}
325
326
	/**
	 * Get base units, i.e. instances of the classes given in --base-unit-types.
	 *
	 * Aborts the script when the query does not produce a result list, since
	 * no unit can be converted without the base units.
	 *
	 * @param string $filter Unit filter, inserted into the query as is
	 * @return array[] Query rows keyed by unit ID
	 */
	private function getBaseUnits( $filter ) {
		// Turn the comma separated ID list "Q1,Q2" into "wd:Q1 wd:Q2".
		$types =
			str_replace( [ ',', 'Q' ], [ ' ', 'wd:Q' ], $this->getOption( 'base-unit-types' ) );

		$baseQuery = <<<QUERY
SELECT ?unit ?unitLabel WHERE {
  VALUES ?class {  $types }
  ?unit wdt:P31 ?class .
  $filter
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
QUERY;
		$baseUnitsData = $this->client->query( $baseQuery );
		if ( !is_array( $baseUnitsData ) ) {
			// Do not iterate over a non-array (e.g. false) result.
			$this->fatalError( 'Failed to retrieve base units from the SPARQL endpoint' );
		}
		'@phan-var array[] $baseUnitsData';

		$baseUnits = [];
		// arrange better lookup
		foreach ( $baseUnitsData as $base ) {
			$item = substr( $base['unit'], $this->baseLen );
			$baseUnits[$item] = $base;
		}
		return $baseUnits;
	}
355
356
	/**
	 * Query the units that have a conversion to SI units (P2370).
	 * Currencies and cardinal numbers are excluded by the query.
	 *
	 * @param string $filter Additional filter, inserted into the query as is
	 * @return array[]|false List of units that can be converted
	 */
	private function getConvertableUnits( $filter ) {
		$sparql = <<<QUERY
SELECT REDUCED ?unit ?si ?siUnit ?unitLabel ?siUnitLabel WHERE {
  ?unit wdt:P31 ?type .
  ?type wdt:P279* wd:Q47574 .
  # Not a currency
  FILTER (?type != wd:Q8142)
  # Not a cardinal number
  FILTER NOT EXISTS { ?unit wdt:P31 wd:Q163875 }
  $filter
  # Has conversion to SI Units
  ?unit p:P2370/psv:P2370 [ wikibase:quantityAmount ?si; wikibase:quantityUnit ?siUnit ] .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
# Enable this to select only units that are actually used
}
QUERY;

		return $this->client->query( $sparql );
	}
381
382
	/**
	 * Render the conversion table as CSV.
	 * Every line holds the unit ID, the target unit and the factor.
	 *
	 * @param array[] $convertUnits
	 * @return string
	 */
	private function formatCSV( array $convertUnits ) {
		$lines = [];
		foreach ( $convertUnits as $id => $conversion ) {
			$lines[] = $id . ',' . $conversion['unit'] . ',' . $conversion['factor'] . "\n";
		}
		return implode( '', $lines );
	}
394
395
	/**
	 * Report an error, unless the output is silenced.
	 * When silenced, the message is dropped but a positive $die still ends the script.
	 *
	 * @param string $err
	 * @param int $die If > 0, go ahead and die out using this int as the code
	 */
	protected function error( $err, $die = 0 ) {
		if ( $this->silent ) {
			if ( $die > 0 ) {
				die( $die );
			}
			return;
		}

		parent::error( $err, $die );
	}
406
407
}
408
409
$maintClass = UpdateUnits::class;
410
require_once RUN_MAINTENANCE_IF_MAIN;
411