This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Wikibase\Repo\Maintenance; |
||
4 | |||
5 | use DataValues\DecimalValue; |
||
6 | use DataValues\QuantityValue; |
||
7 | use Maintenance; |
||
8 | use MediaWiki\MediaWikiServices; |
||
9 | use MediaWiki\Sparql\SparqlClient; |
||
10 | use Title; |
||
11 | use Wikibase\DataAccess\EntitySourceDefinitions; |
||
12 | use Wikibase\Lib\EntityTypeDefinitions; |
||
13 | use Wikibase\Lib\Units\JsonUnitStorage; |
||
14 | use Wikibase\Lib\Units\UnitConverter; |
||
15 | use Wikibase\Repo\Rdf\RdfVocabulary; |
||
16 | use Wikibase\Repo\Rdf\Values\ComplexValueRdfHelper; |
||
17 | use Wikibase\Repo\Rdf\Values\QuantityRdfBuilder; |
||
18 | use Wikibase\Repo\WikibaseRepo; |
||
19 | use Wikimedia\Purtle\RdfWriter; |
||
20 | use Wikimedia\Purtle\RdfWriterFactory; |
||
21 | |||
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set in the
// environment, otherwise fall back to the directory four levels above this script.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}
require_once $basePath . '/maintenance/Maintenance.php';
||
25 | |||
/**
 * Generate dump-like RDF for newly added units without running full dump.
 *
 * The script compares the current units config with an optional previous one.
 * For every unit that only appears in the current config it asks a SPARQL
 * endpoint for the quantity values using that unit, and writes the triples
 * linking those values (and the statements using them) to their normalized
 * counterparts into the output file.
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class AddUnitConversions extends Maintenance {

	/**
	 * Max chunk of values processed by one query
	 */
	const MAX_QUERY_CHUNK = 100;

	/**
	 * @var RdfVocabulary
	 */
	private $rdfVocabulary;

	/**
	 * @var RdfWriter
	 */
	protected $rdfWriter;

	/**
	 * @var UnitConverter
	 */
	protected $unitConverter;

	/**
	 * @var SparqlClient
	 */
	protected $client;

	/**
	 * Output file handle; stays null in dry-run mode, in which case
	 * writeOut() drains the writer without writing anywhere.
	 * @var resource|null
	 */
	private $out;

	/**
	 * Map of normalization predicates by full name.
	 * Keys are namespace URIs of value predicates, values are the names of
	 * the matching normalized namespaces (filled in by initializeWriter()).
	 * @var string[]
	 */
	private $normMap = [];

	/**
	 * Value URI prefix
	 * @var string
	 */
	private $valueURI;

	/**
	 * Set of normalized namespace names.
	 * Keys are namespace URIs, values are always true.
	 * @var bool[]
	 */
	private $normalizedNames = [];

	/**
	 * @var QuantityRdfBuilder
	 */
	protected $builder;

	/**
	 * Whether to only count values instead of generating output.
	 * @var bool
	 */
	private $dryRun = false;

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Produce RDF for new units." );

		$this->addOption( 'config', 'Current units config.', true, true );
		$this->addOption( 'old-config', 'Previous units config.', false, true );
		$this->addOption( 'output', 'File to output the data to.', true, true );
		$this->addOption( 'format', "Set the dump format.", false, true );
		$this->addOption( 'base-uri', 'Base URI for the data.', false, true );
		$this->addOption( 'sparql', 'SPARQL endpoint URL.', false, true );
		$this->addOption( 'dry-run', 'Do not generate output, only count values.', false, false );
	}

	/**
	 * Entry point of the script.
	 *
	 * Loads the current (and, if given, the previous) units config, determines
	 * which unit ids are new, sets up the SPARQL client, RDF writer and unit
	 * converter, and then processes each new unit in turn.
	 */
	public function execute() {
		$newJsonName = $this->getOption( 'config' );
		$newJson = $this->loadJsonFile( $newJsonName );
		if ( !is_array( $newJson ) || !$newJson ) {
			$this->fatalError( "Cannot load new config" );
		}

		$oldJsonName = $this->getOption( 'old-config' );
		if ( !$oldJsonName ) {
			$oldJson = [];
		} else {
			$oldJson = $this->loadJsonFile( $oldJsonName );
			// A well-formed but empty old config is acceptable: it simply means
			// every unit of the new config is new. Only a missing, unreadable or
			// non-object file is an error.
			if ( !is_array( $oldJson ) ) {
				$this->fatalError( "Cannot load old config" );
			}
		}

		$diffUnits = array_diff( array_keys( $newJson ), array_keys( $oldJson ) );
		if ( !$diffUnits ) {
			$this->error( "No new units." );
			return;
		}
		$this->output( 'Detected ' . count( $diffUnits ) . " new units\n" );
		$this->dryRun = (bool)$this->getOption( 'dry-run' );

		if ( !$this->dryRun ) {
			$outputName = $this->getOption( 'output' );
			$handle = fopen( $outputName, 'w' );
			// Without this check a failed fopen() would leave a false handle and
			// writeOut() would silently discard all generated data.
			if ( $handle === false ) {
				$this->fatalError( "Cannot open output file $outputName for writing" );
			}
			$this->out = $handle;
		}

		$wikibaseRepo = WikibaseRepo::getDefaultInstance();
		$endPoint = $this->getOption( 'sparql',
			$wikibaseRepo->getSettings()->getSetting( 'sparqlEndpoint' ) );
		if ( !$endPoint ) {
			$this->fatalError( 'SPARQL endpoint should be supplied in config or parameters' );
		}

		$baseUri = $this->getOption( 'base-uri',
			$wikibaseRepo->getSettings()->getSetting( 'conceptBaseUri' ) );

		$this->client = new SparqlClient( $endPoint, MediaWikiServices::getInstance()->getHttpRequestFactory() );
		$this->client->appendUserAgent( __CLASS__ );
		$format = $this->getOption( 'format', 'ttl' );
		$this->initializeWriter( $baseUri, $format );
		$this->unitConverter = new UnitConverter( new JsonUnitStorage( $newJsonName ), $baseUri );
		$this->initializeBuilder();

		foreach ( $diffUnits as $unit ) {
			$this->processUnit( $unit );
			$this->writeOut();
		}

		// Release the output file once everything has been flushed.
		if ( $this->out ) {
			fclose( $this->out );
			$this->out = null;
		}
	}

	/**
	 * Read a JSON file and decode it into an associative array.
	 *
	 * @param string $fileName Path of the file to read
	 *
	 * @return mixed Decoded data, or null if the file cannot be read or does
	 *  not contain valid JSON
	 */
	private function loadJsonFile( $fileName ) {
		if ( !is_readable( $fileName ) ) {
			return null;
		}
		$contents = file_get_contents( $fileName );
		if ( $contents === false ) {
			return null;
		}
		return json_decode( $contents, true );
	}

	/**
	 * Initialize RDF writer
	 *
	 * Creates the vocabulary and the writer, records the value namespace URI
	 * and the lookup tables between value and normalized-value namespaces,
	 * then emits the document prefixes.
	 *
	 * @param string $baseUri
	 * @param string $format File extension or MIME type of the output format.
	 */
	public function initializeWriter( $baseUri, $format ) {
		$wikibaseRepo = WikibaseRepo::getDefaultInstance();
		$this->rdfVocabulary = $this->createRdfVocabulary( $baseUri,
			$wikibaseRepo->getDataTypeDefinitions()->getRdfTypeUris() );
		$this->rdfWriter = $this->createRdfWriter( $format );

		$ns = $this->rdfVocabulary->getNamespaces();
		$this->valueURI = $ns[RdfVocabulary::NS_VALUE];
		foreach ( $this->rdfVocabulary->claimToValueNormalized as $value => $norm ) {
			// value-namespace URI => name of the matching normalized namespace
			$this->normMap[$ns[$this->rdfVocabulary->claimToValue[$value]]] = $norm;
			// remember normalized namespace URIs so they can be recognized later
			$this->normalizedNames[$ns[$norm]] = true;
		}
		$this->startDocument();
	}

	/**
	 * Initialize quantity builder.
	 *
	 * Requires the vocabulary, writer and unit converter to be set up already.
	 */
	public function initializeBuilder() {
		$this->builder = new QuantityRdfBuilder(
			new ComplexValueRdfHelper( $this->rdfVocabulary, $this->rdfWriter ),
			$this->unitConverter
		);
	}

	/**
	 * Generate all statements for a specific unit.
	 *
	 * Queries the endpoint for all value nodes using the unit, writes a
	 * quantityNormalized triple for each of them, and then processes the
	 * statements referring to those values in chunks of MAX_QUERY_CHUNK.
	 * In dry-run mode only the number of values found is reported.
	 *
	 * @param string $unit Unit Q-id
	 */
	public function processUnit( $unit ) {
		$this->output( "Processing $unit...\n" );
		$query = <<<QUERY
SELECT * WHERE {
  {
    SELECT DISTINCT ?v WHERE {
      ?v wikibase:quantityUnit wd:$unit .
      FILTER EXISTS { ?s ?p ?v }
    }
  }
  ?v wikibase:quantityAmount ?amount .
  ?v wikibase:quantityUpperBound ?upper .
  ?v wikibase:quantityLowerBound ?lower .
}
QUERY;
		$values = $this->client->query( $query );
		'@phan-var array[] $values';
		$this->output( "Got " . count( $values ) . " ids\n" );
		if ( $this->dryRun ) {
			return;
		}
		$map = [];
		foreach ( $values as $value ) {
			// Only nodes living in the value namespace are expected here.
			if ( substr_compare( $value['v'], $this->valueURI, 0, strlen( $this->valueURI ) ) !== 0 ) {
				$this->error( "Invalid value: {$value['v']}!" );
				continue;
			}
			$id = str_replace( $this->valueURI, '', $value['v'] );
			$normalizedId = $this->getNormalized( $id, $unit, $value );
			if ( $normalizedId === null ) {
				// No normalized form exists, so there is nothing to link to.
				$this->error( "No conversion available for value: {$value['v']}, skipping." );
				continue;
			}
			$map[$id] = $normalizedId;
			$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $id )
				->say( RdfVocabulary::NS_ONTOLOGY, 'quantityNormalized' )
				->is( RdfVocabulary::NS_VALUE, $normalizedId );
		}
		$this->writeOut();
		foreach ( array_chunk( array_keys( $map ), self::MAX_QUERY_CHUNK ) as $idChunk ) {
			$this->processStatements( $idChunk, $map );
			$this->writeOut();
		}
		$this->output( "Done.\n" );
	}

	/**
	 * Normalize unit and return the hash of the normalized node.
	 *
	 * When the converter hands back the very same object, the value is
	 * already in standard units and the original id is returned. Otherwise
	 * the normalized value node is written to the RDF writer.
	 *
	 * @param string $id Original value ID (hash)
	 * @param string $unit Short ID of the unit
	 * @param string[] $value Value data array with 'amount', 'upper' and 'lower' keys
	 *
	 * @return string|null Hash of the normalized node, or null when the
	 *  converter returned no converted value
	 */
	private function getNormalized( $id, $unit, array $value ) {
		$q = new QuantityValue(
			new DecimalValue( $value['amount'] ),
			$unit,
			new DecimalValue( $value['upper'] ),
			new DecimalValue( $value['lower'] )
		);
		$qNorm = $this->unitConverter->toStandardUnits( $q );
		if ( $qNorm === null ) {
			// Static analysis reports that toStandardUnits() may return null;
			// calling getHash() on it would abort the whole run.
			return null;
		}
		if ( $q === $qNorm ) {
			// didn't actually convert, so return original one
			return $id;
		}

		$normLName = $qNorm->getHash();

		$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $normLName )
			->a( RdfVocabulary::NS_ONTOLOGY, $this->rdfVocabulary->getValueTypeName( $qNorm ) );

		$this->builder->writeQuantityValue( $qNorm );

		// The normalized node is its own normalized form.
		$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $normLName )
			->say( RdfVocabulary::NS_ONTOLOGY, 'quantityNormalized' )
			->is( RdfVocabulary::NS_VALUE, $normLName );

		return $normLName;
	}

	/**
	 * Process statements for particular set of values.
	 * Will scan through the triples which use each of the values and
	 * add appropriate normalized triple referring to the normalized value.
	 * E.g. <s123> psv:P345 wdv:xys -> <s123> psn:P345 wdv:xyznorm
	 *
	 * @param string[] $values Value hashes
	 * @param string[] $map Map old id -> normalized id
	 */
	private function processStatements( $values, $map ) {
		$shortValues = array_map( static function ( $str ) {
			return 'wdv:' . $str;
		}, $values );
		$valuesStr = implode( ' ', $shortValues );
		$query = <<<QUERY
SELECT ?s ?p ?v WHERE {
  VALUES ?v { $valuesStr }
  ?s ?p ?v
  FILTER (?p != wikibase:quantityNormalized)
} ORDER BY ?s
QUERY;
		$data = $this->client->query( $query );
		'@phan-var array[] $data';
		foreach ( $data as $statement ) {
			// Split predicate name into $prefix and $name (actual P123 part)
			$last = strrpos( $statement['p'], '/' );
			$prefix = substr( $statement['p'], 0, $last + 1 );
			$name = substr( $statement['p'], $last + 1 );
			if ( isset( $this->normalizedNames[$prefix] ) ) {
				// This is already normalized predicate
				// This can happen when we deployed new config and
				// somebody edits the data with that unit - the update will already have
				// the normalized value. We can just ignore it.
				continue;
			}
			if ( !isset( $this->normMap[$prefix] ) ) {
				// This shouldn't happen - it means value used in predicate
				// that is not in RdfVocabulary.
				$this->error( "Unknown predicate {$statement['p']}" );
				continue;
			}
			$v = str_replace( $this->valueURI, '', $statement['v'] );
			$this->rdfWriter->about( $statement['s'] )
				->say( $this->normMap[$prefix], $name )
				->is( RdfVocabulary::NS_VALUE, $map[$v] );
		}
		// Progress marker: one dot per processed chunk.
		$this->output( '.' );
	}

	/**
	 * Kick off the document
	 *
	 * Declares every vocabulary namespace as a prefix and flushes the writer.
	 */
	public function startDocument() {
		foreach ( $this->rdfVocabulary->getNamespaces() as $gname => $uri ) {
			$this->rdfWriter->prefix( $gname, $uri );
		}

		$this->writeOut();
	}

	/**
	 * Write data to the output
	 *
	 * The writer is always drained; the drained data is only written when an
	 * output handle is open (i.e. not in dry-run mode).
	 */
	protected function writeOut() {
		$data = $this->rdfWriter->drain();
		if ( $this->out ) {
			if ( fwrite( $this->out, $data ) === false ) {
				$this->fatalError( "Failed to write to the output, exiting." );
			}
		}
	}

	/**
	 * Get vocabulary instance
	 *
	 * @param string $baseUri
	 * @param string[] $typeUris
	 *
	 * @return RdfVocabulary
	 */
	private function createRdfVocabulary( $baseUri, $typeUris ) {
		$entityDataTitle = Title::makeTitle( NS_SPECIAL, 'EntityData' );

		return new RdfVocabulary(
			[ '' => $baseUri ],
			[ '' => $entityDataTitle->getCanonicalURL() . '/' ],
			new EntitySourceDefinitions( [], new EntityTypeDefinitions( [] ) ),
			'',
			[ '' => 'wd' ],
			[ '' => '' ],
			[],
			$typeUris,
			[]
		);
	}

	/**
	 * @param string $format File extension or MIME type of the output format.
	 *
	 * @return RdfWriter
	 */
	private function createRdfWriter( $format ) {
		$factory = new RdfWriterFactory();
		$formatName = $factory->getFormatName( $format );
		// NOTE(review): getFormatName() is expected to yield a non-string for
		// formats it does not recognize - confirm against the Purtle docs.
		if ( !is_string( $formatName ) ) {
			$this->fatalError( "Unknown output format: $format" );
		}
		return $factory->getWriter( $formatName );
	}

}
||
378 | |||
// Name the maintenance class defined in this file, then include the runner hook.
$maintClass = AddUnitConversions::class;
require_once RUN_MAINTENANCE_IF_MAIN;
||
381 |
Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend adding an additional type check to your code: