Issues (1401)

Security Analysis    no request data  

This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.

  Cross-Site Scripting
Cross-Site Scripting enables an attacker to inject code into the response of a web-request that is viewed by other users. It can for example be used to bypass access controls, or even to take over other users' accounts.
  File Exposure
File Exposure allows an attacker to gain access to local files that he should not be able to access. These files can for example include database credentials, or other configuration files.
  File Manipulation
File Manipulation enables an attacker to write custom data to files. This potentially leads to injection of arbitrary code on the server.
  Object Injection
Object Injection enables an attacker to inject an object into PHP code, and can lead to arbitrary code execution, file exposure, or file manipulation attacks.
  Code Injection
Code Injection enables an attacker to execute arbitrary code on the server.
  Response Splitting
Response Splitting can be used to send arbitrary responses.
  File Inclusion
File Inclusion enables an attacker to inject custom files into PHP's file loading mechanism, either explicitly passed to include, or for example via PHP's auto-loading mechanism.
  Command Injection
Command Injection enables an attacker to inject a shell command that is executed with the privileges of the web-server. This can be used to expose sensitive data, or gain access to your server.
  SQL Injection
SQL Injection enables an attacker to execute arbitrary SQL code on your database server gaining access to user data, or manipulating user data.
  XPath Injection
XPath Injection enables an attacker to modify the parts of an XML document that are read. If that XML document is, for example, used for authentication, this can lead to further vulnerabilities similar to SQL Injection.
  LDAP Injection
LDAP Injection enables an attacker to inject LDAP statements potentially granting permission to run unauthorized queries, or modify content inside the LDAP tree.
  Header Injection
  Other Vulnerability
This category comprises other attack vectors such as manipulating the PHP runtime, loading custom extensions, freezing the runtime, or similar.
  Regex Injection
Regex Injection enables an attacker to execute arbitrary code in your PHP process.
  XML Injection
XML Injection enables an attacker to read files on your local filesystem including configuration files, or can be abused to freeze your web-server process.
  Variable Injection
Variable Injection enables an attacker to overwrite program variables with custom data, and can lead to further vulnerabilities.
Unfortunately, the security analysis is currently not available for your project. If you are a non-commercial open-source project, please contact support to gain access.

repo/maintenance/addUnitConversions.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace Wikibase\Repo\Maintenance;
4
5
use DataValues\DecimalValue;
6
use DataValues\QuantityValue;
7
use Maintenance;
8
use MediaWiki\MediaWikiServices;
9
use MediaWiki\Sparql\SparqlClient;
10
use Title;
11
use Wikibase\DataAccess\EntitySourceDefinitions;
12
use Wikibase\Lib\EntityTypeDefinitions;
13
use Wikibase\Lib\Units\JsonUnitStorage;
14
use Wikibase\Lib\Units\UnitConverter;
15
use Wikibase\Repo\Rdf\RdfVocabulary;
16
use Wikibase\Repo\Rdf\Values\ComplexValueRdfHelper;
17
use Wikibase\Repo\Rdf\Values\QuantityRdfBuilder;
18
use Wikibase\Repo\WikibaseRepo;
19
use Wikimedia\Purtle\RdfWriter;
20
use Wikimedia\Purtle\RdfWriterFactory;
21
22
// Resolve the MediaWiki root: use MW_INSTALL_PATH when the environment
// provides it, otherwise fall back to a path relative to this file.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}
require_once $basePath . '/maintenance/Maintenance.php';
25
26
/**
27
 * Generate dump-like RDF for newly added units without running full dump.
28
 *
29
 * @license GPL-2.0-or-later
30
 * @author Stas Malyshev
31
 */
32
class AddUnitConversions extends Maintenance {
33
34
	/**
35
	 * Max chunk of values processed by one query
36
	 */
37
	const MAX_QUERY_CHUNK = 100;
38
39
	/**
40
	 * @var RdfVocabulary
41
	 */
42
	private $rdfVocabulary;
43
44
	/**
45
	 * @var RdfWriter
46
	 */
47
	protected $rdfWriter;
48
49
	/**
50
	 * @var UnitConverter
51
	 */
52
	protected $unitConverter;
53
54
	/**
55
	 * @var SparqlClient
56
	 */
57
	protected $client;
58
59
	/**
60
	 * @var resource
61
	 */
62
	private $out;
63
64
	/**
65
	 * map of normalization predicates by full name
66
	 * @var string[]
67
	 */
68
	private $normMap;
69
70
	/**
71
	 * Value URI prefix
72
	 * @var string
73
	 */
74
	private $valueURI;
75
76
	/**
77
	 * Set of normalized namespace names.
78
	 * @var bool[]
79
	 */
80
	private $normalizedNames;
81
82
	/**
83
	 * @var QuantityRdfBuilder
84
	 */
85
	protected $builder;
86
87
	/**
88
	 * @var bool
89
	 */
90
	private $dryRun;
91
92
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Produce RDF for new units.' );

		// Options that take a value. Each row is: name, help text, required?
		$valueOptions = [
			[ 'config', 'Current units config.', true ],
			[ 'old-config', 'Previous units config.', false ],
			[ 'output', 'File to output the data to.', true ],
			[ 'format', 'Set the dump format.', false ],
			[ 'base-uri', 'Base URI for the data.', false ],
			[ 'sparql', 'SPARQL endpoint URL.', false ],
		];
		foreach ( $valueOptions as list( $optName, $optHelp, $isRequired ) ) {
			$this->addOption( $optName, $optHelp, $isRequired, true );
		}

		// The only plain flag: optional and takes no argument.
		$this->addOption( 'dry-run', 'Do not generate output, only count values.', false, false );
	}
104
105
	/**
	 * Find the units that are present in the current config but absent from
	 * the previous one, and generate RDF for the values that use them.
	 *
	 * The RDF is written to the file named by the 'output' option, unless
	 * 'dry-run' is set, in which case values are only counted.
	 */
	public function execute() {
		$newJsonName = $this->getOption( 'config' );
		$newJson = json_decode( file_get_contents( $newJsonName ), true );
		// The config must decode to a non-empty array keyed by unit ID;
		// anything else (unreadable file, bad JSON, scalar) is unusable.
		if ( !is_array( $newJson ) || !$newJson ) {
			$this->fatalError( "Cannot load new config" );
		}

		$oldJsonName = $this->getOption( 'old-config' );
		if ( !$oldJsonName ) {
			// No previous config given: every unit counts as new.
			$oldJson = [];
		} else {
			$oldJson = json_decode( file_get_contents( $oldJsonName ), true );
			if ( !is_array( $oldJson ) || !$oldJson ) {
				$this->fatalError( "Cannot load old config" );
			}
		}

		$diffUnits = array_diff( array_keys( $newJson ), array_keys( $oldJson ) );
		if ( !$diffUnits ) {
			$this->error( "No new units." );
			return;
		}
		$this->output( 'Detected ' . count( $diffUnits ) . " new units\n" );
		$this->dryRun = $this->getOption( 'dry-run' );

		if ( !$this->dryRun ) {
			$outputName = $this->getOption( 'output' );
			$out = fopen( $outputName, 'w' );
			// writeOut() skips writing when there is no handle, so a failed
			// open must abort here instead of silently dropping all output.
			if ( $out === false ) {
				$this->fatalError( "Cannot open output file $outputName for writing" );
			}
			$this->out = $out;
		}

		$wikibaseRepo = WikibaseRepo::getDefaultInstance();
		$endPoint = $this->getOption( 'sparql',
				$wikibaseRepo->getSettings()->getSetting( 'sparqlEndpoint' ) );
		if ( !$endPoint ) {
			$this->fatalError( 'SPARQL endpoint should be supplied in config or parameters' );
		}

		$baseUri = $this->getOption( 'base-uri',
				$wikibaseRepo->getSettings()->getSetting( 'conceptBaseUri' ) );

		$this->client = new SparqlClient( $endPoint, MediaWikiServices::getInstance()->getHttpRequestFactory() );
		$this->client->appendUserAgent( __CLASS__ );
		$format = $this->getOption( 'format', 'ttl' );
		$this->initializeWriter( $baseUri, $format );
		$this->unitConverter = new UnitConverter( new JsonUnitStorage( $newJsonName ), $baseUri );
		$this->initializeBuilder();

		foreach ( $diffUnits as $unit ) {
			$this->processUnit( $unit );
			$this->writeOut();
		}

		// Close the output explicitly so everything is flushed to disk.
		if ( $this->out ) {
			fclose( $this->out );
			$this->out = null;
		}
	}
159
160
	/**
	 * Set up the RDF vocabulary and writer, build the lookup tables used
	 * when rewriting predicates, and emit the document prefixes.
	 *
	 * @param string $baseUri
	 * @param string $format File extension or MIME type of the output format.
	 */
	public function initializeWriter( $baseUri, $format ) {
		$typeUris = WikibaseRepo::getDefaultInstance()
			->getDataTypeDefinitions()
			->getRdfTypeUris();
		$this->rdfVocabulary = $this->createRdfVocabulary( $baseUri, $typeUris );
		$this->rdfWriter = $this->createRdfWriter( $format );

		// Namespace prefix name => namespace URI.
		$namespaces = $this->rdfVocabulary->getNamespaces();
		$this->valueURI = $namespaces[RdfVocabulary::NS_VALUE];

		foreach ( $this->rdfVocabulary->claimToValueNormalized as $claimNs => $normalizedNs ) {
			$valueNs = $this->rdfVocabulary->claimToValue[$claimNs];
			// URI of the value namespace => prefix name of its normalized counterpart.
			$this->normMap[$namespaces[$valueNs]] = $normalizedNs;
			// URIs of the normalized namespaces, used as a set.
			$this->normalizedNames[$namespaces[$normalizedNs]] = true;
		}

		$this->startDocument();
	}
180
181
	/**
	 * Create the quantity RDF builder on top of the current vocabulary,
	 * writer and unit converter.
	 */
	public function initializeBuilder() {
		$valueHelper = new ComplexValueRdfHelper( $this->rdfVocabulary, $this->rdfWriter );
		$this->builder = new QuantityRdfBuilder( $valueHelper, $this->unitConverter );
	}
189
190
	/**
	 * Produce all statements for one unit.
	 *
	 * Queries the SPARQL endpoint for the quantity values that use the unit,
	 * links each value to its normalized value, and then adds normalized
	 * triples for the statements that refer to those values. In dry-run mode
	 * only the number of values found is reported.
	 *
	 * @param string $unit Unit Q-id
	 */
	public function processUnit( $unit ) {
		$this->output( "Processing $unit...\n" );
		$sparql = <<<QUERY
SELECT * WHERE {
{
    SELECT DISTINCT ?v  WHERE {
        ?v wikibase:quantityUnit wd:$unit .
        FILTER EXISTS { ?s ?p ?v }
    }
}
  ?v wikibase:quantityAmount ?amount .
  ?v wikibase:quantityUpperBound ?upper .
  ?v wikibase:quantityLowerBound ?lower .
}
QUERY;
		$values = $this->client->query( $sparql );
		'@phan-var array[] $values';
		$this->output( "Got " . count( $values ) . " ids\n" );
		if ( $this->dryRun ) {
			return;
		}

		// Original value hash => hash of its normalized value.
		$map = [];
		$prefixLength = strlen( $this->valueURI );
		foreach ( $values as $row ) {
			$nodeUri = $row['v'];
			// Only nodes inside the value namespace can be handled.
			if ( substr_compare( $nodeUri, $this->valueURI, 0, $prefixLength ) !== 0 ) {
				$this->error( "Invalid value: {$nodeUri}!" );
				continue;
			}

			$hash = str_replace( $this->valueURI, '', $nodeUri );
			$normalizedHash = $this->getNormalized( $hash, $unit, $row );
			$map[$hash] = $normalizedHash;

			$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $hash )
				->say( RdfVocabulary::NS_ONTOLOGY, 'quantityNormalized' )
				->is( RdfVocabulary::NS_VALUE, $normalizedHash );
		}
		$this->writeOut();

		// Look up the referring statements in batches to bound the query size.
		$chunks = array_chunk( array_keys( $map ), self::MAX_QUERY_CHUNK );
		foreach ( $chunks as $idChunk ) {
			$this->processStatements( $idChunk, $map );
			$this->writeOut();
		}
		$this->output( "Done.\n" );
	}
236
237
	/**
	 * Normalize a value to standard units and return the hash of the normalized node.
	 *
	 * When the converter returns the very same object, the original ID is
	 * returned and nothing is written. Otherwise the normalized value node is
	 * written to the RDF writer (type, quantity data, and a quantityNormalized
	 * link to itself) and its hash is returned.
	 *
	 * The converter may return null instead of a value; that is reported as a
	 * fatal error rather than failing later on a method call on null.
	 *
	 * @param string   $id Original value ID (hash)
	 * @param string   $unit Short ID of the unit
	 * @param string[] $value Value data array with keys 'amount', 'upper' and 'lower'
	 *
	 * @return string Hash of the normalized node
	 */
	private function getNormalized( $id, $unit, array $value ) {
		$q = new QuantityValue(
			new DecimalValue( $value['amount'] ),
			$unit,
			new DecimalValue( $value['upper'] ),
			new DecimalValue( $value['lower'] )
		);
		$qNorm = $this->unitConverter->toStandardUnits( $q );

		if ( $qNorm === null ) {
			$this->fatalError( "Cannot convert value $id with unit $unit to standard units" );
		}

		if ( $q === $qNorm ) {
			// didn't actually convert, so return original one
			return $id;
		}

		$normLName = $qNorm->getHash();

		$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $normLName )
			->a( RdfVocabulary::NS_ONTOLOGY, $this->rdfVocabulary->getValueTypeName( $qNorm ) );

		$this->builder->writeQuantityValue( $qNorm );

		// The normalized node points to itself as its own normalized value.
		$this->rdfWriter->about( RdfVocabulary::NS_VALUE, $normLName )
			->say( RdfVocabulary::NS_ONTOLOGY, 'quantityNormalized' )
			->is( RdfVocabulary::NS_VALUE, $normLName );

		return $normLName;
	}
270
271
	/**
	 * Handle the statements that use a given set of values.
	 *
	 * Fetches every triple whose object is one of the values and writes the
	 * matching normalized triple pointing at the normalized value.
	 * E.g. <s123> psv:P345 wdv:xyz -> <s123> psn:P345 wdv:xyznorm
	 *
	 * @param string[] $values Value hashes
	 * @param string[] $map Map old id -> normalized id
	 */
	private function processStatements( $values, $map ) {
		$prefixed = [];
		foreach ( $values as $hash ) {
			$prefixed[] = 'wdv:' . $hash;
		}
		$valueList = implode( ' ', $prefixed );

		$query = <<<QUERY
SELECT ?s ?p ?v WHERE {
	VALUES ?v { $valueList }
	?s ?p ?v
	FILTER (?p != wikibase:quantityNormalized)
} ORDER BY ?s
QUERY;
		$data = $this->client->query( $query );
		'@phan-var array[] $data';

		foreach ( $data as $triple ) {
			$predicate = $triple['p'];
			// Cut the predicate after its last slash: namespace part and P123 part.
			$cut = strrpos( $predicate, '/' ) + 1;
			$prefix = substr( $predicate, 0, $cut );
			$name = substr( $predicate, $cut );

			if ( isset( $this->normalizedNames[$prefix] ) ) {
				// Already a normalized predicate. This can happen when the new
				// config is deployed and somebody edits data with that unit:
				// the update already carries the normalized value. Ignore it.
				continue;
			}

			if ( !isset( $this->normMap[$prefix] ) ) {
				// Should not happen: the value is used with a predicate
				// that is not in RdfVocabulary.
				$this->error( "Unknown predicate {$predicate}" );
				continue;
			}

			$valueHash = str_replace( $this->valueURI, '', $triple['v'] );
			$this->rdfWriter->about( $triple['s'] )
				->say( $this->normMap[$prefix], $name )
				->is( RdfVocabulary::NS_VALUE, $map[$valueHash] );
		}

		// Progress marker, one per processed chunk.
		$this->output( '.' );
	}
319
320
	/**
	 * Begin the document: declare every vocabulary namespace as a prefix
	 * on the writer, then flush what has been produced so far.
	 */
	public function startDocument() {
		$namespaces = $this->rdfVocabulary->getNamespaces();
		foreach ( $namespaces as $prefixName => $namespaceUri ) {
			$this->rdfWriter->prefix( $prefixName, $namespaceUri );
		}

		$this->writeOut();
	}
330
331
	/**
	 * Drain the RDF writer and append the drained data to the output.
	 *
	 * The writer is drained even when no output handle is set (as in
	 * dry-run mode); in that case the data is simply discarded.
	 */
	protected function writeOut() {
		$chunk = $this->rdfWriter->drain();
		if ( !$this->out ) {
			return;
		}

		if ( fwrite( $this->out, $chunk ) === false ) {
			$this->fatalError( "Failed to write to the output, exiting." );
		}
	}
342
343
	/**
	 * Build the RDF vocabulary used by this script.
	 *
	 * Apart from the base URI, the Special:EntityData URL and the type URIs,
	 * all constructor arguments are fixed or empty values.
	 *
	 * @param string   $baseUri
	 * @param string[] $typeUris
	 *
	 * @return RdfVocabulary
	 */
	private function createRdfVocabulary( $baseUri, $typeUris ) {
		$entityDataTitle = Title::makeTitle( NS_SPECIAL, 'EntityData' );
		$dataUri = $entityDataTitle->getCanonicalURL() . '/';
		$entitySources = new EntitySourceDefinitions( [], new EntityTypeDefinitions( [] ) );

		return new RdfVocabulary(
			[ '' => $baseUri ],
			[ '' => $dataUri ],
			$entitySources,
			'',
			[ '' => 'wd' ],
			[ '' => '' ],
			[],
			$typeUris,
			[]
		);
	}
366
367
	/**
	 * Create an RDF writer for the requested output format.
	 *
	 * The factory's format lookup may return false instead of a format name;
	 * that is reported as a fatal error rather than being passed on to
	 * getWriter().
	 *
	 * @param string $format File extension or MIME type of the output format.
	 *
	 * @return RdfWriter
	 */
	private function createRdfWriter( $format ) {
		$factory = new RdfWriterFactory();
		$formatName = $factory->getFormatName( $format );
		if ( $formatName === false ) {
			$this->fatalError( "Unknown output format: $format" );
		}

		return $factory->getWriter( $formatName );
	}
376
377
}
378
379
// Name of the maintenance class defined in this file.
$maintClass = AddUnitConversions::class;
380
// Hand over to the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the MediaWiki runner that executes $maintClass — confirm).
require_once RUN_MAINTENANCE_IF_MAIN;
381