Completed
Push — master ( 64b8b5...1ef719 )
by
unknown
06:56
created

SparqlHelper::runQuery()   C

Complexity

Conditions 10
Paths 18

Size

Total Lines 95

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 95
rs 6.2424
c 0
b 0
f 0
cc 10
nc 18
nop 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace WikibaseQuality\ConstraintReport\ConstraintCheck\Helper;
4
5
use Config;
6
use DataValues\DataValue;
7
use DataValues\MonolingualTextValue;
8
use DateInterval;
9
use IBufferingStatsdDataFactory;
10
use InvalidArgumentException;
11
use MapCacheLRU;
12
use MediaWiki\Http\HttpRequestFactory;
13
use MWException;
14
use Psr\Log\LoggerInterface;
15
use WANObjectCache;
16
use Wikibase\DataModel\Entity\EntityId;
17
use Wikibase\DataModel\Entity\EntityIdParser;
18
use Wikibase\DataModel\Entity\EntityIdParsingException;
19
use Wikibase\DataModel\Entity\EntityIdValue;
20
use Wikibase\DataModel\Services\Lookup\PropertyDataTypeLookup;
21
use Wikibase\DataModel\Snak\PropertyValueSnak;
22
use Wikibase\DataModel\Statement\Statement;
23
use Wikibase\Rdf\RdfVocabulary;
24
use WikibaseQuality\ConstraintReport\Api\ExpiryLock;
25
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedBool;
26
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedEntityIds;
27
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedQueryResults;
28
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata;
29
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata;
30
use WikibaseQuality\ConstraintReport\ConstraintCheck\Context\Context;
31
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessage;
32
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessageDeserializer;
33
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessageSerializer;
34
use WikibaseQuality\ConstraintReport\Role;
35
use Wikimedia\Timestamp\ConvertibleTimestamp;
36
37
/**
38
 * Class for running a SPARQL query on some endpoint and getting the results.
39
 *
40
 * @author Lucas Werkmeister
41
 * @license GPL-2.0-or-later
42
 */
43
class SparqlHelper {
44
45
	/**
46
	 * @var Config
47
	 */
48
	private $config;
49
50
	/**
51
	 * @var RdfVocabulary
52
	 */
53
	private $rdfVocabulary;
54
55
	/**
56
	 * @var string
57
	 */
58
	private $entityPrefix;
59
60
	/**
61
	 * @var string
62
	 */
63
	private $prefixes;
64
65
	/**
66
	 * @var EntityIdParser
67
	 */
68
	private $entityIdParser;
69
70
	/**
71
	 * @var PropertyDataTypeLookup
72
	 */
73
	private $propertyDataTypeLookup;
74
75
	/**
76
	 * @var WANObjectCache
77
	 */
78
	private $cache;
79
80
	/**
81
	 * @var ViolationMessageSerializer
82
	 */
83
	private $violationMessageSerializer;
84
85
	/**
86
	 * @var ViolationMessageDeserializer
87
	 */
88
	private $violationMessageDeserializer;
89
90
	/**
91
	 * @var IBufferingStatsdDataFactory
92
	 */
93
	private $dataFactory;
94
95
	/**
96
	 * @var LoggingHelper
97
	 */
98
	private $loggingHelper;
99
100
	/**
101
	 * @var string
102
	 */
103
	private $defaultUserAgent;
104
105
	/**
106
	 * @var ExpiryLock
107
	 */
108
	private $throttlingLock;
109
110
	/**
111
	 * @var int stands for: No Retry-After header-field was sent back
112
	 */
113
	const NO_RETRY_AFTER = -1;
114
	/**
115
	 * @var int stands for: Empty Retry-After header-field was sent back
116
	 */
117
	const EMPTY_RETRY_AFTER = -2;
118
	/**
119
	 * @var int stands for: Invalid Retry-After header-field was sent back
120
	 * (like an unparseable string)
121
	 */
122
	const INVALID_RETRY_AFTER = -3;
123
	/**
124
	 * @var string ID on which the lock is applied on
125
	 */
126
	const EXPIRY_LOCK_ID = 'SparqlHelper.runQuery';
127
128
	/**
129
	 * @var int HTTP response code for too many requests
130
	 */
131
	const HTTP_TOO_MANY_REQUESTS = 429;
132
133
	/**
134
	 * @var HttpRequestFactory
135
	 */
136
	private $requestFactory;
137
138
	/**
	 * Store the injected collaborators and precompute the entity IRI prefix
	 * and the block of SPARQL PREFIX declarations that runQuery() can prepend.
	 *
	 * @param Config $config
	 * @param RdfVocabulary $rdfVocabulary
	 * @param EntityIdParser $entityIdParser
	 * @param PropertyDataTypeLookup $propertyDataTypeLookup
	 * @param WANObjectCache $cache
	 * @param ViolationMessageSerializer $violationMessageSerializer
	 * @param ViolationMessageDeserializer $violationMessageDeserializer
	 * @param IBufferingStatsdDataFactory $dataFactory
	 * @param ExpiryLock $throttlingLock
	 * @param LoggingHelper $loggingHelper
	 * @param string $defaultUserAgent
	 * @param HttpRequestFactory $requestFactory
	 */
	public function __construct(
		Config $config,
		RdfVocabulary $rdfVocabulary,
		EntityIdParser $entityIdParser,
		PropertyDataTypeLookup $propertyDataTypeLookup,
		WANObjectCache $cache,
		ViolationMessageSerializer $violationMessageSerializer,
		ViolationMessageDeserializer $violationMessageDeserializer,
		IBufferingStatsdDataFactory $dataFactory,
		ExpiryLock $throttlingLock,
		LoggingHelper $loggingHelper,
		$defaultUserAgent,
		HttpRequestFactory $requestFactory
	) {
		$this->config = $config;
		$this->rdfVocabulary = $rdfVocabulary;
		$this->entityIdParser = $entityIdParser;
		$this->propertyDataTypeLookup = $propertyDataTypeLookup;
		$this->cache = $cache;
		$this->violationMessageSerializer = $violationMessageSerializer;
		$this->violationMessageDeserializer = $violationMessageDeserializer;
		$this->dataFactory = $dataFactory;
		$this->throttlingLock = $throttlingLock;
		$this->loggingHelper = $loggingHelper;
		$this->defaultUserAgent = $defaultUserAgent;
		$this->requestFactory = $requestFactory;

		$this->entityPrefix = $rdfVocabulary->getNamespaceURI( RdfVocabulary::NS_ENTITY );

		// SPARQL prefix => vocabulary namespace, in the order the declarations are emitted
		$vocabularyNamespaces = [
			'wd' => RdfVocabulary::NS_ENTITY,
			'wds' => RdfVocabulary::NS_STATEMENT,
			'wdt' => RdfVocabulary::NSP_DIRECT_CLAIM,
			'wdv' => RdfVocabulary::NS_VALUE,
			'p' => RdfVocabulary::NSP_CLAIM,
			'ps' => RdfVocabulary::NSP_CLAIM_STATEMENT,
			'pq' => RdfVocabulary::NSP_QUALIFIER,
			'pqv' => RdfVocabulary::NSP_QUALIFIER_VALUE,
			'pr' => RdfVocabulary::NSP_REFERENCE,
			'prv' => RdfVocabulary::NSP_REFERENCE_VALUE,
		];

		$declarations = [];
		foreach ( $vocabularyNamespaces as $sparqlPrefix => $namespace ) {
			$declarations[] = "PREFIX $sparqlPrefix: <{$rdfVocabulary->getNamespaceURI( $namespace )}>";
		}
		// TODO get wikibase: prefix from vocabulary once -beta is dropped (T112127)
		$declarations[] = 'PREFIX wikibase: <http://wikiba.se/ontology#>';
		$declarations[] = 'PREFIX wikibase-beta: <http://wikiba.se/ontology-beta#>';

		// one declaration per line, no trailing newline
		$this->prefixes = implode( "\n", $declarations );
	}
181
182
	/**
	 * Ask the SPARQL endpoint whether the entity reaches any of the given classes
	 * via the configured "instance of" / "subclass of" properties.
	 *
	 * The classes are queried in chunks of 20; the first chunk that yields a
	 * positive answer ends the search. The metadata of all queries that were
	 * run is merged into the result.
	 *
	 * @param string $id entity ID serialization of the entity to check
	 * @param string[] $classes entity ID serializations of the expected types
	 * @param boolean $withInstance true for “instance” relation, false for “subclass” relation
	 *
	 * @return CachedBool
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function hasType( $id, array $classes, $withInstance ) {
		$instanceOfId = $this->config->get( 'WBQualityConstraintsInstanceOfId' );
		$subclassOfId = $this->config->get( 'WBQualityConstraintsSubclassOfId' );

		// zero or more "subclass of" steps, optionally preceded by one "instance of" step
		$path = "wdt:$subclassOfId*";
		if ( $withInstance ) {
			$path = "wdt:$instanceOfId/" . $path;
		}

		$collectedMetadata = [];
		$found = false;

		foreach ( array_chunk( $classes, 20 ) as $chunk ) {
			$prefixedClasses = [];
			foreach ( $chunk as $classId ) {
				$prefixedClasses[] = 'wd:' . $classId;
			}
			$classesValues = implode( ' ', $prefixedClasses );

			// TODO hint:gearing is a workaround for T168973 and can hopefully be removed eventually
			$query = <<<EOF
ASK {
  BIND(wd:$id AS ?item)
  VALUES ?class { $classesValues }
  ?item $path ?class. hint:Prior hint:gearing "forward".
}
EOF;

			$answer = $this->runQuery( $query );
			$collectedMetadata[] = $answer->getMetadata();
			if ( $answer->getArray()['boolean'] ) {
				$found = true;
				break;
			}
		}

		return new CachedBool( $found, Metadata::merge( $collectedMetadata ) );
	}
230
231
	/**
	 * Find up to 10 other entities that have a statement with the same
	 * property and value as the given statement.
	 *
	 * @param Statement $statement
	 * @param boolean $ignoreDeprecatedStatements Whether to ignore deprecated statements or not.
	 *
	 * @return CachedEntityIds
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function findEntitiesWithSameStatement(
		Statement $statement,
		$ignoreDeprecatedStatements
	) {
		// getSerialization() is the entity ID accessor (as used by
		// findEntitiesWithSameQualifierOrReference()); serialize() is PHP's
		// object serialization hook and is not guaranteed to be the plain ID.
		$pid = $statement->getPropertyId()->getSerialization();
		// statement GUIDs use "$" as separator, the statement IRIs use "-"
		$guid = str_replace( '$', '-', $statement->getGuid() );

		$deprecatedFilter = '';
		if ( $ignoreDeprecatedStatements ) {
			$deprecatedFilter .= 'MINUS { ?otherStatement wikibase:rank wikibase:DeprecatedRank. }';
			$deprecatedFilter .= 'MINUS { ?otherStatement wikibase-beta:rank wikibase-beta:DeprecatedRank. }';
		}

		$query = <<<EOF
SELECT DISTINCT ?otherEntity WHERE {
  BIND(wds:$guid AS ?statement)
  BIND(p:$pid AS ?p)
  BIND(ps:$pid AS ?ps)
  ?entity ?p ?statement.
  ?statement ?ps ?value.
  ?otherStatement ?ps ?value.
  ?otherEntity ?p ?otherStatement.
  FILTER(?otherEntity != ?entity)
  $deprecatedFilter
}
LIMIT 10
EOF;

		$result = $this->runQuery( $query );

		return $this->getOtherEntities( $result );
	}
270
271
	/**
	 * Find up to 10 other entities that have a statement whose qualifier
	 * (or reference) carries the same property and value as the given snak.
	 *
	 * Any $type other than Context::TYPE_QUALIFIER is treated as a reference.
	 *
	 * @param EntityId $entityId The entity ID on the containing entity
	 * @param PropertyValueSnak $snak
	 * @param string $type Context::TYPE_QUALIFIER or Context::TYPE_REFERENCE
	 * @param boolean $ignoreDeprecatedStatements Whether to ignore deprecated statements or not.
	 *
	 * @return CachedEntityIds
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function findEntitiesWithSameQualifierOrReference(
		EntityId $entityId,
		PropertyValueSnak $snak,
		$type,
		$ignoreDeprecatedStatements
	) {
		$isQualifier = $type === Context::TYPE_QUALIFIER;
		$propertyId = $snak->getPropertyId();

		$eid = $entityId->getSerialization();
		$pid = $propertyId->getSerialization();

		$dataType = $this->propertyDataTypeLookup->getDataTypeIdForProperty( $propertyId );
		list( $value, $isFullValue ) = $this->getRdfLiteral( $dataType, $snak->getDataValue() );

		// pq/pr for simple values, pqv/prv when matching against a full value node
		$prefix = $isQualifier ? 'pq' : 'pr';
		if ( $isFullValue ) {
			$prefix .= 'v';
		}

		if ( $isQualifier ) {
			$path = "$prefix:$pid";
		} else {
			$path = "prov:wasDerivedFrom/$prefix:$pid";
		}

		$deprecatedFilter = '';
		if ( $ignoreDeprecatedStatements ) {
			$deprecatedFilter = implode( "\n", [
				'  MINUS { ?otherStatement wikibase:rank wikibase:DeprecatedRank. }',
				'  MINUS { ?otherStatement wikibase-beta:rank wikibase-beta:DeprecatedRank. }',
			] );
		}

		$query = <<<EOF
SELECT DISTINCT ?otherEntity WHERE {
  BIND(wd:$eid AS ?entity)
  BIND($value AS ?value)
  ?entity ?p ?statement.
  ?statement $path ?value.
  ?otherStatement $path ?value.
  ?otherEntity ?otherP ?otherStatement.
  FILTER(?otherEntity != ?entity)
$deprecatedFilter
}
LIMIT 10
EOF;

		return $this->getOtherEntities( $this->runQuery( $query ) );
	}
327
328
	/**
	 * Return SPARQL code for a string literal with $text as content.
	 *
	 * Besides the quote and the backslash, line feeds and carriage returns
	 * are escaped as well: the SPARQL grammar does not allow them unescaped
	 * inside a double-quoted (short) string literal, so text containing them
	 * would otherwise produce a syntactically invalid query.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function stringLiteral( $text ) {
		// strtr() with an array replaces all keys in a single pass and never
		// rescans replaced text, so the backslashes added here are not
		// escaped a second time.
		$escapes = [
			'\\' => '\\\\',
			'"' => '\\"',
			"\n" => '\\n',
			"\r" => '\\r',
		];

		return '"' . strtr( $text, $escapes ) . '"';
	}
338
339
	/**
	 * Turn the ?otherEntity column of a SPARQL query result into entity IDs.
	 *
	 * A binding whose IRI does not start with the entity prefix, or whose
	 * remainder cannot be parsed as an entity ID, yields null at that position.
	 *
	 * @param CachedQueryResults $results
	 *
	 * @return CachedEntityIds
	 */
	private function getOtherEntities( CachedQueryResults $results ) {
		$prefix = $this->entityPrefix;
		$prefixLength = strlen( $prefix );

		$entityIds = [];
		foreach ( $results->getArray()['results']['bindings'] as $index => $binding ) {
			$iri = $binding['otherEntity']['value'];
			$entityId = null;

			if ( substr( $iri, 0, $prefixLength ) === $prefix ) {
				try {
					$entityId = $this->entityIdParser->parse( substr( $iri, $prefixLength ) );
				} catch ( EntityIdParsingException $e ) {
					// not a known entity ID: keep null
				}
			}

			$entityIds[$index] = $entityId;
		}

		return new CachedEntityIds( $entityIds, $results->getMetadata() );
	}
366
367
	// @codingStandardsIgnoreStart cyclomatic complexity of this function is too high
	/**
	 * Build the SPARQL term (literal or IRI) that matches the given data value in a query.
	 *
	 * @param string $dataType
	 * @param DataValue $dataValue
	 *
	 * @return array two elements: the term as a string in SPARQL syntax,
	 * and a boolean that is true if the term refers to a full value node
	 * @throws InvalidArgumentException for an unknown data type, or for a URL
	 * or language tag that cannot be written in SPARQL syntax
	 */
	private function getRdfLiteral( $dataType, DataValue $dataValue ) {
		$asIri = function ( $target ) {
			return '<' . $target . '>';
		};

		switch ( $dataType ) {
			case 'string':
			case 'external-id':
				$term = $this->stringLiteral( $dataValue->getValue() );
				break;

			case 'commonsMedia':
				$term = $asIri( $this->rdfVocabulary->getMediaFileURI( $dataValue->getValue() ) );
				break;

			case 'geo-shape':
				$term = $asIri( $this->rdfVocabulary->getGeoShapeURI( $dataValue->getValue() ) );
				break;

			case 'tabular-data':
				$term = $asIri( $this->rdfVocabulary->getTabularDataURI( $dataValue->getValue() ) );
				break;

			case 'url':
				$address = $dataValue->getValue();
				if ( !preg_match( '/^[^<>"{}\\\\|^`\\x00-\\x20]*$/D', $address ) ) {
					// not a valid URL for SPARQL (see SPARQL spec, production 139 IRIREF)
					// such an URL should never reach us, so just throw
					throw new InvalidArgumentException( 'invalid URL: ' . $address );
				}
				$term = $asIri( $address );
				break;

			case 'wikibase-item':
			case 'wikibase-property':
				/** @var EntityIdValue $dataValue */
				$term = 'wd:' . $dataValue->getEntityId()->getSerialization();
				break;

			case 'monolingualtext':
				/** @var MonolingualTextValue $dataValue */
				$languageTag = $dataValue->getLanguageCode();
				if ( !preg_match( '/^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/D', $languageTag ) ) {
					// not a valid language tag for SPARQL (see SPARQL spec, production 145 LANGTAG)
					// such a language tag should never reach us, so just throw
					throw new InvalidArgumentException( 'invalid language tag: ' . $languageTag );
				}
				$term = $this->stringLiteral( $dataValue->getText() ) . '@' . $languageTag;
				break;

			case 'globe-coordinate':
			case 'quantity':
			case 'time':
				// these are the only types matched via their full value node
				return [ 'wdv:' . $dataValue->getHash(), true ];

			default:
				throw new InvalidArgumentException( 'unknown data type: ' . $dataType );
		}

		return [ $term, false ];
	}
	// @codingStandardsIgnoreEnd
421
422
	/**
	 * Caching wrapper around matchesRegularExpressionWithSparql().
	 *
	 * Results are kept in one LRU map per regex (keyed by the hash of the text)
	 * stored in the object cache. A cached entry is either a boolean result or
	 * a serialized ConstraintParameterException, which is rethrown on a hit.
	 *
	 * @param string $text
	 * @param string $regex
	 *
	 * @return boolean
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 * @throws ConstraintParameterException if the $regex is invalid
	 * @throws MWException if the cached entry is of an unexpected type
	 */
	public function matchesRegularExpression( $text, $regex ) {
		$textHash = hash( 'sha256', $text );
		$cacheKey = $this->cache->makeKey(
			'WikibaseQualityConstraints', // extension
			'regex', // action
			'WDQS-Java', // regex flavor
			hash( 'sha256', $regex )
		);
		$cacheMapSize = $this->config->get( 'WBQualityConstraintsFormatCacheMapSize' );

		$refreshCacheMap = function( $oldCacheMapArray ) use ( $text, $regex, $textHash, $cacheMapSize ) {
			if ( $oldCacheMapArray === false ) {
				// nothing cached yet: start with an empty map
				$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.init' );
				return [];
			}

			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh' );
			$lruMap = MapCacheLRU::newFromArray( $oldCacheMapArray, $cacheMapSize );

			if ( $lruMap->has( $textHash ) ) {
				$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.hit' );
				$lruMap->get( $textHash ); // ping cache
				return $lruMap->toArray();
			}

			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.miss' );
			try {
				$outcome = $this->matchesRegularExpressionWithSparql( $text, $regex );
			} catch ( ConstraintParameterException $e ) {
				// an invalid regex is a cacheable outcome, too
				$outcome = $this->serializeConstraintParameterException( $e );
			} catch ( SparqlHelperException $e ) {
				// don’t cache this
				return $lruMap->toArray();
			}
			$lruMap->set( $textHash, $outcome, 3 / 8 );

			return $lruMap->toArray();
		};

		$cacheMapArray = $this->cache->getWithSetCallback(
			$cacheKey,
			WANObjectCache::TTL_DAY,
			$refreshCacheMap,
			[
				// Once map is > 1 sec old, consider refreshing
				'ageNew' => 1,
				// Update 5 seconds after "ageNew" given a 1 query/sec cache check rate
				'hotTTR' => 5,
				// avoid querying cache servers multiple times in a request
				// (e. g. when checking format of a reference URL used multiple times on an entity)
				'pcTTL' => WANObjectCache::TTL_PROC_LONG,
			]
		);

		if ( !isset( $cacheMapArray[$textHash] ) ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.miss' );
			return $this->matchesRegularExpressionWithSparql( $text, $regex );
		}

		$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.hit' );
		$cached = $cacheMapArray[$textHash];

		if ( is_bool( $cached ) ) {
			return $cached;
		}

		if ( is_array( $cached ) && $cached['type'] == ConstraintParameterException::class ) {
			throw $this->deserializeConstraintParameterException( $cached );
		}

		throw new MWException(
			'Value of unknown type in object cache (' .
			'cache key: ' . $cacheKey . ', ' .
			'cache map key: ' . $textHash . ', ' .
			'value type: ' . gettype( $cached ) . ')'
		);
	}
514
515
	/**
	 * Convert a ConstraintParameterException into a plain array:
	 * the exception class name plus its serialized violation message.
	 *
	 * @param ConstraintParameterException $cpe
	 *
	 * @return array
	 */
	private function serializeConstraintParameterException( ConstraintParameterException $cpe ) {
		$serializedMessage = $this->violationMessageSerializer->serialize(
			$cpe->getViolationMessage()
		);

		return [
			'type' => ConstraintParameterException::class,
			'violationMessage' => $serializedMessage,
		];
	}
521
522
	/**
	 * Rebuild a ConstraintParameterException from the array form
	 * produced by serializeConstraintParameterException().
	 *
	 * @param array $serialization
	 *
	 * @return ConstraintParameterException
	 */
	private function deserializeConstraintParameterException( array $serialization ) {
		return new ConstraintParameterException(
			$this->violationMessageDeserializer->deserialize( $serialization['violationMessage'] )
		);
	}
528
529
	/**
	 * Let the SPARQL endpoint evaluate whether $text matches $regex as a whole
	 * (the regex is wrapped in ^(?:…)$ before it is sent).
	 *
	 * This function is only public for testing purposes;
	 * use matchesRegularExpression, which is equivalent but caches results.
	 *
	 * @param string $text
	 * @param string $regex
	 *
	 * @return boolean
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 * @throws ConstraintParameterException if the $regex is invalid
	 */
	public function matchesRegularExpressionWithSparql( $text, $regex ) {
		$textStringLiteral = $this->stringLiteral( $text );
		$regexStringLiteral = $this->stringLiteral( '^(?:' . $regex . ')$' );

		$query = "SELECT (REGEX($textStringLiteral, $regexStringLiteral) AS ?matches) {}";

		// this query uses none of the prefixes, so they are omitted
		$firstBinding = $this->runQuery( $query, false )->getArray()['results']['bindings'][0];

		if ( !array_key_exists( 'matches', $firstBinding ) ) {
			// empty result: regex broken
			throw new ConstraintParameterException(
				( new ViolationMessage( 'wbqc-violation-message-parameter-regex' ) )
					->withInlineCode( $regex, Role::CONSTRAINT_PARAMETER_VALUE )
			);
		}

		// true or false ⇒ regex okay, text matches or not
		return $firstBinding['matches']['value'] === 'true';
	}
562
563
	/**
	 * Check whether the text content of an error response indicates a query timeout.
	 *
	 * The response counts as a timeout if it contains any of the class names
	 * configured in WBQualityConstraintsSparqlTimeoutExceptionClasses
	 * (plain, case-sensitive substring match).
	 *
	 * If no class names are configured, nothing is treated as a timeout.
	 * (Joining an empty list into a regular expression would yield an empty
	 * pattern, which matches every response.)
	 *
	 * @param string $responseContent
	 *
	 * @return boolean
	 */
	public function isTimeout( $responseContent ) {
		$timeoutClasses = $this->config->get( 'WBQualityConstraintsSparqlTimeoutExceptionClasses' );
		// tolerate a missing body (e. g. null) from a failed request
		$content = (string)$responseContent;

		foreach ( (array)$timeoutClasses as $fqn ) {
			$fqn = (string)$fqn;
			// an empty name would be "contained" in every response
			if ( $fqn !== '' && strpos( $content, $fqn ) !== false ) {
				return true;
			}
		}

		return false;
	}
579
580
	/**
	 * Determine from the response headers whether the response was served
	 * from a cache, and if so, for how long it may be cached.
	 *
	 * Reads the first value of the "x-cache-status" header (a value of "hit"
	 * or starting with "hit-" counts as cached) and the max-age directive
	 * in the first value of the "cache-control" header.
	 *
	 * @param array $responseHeaders see MWHttpRequest::getResponseHeaders()
	 *
	 * @return int|boolean the max-age (in seconds) of a cached response,
	 * true if the response was cached but no max-age can be determined,
	 * false if the response was not cached
	 */
	public function getCacheMaxAge( $responseHeaders ) {
		$servedFromCache = array_key_exists( 'x-cache-status', $responseHeaders )
			&& preg_match( '/^hit(?:-.*)?$/', $responseHeaders['x-cache-status'][0] );
		if ( !$servedFromCache ) {
			return false;
		}

		if ( !array_key_exists( 'cache-control', $responseHeaders ) ) {
			return true;
		}

		$directive = [];
		if ( preg_match( '/\bmax-age=(\d+)\b/', $responseHeaders['cache-control'][0], $directive ) ) {
			return intval( $directive[1] );
		}

		return true;
	}
607
608
	/**
	 * Get the point in time until which requests should be held back,
	 * from the headers of a response with status 429, which is caused by
	 * throttling of too many SPARQL requests. The header format is defined
	 * in RFC 7231, see: https://tools.ietf.org/html/rfc7231#section-7.1.3
	 *
	 * The Retry-After header is looked up case-insensitively, and its value may
	 * be given either as a plain string or as a list of values (of which the
	 * first one is used). NOTE(review): getCacheMaxAge() in this class reads
	 * lower-cased header names mapped to lists of values, so that is presumably
	 * the shape returned by MWHttpRequest::getResponseHeaders() – confirm.
	 *
	 * @param array $responseHeaders
	 *
	 * @return int|ConvertibleTimestamp the time until which to wait,
	 * or SparqlHelper::NO_RETRY_AFTER if there is no Retry-After header
	 * or SparqlHelper::EMPTY_RETRY_AFTER if there is an empty Retry-After
	 * or SparqlHelper::INVALID_RETRY_AFTER if there is something wrong with the format
	 */
	public function getThrottling( array $responseHeaders ) {
		$headerFound = false;
		$retryAfterValue = null;
		foreach ( $responseHeaders as $name => $value ) {
			// header field names are case-insensitive
			if ( strcasecmp( (string)$name, 'Retry-After' ) === 0 ) {
				$headerFound = true;
				$retryAfterValue = $value;
				break;
			}
		}
		if ( !$headerFound ) {
			return self::NO_RETRY_AFTER;
		}

		if ( is_array( $retryAfterValue ) ) {
			// list of header values: use the first one, if any
			$retryAfterValue = $retryAfterValue ? reset( $retryAfterValue ) : '';
		}

		$trimmedRetryAfterValue = trim( (string)$retryAfterValue );
		// compare with '' explicitly: empty( '0' ) is true,
		// but "Retry-After: 0" is a valid delay of zero seconds
		if ( $trimmedRetryAfterValue === '' ) {
			return self::EMPTY_RETRY_AFTER;
		}

		if ( is_numeric( $trimmedRetryAfterValue ) ) {
			// delay in seconds
			$delaySeconds = (int)$trimmedRetryAfterValue;
			if ( $delaySeconds >= 0 ) {
				return $this->getTimestampInFuture( new DateInterval( 'PT' . $delaySeconds . 'S' ) );
			}
		} else {
			// date
			$unixTime = strtotime( $trimmedRetryAfterValue );
			if ( !empty( $unixTime ) ) {
				return new ConvertibleTimestamp( $unixTime );
			}
		}

		return self::INVALID_RETRY_AFTER;
	}
644
645
	/**
	 * Create a timestamp that lies the given interval after a freshly
	 * constructed ConvertibleTimestamp.
	 *
	 * @param DateInterval $delta
	 *
	 * @return ConvertibleTimestamp
	 */
	private function getTimestampInFuture( DateInterval $delta ) {
		$current = new ConvertibleTimestamp();
		$shifted = $current->timestamp->add( $delta );

		return new ConvertibleTimestamp( $shifted );
	}
649
650
	/**
	 * Runs a query against the configured endpoint and returns the results.
	 * TODO: See if Sparql Client in core can be used instead of rolling our own
	 *
	 * While the throttling lock is held, no request is sent at all. If the
	 * endpoint answers with status 429, the lock is set (until the time given
	 * in the Retry-After header, or else for the configured fallback duration)
	 * and the call fails.
	 *
	 * @param string $query The query, unencoded (plain string).
	 * @param bool $needsPrefixes Whether the query requires prefixes or they can be omitted.
	 *
	 * @return CachedQueryResults
	 *
	 * @throws SparqlHelperException if the query times out or some other error occurs,
	 * including a successful response whose body is not a JSON object
	 * @throws TooManySparqlRequestsException if requests are currently being throttled
	 * @throws InvalidArgumentException if the configured fallback duration is negative
	 */
	public function runQuery( $query, $needsPrefixes = true ) {

		if ( $this->throttlingLock->isLocked( self::EXPIRY_LOCK_ID ) ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' );
			throw new TooManySparqlRequestsException();
		}

		$endpoint = $this->config->get( 'WBQualityConstraintsSparqlEndpoint' );
		$maxQueryTimeMillis = $this->config->get( 'WBQualityConstraintsSparqlMaxMillis' );
		$fallbackBlockDuration = (int)$this->config->get( 'WBQualityConstraintsSparqlThrottlingFallbackDuration' );

		if ( $fallbackBlockDuration < 0 ) {
			throw new InvalidArgumentException( 'Fallback duration must be positive int but is: '.
				$fallbackBlockDuration );
		}

		if ( $needsPrefixes ) {
			$query = $this->prefixes . $query;
		}
		// leading comment that marks the query as coming from this extension
		$query = "#wbqc\n" . $query;

		$url = $endpoint . '?' . http_build_query(
			[
				'query' => $query,
				'format' => 'json',
				'maxQueryTimeMillis' => $maxQueryTimeMillis,
			],
			// numeric_prefix is a string parameter; '' is its default
			// (passing null to it is deprecated as of PHP 8.1)
			'', ini_get( 'arg_separator.output' ),
			// encode spaces with %20, not +
			PHP_QUERY_RFC3986
		);

		$options = [
			'method' => 'GET',
			// give the endpoint one second more than the query itself may take
			'timeout' => (int)round( ( $maxQueryTimeMillis + 1000 ) / 1000 ),
			'connectTimeout' => 'default',
			'userAgent' => $this->defaultUserAgent,
		];
		$request = $this->requestFactory->create( $url, $options );
		$startTime = microtime( true );
		$status = $request->execute();
		$endTime = microtime( true );
		$this->dataFactory->timing(
			'wikibase.quality.constraints.sparql.timing',
			( $endTime - $startTime ) * 1000
		);

		if ( $request->getStatus() === self::HTTP_TOO_MANY_REQUESTS ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' );
			$throttlingUntil = $this->getThrottling( $request->getResponseHeaders() );
			if ( !( $throttlingUntil instanceof ConvertibleTimestamp ) ) {
				// no usable Retry-After: block for the configured fallback duration
				$this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterInvalid( $request );
				$this->throttlingLock->lock(
					self::EXPIRY_LOCK_ID,
					$this->getTimestampInFuture( new DateInterval( 'PT' . $fallbackBlockDuration . 'S' ) )
				);
			} else {
				$this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterPresent( $throttlingUntil, $request );
				$this->throttlingLock->lock( self::EXPIRY_LOCK_ID, $throttlingUntil );
			}
			throw new TooManySparqlRequestsException();
		}

		$maxAge = $this->getCacheMaxAge( $request->getResponseHeaders() );
		if ( $maxAge ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.cached' );
		}

		if ( !$status->isOK() ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.error' );

			$this->dataFactory->increment(
				"wikibase.quality.constraints.sparql.error.http.{$request->getStatus()}"
			);

			if ( $this->isTimeout( $request->getContent() ) ) {
				$this->dataFactory->increment(
					'wikibase.quality.constraints.sparql.error.timeout'
				);
			}

			throw new SparqlHelperException();
		}

		$arr = json_decode( $request->getContent(), true );
		if ( !is_array( $arr ) ) {
			// json_decode() yields null for an undecodable body; callers index into
			// the result as an array, so report this as a query error instead
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.error' );
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.error.json' );
			throw new SparqlHelperException();
		}

		// NOTE(review): getCacheMaxAge() returns boolean true for a cached response
		// without a max-age, and that value is passed on unchanged below –
		// confirm that CachingMetadata::ofMaximumAgeInSeconds() accepts it.
		return new CachedQueryResults(
			$arr,
			Metadata::ofCachingMetadata(
				$maxAge ?
					CachingMetadata::ofMaximumAgeInSeconds( $maxAge ) :
					CachingMetadata::fresh()
			)
		);
	}
756
757
}
758