Completed
Push — master ( 910e66...d1e643 )
by
unknown
01:57
created

SparqlHelper::guardAgainstTooManyRequestsError()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 26
rs 9.504
c 0
b 0
f 0
cc 4
nc 4
nop 1
1
<?php
2
3
namespace WikibaseQuality\ConstraintReport\ConstraintCheck\Helper;
4
5
use Config;
6
use DataValues\DataValue;
7
use DataValues\MonolingualTextValue;
8
use DateInterval;
9
use FormatJson;
10
use IBufferingStatsdDataFactory;
11
use InvalidArgumentException;
12
use MapCacheLRU;
13
use MediaWiki\Http\HttpRequestFactory;
14
use MWException;
15
use MWHttpRequest;
16
use WANObjectCache;
17
use Wikibase\DataModel\Entity\EntityId;
18
use Wikibase\DataModel\Entity\EntityIdParser;
19
use Wikibase\DataModel\Entity\EntityIdParsingException;
20
use Wikibase\DataModel\Entity\EntityIdValue;
21
use Wikibase\DataModel\Services\Lookup\PropertyDataTypeLookup;
22
use Wikibase\DataModel\Snak\PropertyValueSnak;
23
use Wikibase\DataModel\Statement\Statement;
24
use Wikibase\Repo\Rdf\RdfVocabulary;
25
use WikibaseQuality\ConstraintReport\Api\ExpiryLock;
26
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedBool;
27
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedEntityIds;
28
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedQueryResults;
29
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata;
30
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata;
31
use WikibaseQuality\ConstraintReport\ConstraintCheck\Context\Context;
32
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessage;
33
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessageDeserializer;
34
use WikibaseQuality\ConstraintReport\ConstraintCheck\Message\ViolationMessageSerializer;
35
use WikibaseQuality\ConstraintReport\Role;
36
use Wikimedia\Timestamp\ConvertibleTimestamp;
37
38
/**
39
 * Class for running a SPARQL query on some endpoint and getting the results.
40
 *
41
 * @author Lucas Werkmeister
42
 * @license GPL-2.0-or-later
43
 */
44
class SparqlHelper {
45
46
	/**
47
	 * @var Config
48
	 */
49
	private $config;
50
51
	/**
52
	 * @var RdfVocabulary
53
	 */
54
	private $rdfVocabulary;
55
56
	/**
57
	 * @var string[]
58
	 */
59
	private $entityPrefixes;
60
61
	/**
62
	 * @var string
63
	 */
64
	private $prefixes;
65
66
	/**
67
	 * @var EntityIdParser
68
	 */
69
	private $entityIdParser;
70
71
	/**
72
	 * @var PropertyDataTypeLookup
73
	 */
74
	private $propertyDataTypeLookup;
75
76
	/**
77
	 * @var WANObjectCache
78
	 */
79
	private $cache;
80
81
	/**
82
	 * @var ViolationMessageSerializer
83
	 */
84
	private $violationMessageSerializer;
85
86
	/**
87
	 * @var ViolationMessageDeserializer
88
	 */
89
	private $violationMessageDeserializer;
90
91
	/**
92
	 * @var IBufferingStatsdDataFactory
93
	 */
94
	private $dataFactory;
95
96
	/**
97
	 * @var LoggingHelper
98
	 */
99
	private $loggingHelper;
100
101
	/**
102
	 * @var string
103
	 */
104
	private $defaultUserAgent;
105
106
	/**
107
	 * @var ExpiryLock
108
	 */
109
	private $throttlingLock;
110
111
	/**
112
	 * @var int stands for: No Retry-After header-field was sent back
113
	 */
114
	const NO_RETRY_AFTER = -1;
115
	/**
116
	 * @var int stands for: Empty Retry-After header-field was sent back
117
	 */
118
	const EMPTY_RETRY_AFTER = -2;
119
	/**
120
	 * @var int stands for: Invalid Retry-After header-field was sent back
121
	 * (for example, a string that is neither a delay in seconds nor a parseable date)
122
	 */
123
	const INVALID_RETRY_AFTER = -3;
124
	/**
125
	 * @var string ID on which the lock is applied on
126
	 */
127
	const EXPIRY_LOCK_ID = 'SparqlHelper.runQuery';
128
129
	/**
130
	 * @var int HTTP response code for too many requests
131
	 */
132
	const HTTP_TOO_MANY_REQUESTS = 429;
133
134
	/**
135
	 * @var HttpRequestFactory
136
	 */
137
	private $requestFactory;
138
139
	/**
	 * Store the injected collaborators and precompute two derived values:
	 * the list of entity namespace IRIs (used to recognise entity IRIs in
	 * query results) and the PREFIX declarations prepended to queries.
	 *
	 * @param Config $config
	 * @param RdfVocabulary $rdfVocabulary
	 * @param EntityIdParser $entityIdParser
	 * @param PropertyDataTypeLookup $propertyDataTypeLookup
	 * @param WANObjectCache $cache
	 * @param ViolationMessageSerializer $violationMessageSerializer
	 * @param ViolationMessageDeserializer $violationMessageDeserializer
	 * @param IBufferingStatsdDataFactory $dataFactory
	 * @param ExpiryLock $throttlingLock
	 * @param LoggingHelper $loggingHelper
	 * @param string $defaultUserAgent passed as the 'userAgent' option of outgoing requests
	 * @param HttpRequestFactory $requestFactory
	 */
	public function __construct(
		Config $config,
		RdfVocabulary $rdfVocabulary,
		EntityIdParser $entityIdParser,
		PropertyDataTypeLookup $propertyDataTypeLookup,
		WANObjectCache $cache,
		ViolationMessageSerializer $violationMessageSerializer,
		ViolationMessageDeserializer $violationMessageDeserializer,
		IBufferingStatsdDataFactory $dataFactory,
		ExpiryLock $throttlingLock,
		LoggingHelper $loggingHelper,
		$defaultUserAgent,
		HttpRequestFactory $requestFactory
	) {
		// configuration and vocabulary
		$this->config = $config;
		$this->rdfVocabulary = $rdfVocabulary;
		$this->entityIdParser = $entityIdParser;
		$this->propertyDataTypeLookup = $propertyDataTypeLookup;

		// caching and (de)serialization of cached violation messages
		$this->cache = $cache;
		$this->violationMessageSerializer = $violationMessageSerializer;
		$this->violationMessageDeserializer = $violationMessageDeserializer;

		// metrics, throttling, logging and HTTP
		$this->dataFactory = $dataFactory;
		$this->throttlingLock = $throttlingLock;
		$this->loggingHelper = $loggingHelper;
		$this->defaultUserAgent = $defaultUserAgent;
		$this->requestFactory = $requestFactory;

		// one namespace IRI per entity namespace name, as a plain list
		$this->entityPrefixes = [];
		foreach ( $rdfVocabulary->entityNamespaceNames as $entityNamespaceName ) {
			$this->entityPrefixes[] = $rdfVocabulary->getNamespaceURI( $entityNamespaceName );
		}

		$this->prefixes = $this->getQueryPrefixes( $rdfVocabulary );
	}
172
173
	/**
	 * Build the SPARQL PREFIX declarations for a vocabulary, one per line,
	 * each line terminated by a newline.
	 *
	 * Declarations are emitted in this order: every entity namespace, then
	 * "wds" and "wdv", then seven property namespaces per property source,
	 * and finally "wikibase".
	 *
	 * @param RdfVocabulary $rdfVocabulary
	 *
	 * @return string
	 */
	private function getQueryPrefixes( RdfVocabulary $rdfVocabulary ) {
		// TODO: it would probably be smarter if RdfVocabulary exposed these prefixes somehow
		$declaration = static function ( $label, $namespaceName ) use ( $rdfVocabulary ) {
			return 'PREFIX ' . $label . ': <' . $rdfVocabulary->getNamespaceURI( $namespaceName ) . ">\n";
		};

		$declarations = '';
		foreach ( $rdfVocabulary->entityNamespaceNames as $entityNamespaceName ) {
			$declarations .= $declaration( $entityNamespaceName, $entityNamespaceName );
		}

		$declarations .= $declaration( 'wds', RdfVocabulary::NS_STATEMENT );
		$declarations .= $declaration( 'wdv', RdfVocabulary::NS_VALUE );

		// the order of this list determines the order of the declarations
		$propertyNamespaceKeys = [
			RdfVocabulary::NSP_DIRECT_CLAIM,
			RdfVocabulary::NSP_CLAIM,
			RdfVocabulary::NSP_CLAIM_STATEMENT,
			RdfVocabulary::NSP_QUALIFIER,
			RdfVocabulary::NSP_QUALIFIER_VALUE,
			RdfVocabulary::NSP_REFERENCE,
			RdfVocabulary::NSP_REFERENCE_VALUE,
		];
		foreach ( $rdfVocabulary->propertyNamespaceNames as $sourceNamespaces ) {
			foreach ( $propertyNamespaceKeys as $namespaceKey ) {
				$propertyNamespaceName = $sourceNamespaces[$namespaceKey];
				$declarations .= $declaration( $propertyNamespaceName, $propertyNamespaceName );
			}
		}

		$declarations .= $declaration( 'wikibase', RdfVocabulary::NS_ONTOLOGY );

		return $declarations;
	}
221
222
	/**
	 * Ask the endpoint whether an entity reaches any of the given classes
	 * through zero or more steps of the configured "subclass of" property.
	 *
	 * The classes are checked in chunks of 20 per query; the first chunk
	 * that yields a positive answer ends the search. The metadata of every
	 * query that was run is merged into the result.
	 *
	 * @param string $id entity ID serialization of the entity to check
	 * @param string[] $classes entity ID serializations of the expected types
	 *
	 * @return CachedBool
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function hasType( $id, array $classes ) {
		$subclassOfId = $this->config->get( 'WBQualityConstraintsSubclassOfId' );
		// TODO hint:gearing is a workaround for T168973 and can hopefully be removed eventually
		$gearingHint = '';
		if ( $this->config->get( 'WBQualityConstraintsSparqlHasWikibaseSupport' ) ) {
			$gearingHint = ' hint:Prior hint:gearing "forward".';
		}

		$collectedMetadata = [];

		foreach ( array_chunk( $classes, 20 ) as $chunk ) {
			$prefixedClasses = [];
			foreach ( $chunk as $class ) {
				$prefixedClasses[] = 'wd:' . $class;
			}
			$classesValues = implode( ' ', $prefixedClasses );

			$query = <<<EOF
ASK {
  BIND(wd:$id AS ?item)
  VALUES ?class { $classesValues }
  ?item wdt:$subclassOfId* ?class.$gearingHint
}
EOF;

			$response = $this->runQuery( $query );
			$collectedMetadata[] = $response->getMetadata();
			if ( $response->getArray()['boolean'] ) {
				// positive answer: no need to query the remaining chunks
				return new CachedBool( true, Metadata::merge( $collectedMetadata ) );
			}
		}

		return new CachedBool( false, Metadata::merge( $collectedMetadata ) );
	}
269
270
	/**
	 * Find up to 10 other entities that have a statement with the same
	 * property and the same value as the given statement.
	 *
	 * @param Statement $statement
	 * @param bool $ignoreDeprecatedStatements Whether to ignore deprecated statements or not.
	 *
	 * @return CachedEntityIds
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function findEntitiesWithSameStatement(
		Statement $statement,
		$ignoreDeprecatedStatements
	) {
		// getSerialization() is the accessor for the plain ID string (and what
		// findEntitiesWithSameQualifierOrReference() uses); serialize() is the
		// PHP serialization hook and not meant for building queries.
		$pid = $statement->getPropertyId()->getSerialization();
		// the statement GUID is used as a wds: local name with '$' replaced by '-'
		$guid = str_replace( '$', '-', $statement->getGuid() );

		$deprecatedFilter = '';
		if ( $ignoreDeprecatedStatements ) {
			$deprecatedFilter = 'MINUS { ?otherStatement wikibase:rank wikibase:DeprecatedRank. }';
		}

		$query = <<<EOF
SELECT DISTINCT ?otherEntity WHERE {
  BIND(wds:$guid AS ?statement)
  BIND(p:$pid AS ?p)
  BIND(ps:$pid AS ?ps)
  ?entity ?p ?statement.
  ?statement ?ps ?value.
  ?otherStatement ?ps ?value.
  ?otherEntity ?p ?otherStatement.
  FILTER(?otherEntity != ?entity)
  $deprecatedFilter
}
LIMIT 10
EOF;

		$result = $this->runQuery( $query );

		return $this->getOtherEntities( $result );
	}
308
309
	/**
	 * Find up to 10 other entities that have a statement carrying the same
	 * qualifier value or reference value as the given snak.
	 *
	 * @param EntityId $entityId The entity ID on the containing entity
	 * @param PropertyValueSnak $snak
	 * @param string $type Context::TYPE_QUALIFIER or Context::TYPE_REFERENCE
	 * @param bool $ignoreDeprecatedStatements Whether to ignore deprecated statements or not.
	 *
	 * @return CachedEntityIds
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function findEntitiesWithSameQualifierOrReference(
		EntityId $entityId,
		PropertyValueSnak $snak,
		$type,
		$ignoreDeprecatedStatements
	) {
		$isQualifier = $type === Context::TYPE_QUALIFIER;

		$eid = $entityId->getSerialization();
		$pid = $snak->getPropertyId()->getSerialization();
		$dataValue = $snak->getDataValue();
		$dataType = $this->propertyDataTypeLookup->getDataTypeIdForProperty(
			$snak->getPropertyId()
		);
		[ $value, $isFullValue ] = $this->getRdfLiteral( $dataType, $dataValue );

		// pq / pr for simple values, pqv / prv when matching a full value node
		$prefix = $isQualifier ? 'pq' : 'pr';
		if ( $isFullValue ) {
			$prefix .= 'v';
		}

		// references hang off the statement via prov:wasDerivedFrom
		if ( $isQualifier ) {
			$path = "$prefix:$pid";
		} else {
			$path = "prov:wasDerivedFrom/$prefix:$pid";
		}

		$deprecatedFilter = $ignoreDeprecatedStatements ?
			'  MINUS { ?otherStatement wikibase:rank wikibase:DeprecatedRank. }' :
			'';

		$query = <<<EOF
SELECT DISTINCT ?otherEntity WHERE {
  BIND(wd:$eid AS ?entity)
  BIND($value AS ?value)
  ?entity ?p ?statement.
  ?statement $path ?value.
  ?otherStatement $path ?value.
  ?otherEntity ?otherP ?otherStatement.
  FILTER(?otherEntity != ?entity)
$deprecatedFilter
}
LIMIT 10
EOF;

		return $this->getOtherEntities( $this->runQuery( $query ) );
	}
364
365
	/**
	 * Return SPARQL code for a string literal with $text as content.
	 *
	 * Double quotes and backslashes are escaped, and so are line feeds and
	 * carriage returns: the SPARQL grammar does not allow those two raw
	 * characters inside a "..." literal, so leaving them unescaped would
	 * produce a syntactically invalid query.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function stringLiteral( $text ) {
		// strtr() with an array never re-scans its own replacements,
		// so the backslashes added here are not escaped a second time
		$escapes = [
			'"' => '\\"',
			'\\' => '\\\\',
			"\n" => '\\n',
			"\r" => '\\r',
		];
		return '"' . strtr( $text, $escapes ) . '"';
	}
375
376
	/**
	 * Extract and parse entity IDs from the ?otherEntity column of a SPARQL query result.
	 *
	 * Each IRI is compared against every known entity namespace prefix; the
	 * remainder after the first prefix that yields a parseable ID is used.
	 * Rows whose IRI matches no prefix, or whose remainder cannot be parsed
	 * under any matching prefix, produce null in the resulting list.
	 *
	 * @param CachedQueryResults $results
	 *
	 * @return CachedEntityIds
	 */
	private function getOtherEntities( CachedQueryResults $results ) {
		return new CachedEntityIds( array_map(
			function ( $resultBindings ) {
				$entityIRI = $resultBindings['otherEntity']['value'];
				foreach ( $this->entityPrefixes as $entityPrefix ) {
					$entityPrefixLength = strlen( $entityPrefix );
					if ( substr( $entityIRI, 0, $entityPrefixLength ) !== $entityPrefix ) {
						// IRI is not in this namespace; try the next one
						continue;
					}
					try {
						return $this->entityIdParser->parse(
							substr( $entityIRI, $entityPrefixLength )
						);
					} catch ( EntityIdParsingException $e ) {
						// not a valid ID under this prefix; keep trying the remaining prefixes
					}
				}

				// no prefix produced a parseable entity ID
				return null;
			},
			$results->getArray()['results']['bindings']
		), $results->getMetadata() );
	}
407
408
	// @codingStandardsIgnoreStart cyclomatic complexity of this function is too high
	/**
	 * Turn a data value into the SPARQL term (literal or IRI) that matches it in a query.
	 *
	 * @param string $dataType
	 * @param DataValue $dataValue
	 *
	 * @return array two elements: the term as a string in SPARQL syntax,
	 * and a boolean telling whether the term refers to a full value node
	 * @throws InvalidArgumentException for an unknown data type, a URL that cannot be
	 * written as a SPARQL IRI, or a language code that is not a valid SPARQL language tag
	 */
	private function getRdfLiteral( $dataType, DataValue $dataValue ) {
		switch ( $dataType ) {
			case 'string':
			case 'external-id':
				$term = $this->stringLiteral( $dataValue->getValue() );
				break;
			case 'commonsMedia':
				$term = '<' . $this->rdfVocabulary->getMediaFileURI( $dataValue->getValue() ) . '>';
				break;
			case 'geo-shape':
				$term = '<' . $this->rdfVocabulary->getGeoShapeURI( $dataValue->getValue() ) . '>';
				break;
			case 'tabular-data':
				$term = '<' . $this->rdfVocabulary->getTabularDataURI( $dataValue->getValue() ) . '>';
				break;
			case 'url':
				$iri = $dataValue->getValue();
				$isSafeIri = preg_match( '/^[^<>"{}\\\\|^`\\x00-\\x20]*$/D', $iri );
				if ( !$isSafeIri ) {
					// not a valid URL for SPARQL (see SPARQL spec, production 139 IRIREF)
					// such an URL should never reach us, so just throw
					throw new InvalidArgumentException( 'invalid URL: ' . $iri );
				}
				$term = '<' . $iri . '>';
				break;
			case 'wikibase-item':
			case 'wikibase-property':
				/** @var EntityIdValue $dataValue */
				'@phan-var EntityIdValue $dataValue';
				$term = 'wd:' . $dataValue->getEntityId()->getSerialization();
				break;
			case 'monolingualtext':
				/** @var MonolingualTextValue $dataValue */
				'@phan-var MonolingualTextValue $dataValue';
				$languageTag = $dataValue->getLanguageCode();
				$isSafeTag = preg_match( '/^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/D', $languageTag );
				if ( !$isSafeTag ) {
					// not a valid language tag for SPARQL (see SPARQL spec, production 145 LANGTAG)
					// such a language tag should never reach us, so just throw
					throw new InvalidArgumentException( 'invalid language tag: ' . $languageTag );
				}
				$term = $this->stringLiteral( $dataValue->getText() ) . '@' . $languageTag;
				break;
			case 'globe-coordinate':
			case 'quantity':
			case 'time':
				// the only data types that are matched through a full value node
				return [ 'wdv:' . $dataValue->getHash(), true ];
			default:
				throw new InvalidArgumentException( 'unknown data type: ' . $dataType );
		}

		// every case that reaches this point produced a simple (non-full-value) term
		return [ $term, false ];
	}
	// @codingStandardsIgnoreEnd
464
465
	/**
	 * Caching wrapper around matchesRegularExpressionWithSparql().
	 *
	 * Results are kept in one LRU map per regex (keyed by the SHA-256 hash
	 * of the regex), whose entries are keyed by the SHA-256 hash of the
	 * text. A cached entry is either a boolean or a serialized
	 * ConstraintParameterException. When the map has no entry for the text
	 * after the cache lookup, the query is run directly and its result is
	 * returned without being stored by this call.
	 *
	 * @param string $text
	 * @param string $regex
	 *
	 * @return bool
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 * @throws ConstraintParameterException if the $regex is invalid
	 * @throws MWException if the cached entry is neither of the two known kinds
	 */
	public function matchesRegularExpression( $text, $regex ) {
		$textHash = hash( 'sha256', $text );
		$cacheKey = $this->cache->makeKey(
			'WikibaseQualityConstraints', // extension
			'regex', // action
			'WDQS-Java', // regex flavor
			hash( 'sha256', $regex )
		);
		$cacheMapSize = $this->config->get( 'WBQualityConstraintsFormatCacheMapSize' );

		$refreshCacheMap = function ( $previousMapArray ) use ( $text, $regex, $textHash, $cacheMapSize ) {
			// Initialize the cache map if not set
			if ( $previousMapArray === false ) {
				$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.init' );
				return [];
			}

			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh' );
			$cacheMap = MapCacheLRU::newFromArray( $previousMapArray, $cacheMapSize );

			if ( $cacheMap->has( $textHash ) ) {
				$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.hit' );
				$cacheMap->get( $textHash ); // ping cache
				return $cacheMap->toArray();
			}

			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.miss' );
			try {
				$outcome = $this->matchesRegularExpressionWithSparql( $text, $regex );
			} catch ( ConstraintParameterException $e ) {
				// an invalid regex is a cacheable outcome, stored in serialized form
				$outcome = $this->serializeConstraintParameterException( $e );
			} catch ( SparqlHelperException $e ) {
				// don’t cache this
				return $cacheMap->toArray();
			}
			$cacheMap->set( $textHash, $outcome, 3 / 8 );

			return $cacheMap->toArray();
		};

		$cacheMapArray = $this->cache->getWithSetCallback(
			$cacheKey,
			WANObjectCache::TTL_DAY,
			$refreshCacheMap,
			[
				// Once map is > 1 sec old, consider refreshing
				'ageNew' => 1,
				// Update 5 seconds after "ageNew" given a 1 query/sec cache check rate
				'hotTTR' => 5,
				// avoid querying cache servers multiple times in a request
				// (e. g. when checking format of a reference URL used multiple times on an entity)
				'pcTTL' => WANObjectCache::TTL_PROC_LONG,
			]
		);

		if ( !isset( $cacheMapArray[$textHash] ) ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.miss' );
			return $this->matchesRegularExpressionWithSparql( $text, $regex );
		}

		$this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.hit' );
		$cached = $cacheMapArray[$textHash];

		if ( is_bool( $cached ) ) {
			return $cached;
		}

		if ( is_array( $cached ) &&
			$cached['type'] == ConstraintParameterException::class ) {
			throw $this->deserializeConstraintParameterException( $cached );
		}

		throw new MWException(
			'Value of unknown type in object cache (' .
			'cache key: ' . $cacheKey . ', ' .
			'cache map key: ' . $textHash . ', ' .
			'value type: ' . gettype( $cached ) . ')'
		);
	}
557
558
	/**
	 * Convert a ConstraintParameterException into a plain array:
	 * a 'type' marker plus the serialized violation message.
	 *
	 * @param ConstraintParameterException $cpe
	 *
	 * @return array
	 */
	private function serializeConstraintParameterException( ConstraintParameterException $cpe ) {
		$serializedMessage = $this->violationMessageSerializer->serialize( $cpe->getViolationMessage() );

		return [
			'type' => ConstraintParameterException::class,
			'violationMessage' => $serializedMessage,
		];
	}
564
565
	/**
	 * Rebuild a ConstraintParameterException from the array form produced
	 * by serializeConstraintParameterException().
	 *
	 * @param array $serialization must contain the key 'violationMessage'
	 *
	 * @return ConstraintParameterException
	 */
	private function deserializeConstraintParameterException( array $serialization ) {
		return new ConstraintParameterException(
			$this->violationMessageDeserializer->deserialize( $serialization['violationMessage'] )
		);
	}
571
572
	/**
	 * Let the SPARQL endpoint decide whether $text matches $regex as a whole.
	 *
	 * This function is only public for testing purposes;
	 * use matchesRegularExpression, which is equivalent but caches results.
	 *
	 * @param string $text
	 * @param string $regex
	 *
	 * @return bool
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 * @throws ConstraintParameterException if the $regex is invalid
	 */
	public function matchesRegularExpressionWithSparql( $text, $regex ) {
		$textStringLiteral = $this->stringLiteral( $text );
		// anchor the regex so that it has to match the entire text
		$regexStringLiteral = $this->stringLiteral( '^(?:' . $regex . ')$' );

		$query = 'SELECT (REGEX(' . $textStringLiteral . ', ' . $regexStringLiteral . ') AS ?matches) {}';

		// the query uses no prefixed names, so the prefixes can be omitted
		$firstRow = $this->runQuery( $query, false )->getArray()['results']['bindings'][0];

		if ( !array_key_exists( 'matches', $firstRow ) ) {
			// empty result: regex broken
			throw new ConstraintParameterException(
				( new ViolationMessage( 'wbqc-violation-message-parameter-regex' ) )
					->withInlineCode( $regex, Role::CONSTRAINT_PARAMETER_VALUE )
			);
		}

		// true or false ⇒ regex okay, text matches or not
		return $firstRow['matches']['value'] === 'true';
	}
605
606
	/**
	 * Check whether the text content of an error response indicates a query timeout.
	 *
	 * The response counts as a timeout when it contains any of the class
	 * names configured in WBQualityConstraintsSparqlTimeoutExceptionClasses.
	 * With no class names configured, nothing is considered a timeout.
	 *
	 * @param string $responseContent
	 *
	 * @return bool
	 */
	public function isTimeout( $responseContent ) {
		$timeoutClasses = $this->config->get( 'WBQualityConstraintsSparqlTimeoutExceptionClasses' );
		if ( empty( $timeoutClasses ) ) {
			// an empty alternation would yield the pattern '//', which matches
			// every response and would report every error as a timeout
			return false;
		}

		$timeoutRegex = implode( '|', array_map(
			static function ( $fqn ) {
				return preg_quote( $fqn, '/' );
			},
			$timeoutClasses
		) );

		// cast so that a missing (null) response body is handled as an empty string
		return (bool)preg_match( '/' . $timeoutRegex . '/', (string)$responseContent );
	}
622
623
	/**
	 * Determine from the response headers whether a response came from a
	 * cache and, if it did, how long it may be cached.
	 *
	 * @param array $responseHeaders see MWHttpRequest::getResponseHeaders()
	 *
	 * @return int|bool false if the first x-cache-status value is not "hit"
	 * (optionally followed by "-" and more text); otherwise the max-age
	 * (in seconds) from the first cache-control value, or true if no
	 * max-age can be determined
	 */
	public function getCacheMaxAge( $responseHeaders ) {
		$isCacheHit = array_key_exists( 'x-cache-status', $responseHeaders ) &&
			preg_match( '/^hit(?:-.*)?$/', $responseHeaders['x-cache-status'][0] );
		if ( !$isCacheHit ) {
			return false;
		}

		$maxAgeMatch = [];
		$hasMaxAge = array_key_exists( 'cache-control', $responseHeaders ) &&
			preg_match( '/\bmax-age=(\d+)\b/', $responseHeaders['cache-control'][0], $maxAgeMatch );
		if ( !$hasMaxAge ) {
			// served from cache, but for an unknown duration
			return true;
		}

		return intval( $maxAgeMatch[1] );
	}
650
651
	/**
	 * Get the delay date of a 429 headered response, which is caused by
	 * throttling of too many SPARQL requests. The header format is defined
	 * in RFC 7231, see: https://tools.ietf.org/html/rfc7231#section-7.1.3
	 *
	 * A numeric, non-negative header value is read as a delay in seconds
	 * from now (a delay of 0 is valid); any other non-empty value is handed
	 * to strtotime() and read as a date.
	 *
	 * @param MWHttpRequest $request
	 *
	 * @return int|ConvertibleTimestamp the time until which requests should be held back,
	 * or SparqlHelper::NO_RETRY_AFTER if there is no Retry-After header
	 * or SparqlHelper::EMPTY_RETRY_AFTER if there is an empty Retry-After
	 * or SparqlHelper::INVALID_RETRY_AFTER if there is something wrong with the format
	 */
	public function getThrottling( MWHttpRequest $request ) {
		$retryAfterValue = $request->getResponseHeader( 'Retry-After' );
		if ( $retryAfterValue === null ) {
			return self::NO_RETRY_AFTER;
		}

		$trimmedRetryAfterValue = trim( $retryAfterValue );
		// compare against '' explicitly: empty() would also reject the valid value "0"
		if ( $trimmedRetryAfterValue === '' ) {
			return self::EMPTY_RETRY_AFTER;
		}

		if ( is_numeric( $trimmedRetryAfterValue ) ) {
			$delaySeconds = (int)$trimmedRetryAfterValue;
			if ( $delaySeconds < 0 ) {
				return self::INVALID_RETRY_AFTER;
			}
			return $this->getTimestampInFuture( new DateInterval( 'PT' . $delaySeconds . 'S' ) );
		}

		// strtotime() returns false when the value cannot be parsed as a date
		$parsedDate = strtotime( $trimmedRetryAfterValue );
		if ( empty( $parsedDate ) ) {
			return self::INVALID_RETRY_AFTER;
		}

		return new ConvertibleTimestamp( $parsedDate );
	}
687
688
	/**
	 * Return the current time shifted forward by the given interval.
	 *
	 * @param DateInterval $delta
	 *
	 * @return ConvertibleTimestamp
	 */
	private function getTimestampInFuture( DateInterval $delta ) {
		$deadline = ( new ConvertibleTimestamp() )->timestamp->add( $delta );

		return new ConvertibleTimestamp( $deadline );
	}
692
693
	/**
	 * Runs a query against the configured endpoint and returns the results.
	 * TODO: See if Sparql Client in core can be used instead of rolling our own
	 *
	 * No request is sent while the throttling lock is held. The request
	 * duration and the outcome (cached, JSON error, HTTP error, timeout)
	 * are reported to the stats data factory.
	 *
	 * @param string $query The query, unencoded (plain string).
	 * @param bool $needsPrefixes Whether the query requires prefixes or they can be omitted.
	 * Ignored (treated as false) when WBQualityConstraintsSparqlHasWikibaseSupport is set.
	 *
	 * @return CachedQueryResults
	 *
	 * @throws TooManySparqlRequestsException if requests are currently throttled
	 * or the endpoint answers with HTTP 429
	 * @throws SparqlHelperException if the query times out or some other error occurs
	 */
	public function runQuery( $query, $needsPrefixes = true ) {

		if ( $this->throttlingLock->isLocked( self::EXPIRY_LOCK_ID ) ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' );
			throw new TooManySparqlRequestsException();
		}

		$endpoint = $this->config->get( 'WBQualityConstraintsSparqlEndpoint' );
		$maxQueryTimeMillis = $this->config->get( 'WBQualityConstraintsSparqlMaxMillis' );

		if ( $this->config->get( 'WBQualityConstraintsSparqlHasWikibaseSupport' ) ) {
			$needsPrefixes = false;
		}

		if ( $needsPrefixes ) {
			$query = $this->prefixes . $query;
		}
		// leading comment line that marks the query as coming from this extension
		$query = "#wbqc\n" . $query;

		$url = $endpoint . '?' . http_build_query(
			[
				'query' => $query,
				'format' => 'json',
				'maxQueryTimeMillis' => $maxQueryTimeMillis,
			],
			// numeric prefix: the empty string is the default; passing null
			// to this string parameter is deprecated as of PHP 8.1
			'',
			ini_get( 'arg_separator.output' ),
			// encode spaces with %20, not +
			PHP_QUERY_RFC3986
		);

		$options = [
			'method' => 'GET',
			// allow one second more than the query itself may take
			'timeout' => (int)round( ( $maxQueryTimeMillis + 1000 ) / 1000 ),
			'connectTimeout' => 'default',
			'userAgent' => $this->defaultUserAgent,
		];
		$request = $this->requestFactory->create( $url, $options, __METHOD__ );
		$startTime = microtime( true );
		$requestStatus = $request->execute();
		$endTime = microtime( true );
		$this->dataFactory->timing(
			'wikibase.quality.constraints.sparql.timing',
			( $endTime - $startTime ) * 1000
		);

		$this->guardAgainstTooManyRequestsError( $request );

		$maxAge = $this->getCacheMaxAge( $request->getResponseHeaders() );
		if ( $maxAge ) {
			$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.cached' );
		}

		if ( $requestStatus->isOK() ) {
			$json = $request->getContent();
			$jsonStatus = FormatJson::parse( $json, FormatJson::FORCE_ASSOC );
			if ( $jsonStatus->isOK() ) {
				// getCacheMaxAge() returns boolean true for a cache hit without a
				// max-age; only a positive integer may be used as a maximum age.
				// NOTE(review): a cache hit of unknown age is reported as fresh here
				// because no upper bound is known — confirm this is the desired metadata.
				$cachingMetadata = is_int( $maxAge ) && $maxAge > 0 ?
					CachingMetadata::ofMaximumAgeInSeconds( $maxAge ) :
					CachingMetadata::fresh();

				return new CachedQueryResults(
					$jsonStatus->getValue(),
					Metadata::ofCachingMetadata( $cachingMetadata )
				);
			} else {
				$jsonErrorCode = $jsonStatus->getErrors()[0]['message'];
				$this->dataFactory->increment(
					"wikibase.quality.constraints.sparql.error.json.$jsonErrorCode"
				);
				// fall through to general error handling
			}
		} else {
			$this->dataFactory->increment(
				"wikibase.quality.constraints.sparql.error.http.{$request->getStatus()}"
			);
			// fall through to general error handling
		}

		$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.error' );

		if ( $this->isTimeout( $request->getContent() ) ) {
			$this->dataFactory->increment(
				'wikibase.quality.constraints.sparql.error.timeout'
			);
		}

		throw new SparqlHelperException();
	}
792
793
	/**
794
	 * Handle a potential “too many requests” error.
795
	 *
796
	 * @param MWHttpRequest $request
797
	 * @throws TooManySparqlRequestsException
798
	 */
799
	private function guardAgainstTooManyRequestsError( MWHttpRequest $request ): void {
800
		if ( $request->getStatus() !== self::HTTP_TOO_MANY_REQUESTS ) {
801
			return;
802
		}
803
804
		$fallbackBlockDuration = (int)$this->config->get( 'WBQualityConstraintsSparqlThrottlingFallbackDuration' );
805
806
		if ( $fallbackBlockDuration < 0 ) {
807
			throw new InvalidArgumentException( 'Fallback duration must be positive int but is: ' .
808
				$fallbackBlockDuration );
809
		}
810
811
		$this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' );
812
		$throttlingUntil = $this->getThrottling( $request );
813
		if ( !( $throttlingUntil instanceof ConvertibleTimestamp ) ) {
0 ignored issues
show
Bug introduced by
The class Wikimedia\Timestamp\ConvertibleTimestamp does not exist. Did you forget a USE statement, or did you not list all dependencies?

This error could be the result of:

1. Missing dependencies

PHP Analyzer uses your composer.json file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects the composer.json to be in the root folder of your repository.

Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the require or require-dev section?

2. Missing use statement

PHP does not complain about undefined classes in instanceof checks. For example, the following PHP code will work perfectly fine:

if ($x instanceof DoesNotExist) {
    // Do something.
}

If you have not tested against this specific condition, such errors might go unnoticed.

Loading history...
814
			$this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterInvalid( $request );
815
			$this->throttlingLock->lock(
816
				self::EXPIRY_LOCK_ID,
817
				$this->getTimestampInFuture( new DateInterval( 'PT' . $fallbackBlockDuration . 'S' ) )
818
			);
819
		} else {
820
			$this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterPresent( $throttlingUntil, $request );
821
			$this->throttlingLock->lock( self::EXPIRY_LOCK_ID, $throttlingUntil );
822
		}
823
		throw new TooManySparqlRequestsException();
824
	}
825
826
}
827