@@ -1,6 +1,6 @@ discard block |
||
1 | 1 | <?php |
2 | 2 | |
3 | -declare( strict_types = 1 ); |
|
3 | +declare(strict_types=1); |
|
4 | 4 | |
5 | 5 | namespace WikibaseQuality\ConstraintReport\ConstraintCheck\Helper; |
6 | 6 | |
@@ -147,79 +147,79 @@ discard block |
||
147 | 147 | $this->defaultUserAgent = $defaultUserAgent; |
148 | 148 | $this->requestFactory = $requestFactory; |
149 | 149 | $this->entityPrefixes = []; |
150 | - foreach ( $rdfVocabulary->entityNamespaceNames as $namespaceName ) { |
|
151 | - $this->entityPrefixes[] = $rdfVocabulary->getNamespaceURI( $namespaceName ); |
|
150 | + foreach ($rdfVocabulary->entityNamespaceNames as $namespaceName) { |
|
151 | + $this->entityPrefixes[] = $rdfVocabulary->getNamespaceURI($namespaceName); |
|
152 | 152 | } |
153 | 153 | |
154 | - $this->primaryEndpoint = $config->get( 'WBQualityConstraintsSparqlEndpoint' ); |
|
155 | - $this->additionalEndpoints = $config->get( 'WBQualityConstraintsAdditionalSparqlEndpoints' ) ?: []; |
|
156 | - $this->maxQueryTimeMillis = $config->get( 'WBQualityConstraintsSparqlMaxMillis' ); |
|
157 | - $this->subclassOfId = $config->get( 'WBQualityConstraintsSubclassOfId' ); |
|
158 | - $this->cacheMapSize = $config->get( 'WBQualityConstraintsFormatCacheMapSize' ); |
|
154 | + $this->primaryEndpoint = $config->get('WBQualityConstraintsSparqlEndpoint'); |
|
155 | + $this->additionalEndpoints = $config->get('WBQualityConstraintsAdditionalSparqlEndpoints') ?: []; |
|
156 | + $this->maxQueryTimeMillis = $config->get('WBQualityConstraintsSparqlMaxMillis'); |
|
157 | + $this->subclassOfId = $config->get('WBQualityConstraintsSubclassOfId'); |
|
158 | + $this->cacheMapSize = $config->get('WBQualityConstraintsFormatCacheMapSize'); |
|
159 | 159 | $this->timeoutExceptionClasses = $config->get( |
160 | 160 | 'WBQualityConstraintsSparqlTimeoutExceptionClasses' |
161 | 161 | ); |
162 | 162 | $this->sparqlHasWikibaseSupport = $config->get( |
163 | 163 | 'WBQualityConstraintsSparqlHasWikibaseSupport' |
164 | 164 | ); |
165 | - $this->sparqlThrottlingFallbackDuration = (int)$config->get( |
|
165 | + $this->sparqlThrottlingFallbackDuration = (int) $config->get( |
|
166 | 166 | 'WBQualityConstraintsSparqlThrottlingFallbackDuration' |
167 | 167 | ); |
168 | 168 | |
169 | - $this->prefixes = $this->getQueryPrefixes( $rdfVocabulary ); |
|
169 | + $this->prefixes = $this->getQueryPrefixes($rdfVocabulary); |
|
170 | 170 | } |
171 | 171 | |
172 | - private function getQueryPrefixes( RdfVocabulary $rdfVocabulary ): string { |
|
172 | + private function getQueryPrefixes(RdfVocabulary $rdfVocabulary): string { |
|
173 | 173 | // TODO: it would probably be smarter that RdfVocabulary exposed these prefixes somehow |
174 | 174 | $prefixes = ''; |
175 | - foreach ( $rdfVocabulary->entityNamespaceNames as $sourceName => $namespaceName ) { |
|
175 | + foreach ($rdfVocabulary->entityNamespaceNames as $sourceName => $namespaceName) { |
|
176 | 176 | $prefixes .= <<<END |
177 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
177 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
178 | 178 | END; |
179 | 179 | } |
180 | 180 | |
181 | - $itemSourceName = $rdfVocabulary->getEntityRepositoryName( new ItemId( 'Q1' ) ); |
|
181 | + $itemSourceName = $rdfVocabulary->getEntityRepositoryName(new ItemId('Q1')); |
|
182 | 182 | $namespaceName = $rdfVocabulary->statementNamespaceNames[$itemSourceName][RdfVocabulary::NS_STATEMENT]; |
183 | 183 | $prefixes .= <<<END |
184 | -PREFIX wds: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
184 | +PREFIX wds: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
185 | 185 | END; |
186 | 186 | $namespaceName = $rdfVocabulary->statementNamespaceNames[$itemSourceName][RdfVocabulary::NS_VALUE]; |
187 | 187 | $prefixes .= <<<END |
188 | -PREFIX wdv: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
188 | +PREFIX wdv: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
189 | 189 | END; |
190 | 190 | |
191 | - foreach ( $rdfVocabulary->propertyNamespaceNames as $sourceName => $sourceNamespaces ) { |
|
191 | + foreach ($rdfVocabulary->propertyNamespaceNames as $sourceName => $sourceNamespaces) { |
|
192 | 192 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_DIRECT_CLAIM]; |
193 | 193 | $prefixes .= <<<END |
194 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
194 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
195 | 195 | END; |
196 | 196 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_CLAIM]; |
197 | 197 | $prefixes .= <<<END |
198 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
198 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
199 | 199 | END; |
200 | 200 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_CLAIM_STATEMENT]; |
201 | 201 | $prefixes .= <<<END |
202 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
202 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
203 | 203 | END; |
204 | 204 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_QUALIFIER]; |
205 | 205 | $prefixes .= <<<END |
206 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
206 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
207 | 207 | END; |
208 | 208 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_QUALIFIER_VALUE]; |
209 | 209 | $prefixes .= <<<END |
210 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
210 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
211 | 211 | END; |
212 | 212 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_REFERENCE]; |
213 | 213 | $prefixes .= <<<END |
214 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
214 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
215 | 215 | END; |
216 | 216 | $namespaceName = $sourceNamespaces[RdfVocabulary::NSP_REFERENCE_VALUE]; |
217 | 217 | $prefixes .= <<<END |
218 | -PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI( $namespaceName )}>\n |
|
218 | +PREFIX {$namespaceName}: <{$rdfVocabulary->getNamespaceURI($namespaceName)}>\n |
|
219 | 219 | END; |
220 | 220 | } |
221 | 221 | $prefixes .= <<<END |
222 | -PREFIX wikibase: <{$rdfVocabulary->getNamespaceURI( RdfVocabulary::NS_ONTOLOGY )}>\n |
|
222 | +PREFIX wikibase: <{$rdfVocabulary->getNamespaceURI(RdfVocabulary::NS_ONTOLOGY)}>\n |
|
223 | 223 | END; |
224 | 224 | return $prefixes; |
225 | 225 | } |
@@ -231,21 +231,20 @@ discard block |
||
231 | 231 | * @return CachedBool |
232 | 232 | * @throws SparqlHelperException if the query times out or some other error occurs |
233 | 233 | */ |
234 | - public function hasType( string $id, array $classes ): CachedBool { |
|
234 | + public function hasType(string $id, array $classes): CachedBool { |
|
235 | 235 | // TODO hint:gearing is a workaround for T168973 and can hopefully be removed eventually |
236 | 236 | $gearingHint = $this->sparqlHasWikibaseSupport ? |
237 | - ' hint:Prior hint:gearing "forward".' : |
|
238 | - ''; |
|
237 | + ' hint:Prior hint:gearing "forward".' : ''; |
|
239 | 238 | |
240 | 239 | $metadatas = []; |
241 | 240 | |
242 | - foreach ( array_chunk( $classes, 20 ) as $classesChunk ) { |
|
243 | - $classesValues = implode( ' ', array_map( |
|
244 | - static function ( $class ) { |
|
245 | - return 'wd:' . $class; |
|
241 | + foreach (array_chunk($classes, 20) as $classesChunk) { |
|
242 | + $classesValues = implode(' ', array_map( |
|
243 | + static function($class) { |
|
244 | + return 'wd:'.$class; |
|
246 | 245 | }, |
247 | 246 | $classesChunk |
248 | - ) ); |
|
247 | + )); |
|
249 | 248 | |
250 | 249 | $query = <<<EOF |
251 | 250 | ASK { |
@@ -255,19 +254,19 @@ discard block |
||
255 | 254 | } |
256 | 255 | EOF; |
257 | 256 | |
258 | - $result = $this->runQuery( $query, $this->primaryEndpoint ); |
|
257 | + $result = $this->runQuery($query, $this->primaryEndpoint); |
|
259 | 258 | $metadatas[] = $result->getMetadata(); |
260 | - if ( $result->getArray()['boolean'] ) { |
|
259 | + if ($result->getArray()['boolean']) { |
|
261 | 260 | return new CachedBool( |
262 | 261 | true, |
263 | - Metadata::merge( $metadatas ) |
|
262 | + Metadata::merge($metadatas) |
|
264 | 263 | ); |
265 | 264 | } |
266 | 265 | } |
267 | 266 | |
268 | 267 | return new CachedBool( |
269 | 268 | false, |
270 | - Metadata::merge( $metadatas ) |
|
269 | + Metadata::merge($metadatas) |
|
271 | 270 | ); |
272 | 271 | } |
273 | 272 | |
@@ -275,7 +274,7 @@ discard block |
||
275 | 274 | * Helper function used by findEntitiesWithSameStatement to filter |
276 | 275 | * out entities with different qualifiers or no qualifier value. |
277 | 276 | */ |
278 | - private function nestedSeparatorFilter( PropertyId $separator ): string { |
|
277 | + private function nestedSeparatorFilter(PropertyId $separator): string { |
|
279 | 278 | $filter = <<<EOF |
280 | 279 | MINUS { |
281 | 280 | ?statement pq:$separator ?qualifier. |
@@ -312,14 +311,14 @@ discard block |
||
312 | 311 | * @return CachedEntityIds |
313 | 312 | * @throws SparqlHelperException if the query times out or some other error occurs |
314 | 313 | */ |
315 | - public function findEntitiesWithSameStatement( Statement $statement, array $separators ): CachedEntityIds { |
|
314 | + public function findEntitiesWithSameStatement(Statement $statement, array $separators): CachedEntityIds { |
|
316 | 315 | $pid = $statement->getPropertyId()->getSerialization(); |
317 | 316 | $guid = $statement->getGuid(); |
318 | 317 | '@phan-var string $guid'; // statement must have a non-null GUID |
319 | - $guidForRdf = str_replace( '$', '-', $guid ); |
|
318 | + $guidForRdf = str_replace('$', '-', $guid); |
|
320 | 319 | |
321 | - $separatorFilters = array_map( [ $this, 'nestedSeparatorFilter' ], $separators ); |
|
322 | - $finalSeparatorFilter = implode( "\n", $separatorFilters ); |
|
320 | + $separatorFilters = array_map([$this, 'nestedSeparatorFilter'], $separators); |
|
321 | + $finalSeparatorFilter = implode("\n", $separatorFilters); |
|
323 | 322 | |
324 | 323 | $query = <<<EOF |
325 | 324 | SELECT DISTINCT ?otherEntity WHERE { |
@@ -337,12 +336,12 @@ discard block |
||
337 | 336 | LIMIT 10 |
338 | 337 | EOF; |
339 | 338 | |
340 | - $results = [ $this->runQuery( $query, $this->primaryEndpoint ) ]; |
|
341 | - foreach ( $this->additionalEndpoints as $endpoint ) { |
|
342 | - $results[] = $this->runQuery( $query, $endpoint ); |
|
339 | + $results = [$this->runQuery($query, $this->primaryEndpoint)]; |
|
340 | + foreach ($this->additionalEndpoints as $endpoint) { |
|
341 | + $results[] = $this->runQuery($query, $endpoint); |
|
343 | 342 | } |
344 | 343 | |
345 | - return $this->getOtherEntities( $results ); |
|
344 | + return $this->getOtherEntities($results); |
|
346 | 345 | } |
347 | 346 | |
348 | 347 | /** |
@@ -367,16 +366,15 @@ discard block |
||
367 | 366 | $dataType = $this->propertyDataTypeLookup->getDataTypeIdForProperty( |
368 | 367 | $snak->getPropertyId() |
369 | 368 | ); |
370 | - [ $value, $isFullValue ] = $this->getRdfLiteral( $dataType, $dataValue ); |
|
371 | - if ( $isFullValue ) { |
|
369 | + [$value, $isFullValue] = $this->getRdfLiteral($dataType, $dataValue); |
|
370 | + if ($isFullValue) { |
|
372 | 371 | $prefix .= 'v'; |
373 | 372 | } |
374 | 373 | $path = $type === Context::TYPE_QUALIFIER ? |
375 | - "$prefix:$pid" : |
|
376 | - "prov:wasDerivedFrom/$prefix:$pid"; |
|
374 | + "$prefix:$pid" : "prov:wasDerivedFrom/$prefix:$pid"; |
|
377 | 375 | |
378 | 376 | $deprecatedFilter = ''; |
379 | - if ( $ignoreDeprecatedStatements ) { |
|
377 | + if ($ignoreDeprecatedStatements) { |
|
380 | 378 | $deprecatedFilter = <<< EOF |
381 | 379 | MINUS { ?otherStatement wikibase:rank wikibase:DeprecatedRank. } |
382 | 380 | EOF; |
@@ -396,19 +394,19 @@ discard block |
||
396 | 394 | LIMIT 10 |
397 | 395 | EOF; |
398 | 396 | |
399 | - $results = [ $this->runQuery( $query, $this->primaryEndpoint ) ]; |
|
400 | - foreach ( $this->additionalEndpoints as $endpoint ) { |
|
401 | - $results[] = $this->runQuery( $query, $endpoint ); |
|
397 | + $results = [$this->runQuery($query, $this->primaryEndpoint)]; |
|
398 | + foreach ($this->additionalEndpoints as $endpoint) { |
|
399 | + $results[] = $this->runQuery($query, $endpoint); |
|
402 | 400 | } |
403 | 401 | |
404 | - return $this->getOtherEntities( $results ); |
|
402 | + return $this->getOtherEntities($results); |
|
405 | 403 | } |
406 | 404 | |
407 | 405 | /** |
408 | 406 | * Return SPARQL code for a string literal with $text as content. |
409 | 407 | */ |
410 | - private function stringLiteral( string $text ): string { |
|
411 | - return '"' . strtr( $text, [ '"' => '\\"', '\\' => '\\\\' ] ) . '"'; |
|
408 | + private function stringLiteral(string $text): string { |
|
409 | + return '"'.strtr($text, ['"' => '\\"', '\\' => '\\\\']).'"'; |
|
412 | 410 | } |
413 | 411 | |
414 | 412 | /** |
@@ -418,26 +416,26 @@ discard block |
||
418 | 416 | * |
419 | 417 | * @return CachedEntityIds |
420 | 418 | */ |
421 | - private function getOtherEntities( array $results ): CachedEntityIds { |
|
419 | + private function getOtherEntities(array $results): CachedEntityIds { |
|
422 | 420 | $allResultBindings = []; |
423 | 421 | $metadatas = []; |
424 | 422 | |
425 | - foreach ( $results as $result ) { |
|
423 | + foreach ($results as $result) { |
|
426 | 424 | $metadatas[] = $result->getMetadata(); |
427 | - $allResultBindings = array_merge( $allResultBindings, $result->getArray()['results']['bindings'] ); |
|
425 | + $allResultBindings = array_merge($allResultBindings, $result->getArray()['results']['bindings']); |
|
428 | 426 | } |
429 | 427 | |
430 | 428 | $entityIds = array_map( |
431 | - function ( $resultBindings ) { |
|
429 | + function($resultBindings) { |
|
432 | 430 | $entityIRI = $resultBindings['otherEntity']['value']; |
433 | - foreach ( $this->entityPrefixes as $entityPrefix ) { |
|
434 | - $entityPrefixLength = strlen( $entityPrefix ); |
|
435 | - if ( substr( $entityIRI, 0, $entityPrefixLength ) === $entityPrefix ) { |
|
431 | + foreach ($this->entityPrefixes as $entityPrefix) { |
|
432 | + $entityPrefixLength = strlen($entityPrefix); |
|
433 | + if (substr($entityIRI, 0, $entityPrefixLength) === $entityPrefix) { |
|
436 | 434 | try { |
437 | 435 | return $this->entityIdParser->parse( |
438 | - substr( $entityIRI, $entityPrefixLength ) |
|
436 | + substr($entityIRI, $entityPrefixLength) |
|
439 | 437 | ); |
440 | - } catch ( EntityIdParsingException $e ) { |
|
438 | + } catch (EntityIdParsingException $e) { |
|
441 | 439 | // fall through |
442 | 440 | } |
443 | 441 | } |
@@ -451,8 +449,8 @@ discard block |
||
451 | 449 | ); |
452 | 450 | |
453 | 451 | return new CachedEntityIds( |
454 | - array_values( array_filter( array_unique( $entityIds ) ) ), |
|
455 | - Metadata::merge( $metadatas ) |
|
452 | + array_values(array_filter(array_unique($entityIds))), |
|
453 | + Metadata::merge($metadatas) |
|
456 | 454 | ); |
457 | 455 | } |
458 | 456 | |
@@ -463,50 +461,50 @@ discard block |
||
463 | 461 | * @return array the literal or IRI as a string in SPARQL syntax, |
464 | 462 | * and a boolean indicating whether it refers to a full value node or not |
465 | 463 | */ |
466 | - private function getRdfLiteral( string $dataType, DataValue $dataValue ): array { |
|
467 | - switch ( $dataType ) { |
|
464 | + private function getRdfLiteral(string $dataType, DataValue $dataValue): array { |
|
465 | + switch ($dataType) { |
|
468 | 466 | case 'string': |
469 | 467 | case 'external-id': |
470 | - return [ $this->stringLiteral( $dataValue->getValue() ), false ]; |
|
468 | + return [$this->stringLiteral($dataValue->getValue()), false]; |
|
471 | 469 | case 'commonsMedia': |
472 | - $url = $this->rdfVocabulary->getMediaFileURI( $dataValue->getValue() ); |
|
473 | - return [ '<' . $url . '>', false ]; |
|
470 | + $url = $this->rdfVocabulary->getMediaFileURI($dataValue->getValue()); |
|
471 | + return ['<'.$url.'>', false]; |
|
474 | 472 | case 'geo-shape': |
475 | - $url = $this->rdfVocabulary->getGeoShapeURI( $dataValue->getValue() ); |
|
476 | - return [ '<' . $url . '>', false ]; |
|
473 | + $url = $this->rdfVocabulary->getGeoShapeURI($dataValue->getValue()); |
|
474 | + return ['<'.$url.'>', false]; |
|
477 | 475 | case 'tabular-data': |
478 | - $url = $this->rdfVocabulary->getTabularDataURI( $dataValue->getValue() ); |
|
479 | - return [ '<' . $url . '>', false ]; |
|
476 | + $url = $this->rdfVocabulary->getTabularDataURI($dataValue->getValue()); |
|
477 | + return ['<'.$url.'>', false]; |
|
480 | 478 | case 'url': |
481 | 479 | $url = $dataValue->getValue(); |
482 | - if ( !preg_match( '/^[^<>"{}\\\\|^`\\x00-\\x20]*$/D', $url ) ) { |
|
480 | + if (!preg_match('/^[^<>"{}\\\\|^`\\x00-\\x20]*$/D', $url)) { |
|
483 | 481 | // not a valid URL for SPARQL (see SPARQL spec, production 139 IRIREF) |
484 | 482 | // such an URL should never reach us, so just throw |
485 | - throw new InvalidArgumentException( 'invalid URL: ' . $url ); |
|
483 | + throw new InvalidArgumentException('invalid URL: '.$url); |
|
486 | 484 | } |
487 | - return [ '<' . $url . '>', false ]; |
|
485 | + return ['<'.$url.'>', false]; |
|
488 | 486 | case 'wikibase-item': |
489 | 487 | case 'wikibase-property': |
490 | 488 | /** @var EntityIdValue $dataValue */ |
491 | 489 | '@phan-var EntityIdValue $dataValue'; |
492 | - return [ 'wd:' . $dataValue->getEntityId()->getSerialization(), false ]; |
|
490 | + return ['wd:'.$dataValue->getEntityId()->getSerialization(), false]; |
|
493 | 491 | case 'monolingualtext': |
494 | 492 | /** @var MonolingualTextValue $dataValue */ |
495 | 493 | '@phan-var MonolingualTextValue $dataValue'; |
496 | 494 | $lang = $dataValue->getLanguageCode(); |
497 | - if ( !preg_match( '/^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/D', $lang ) ) { |
|
495 | + if (!preg_match('/^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/D', $lang)) { |
|
498 | 496 | // not a valid language tag for SPARQL (see SPARQL spec, production 145 LANGTAG) |
499 | 497 | // such a language tag should never reach us, so just throw |
500 | - throw new InvalidArgumentException( 'invalid language tag: ' . $lang ); |
|
498 | + throw new InvalidArgumentException('invalid language tag: '.$lang); |
|
501 | 499 | } |
502 | - return [ $this->stringLiteral( $dataValue->getText() ) . '@' . $lang, false ]; |
|
500 | + return [$this->stringLiteral($dataValue->getText()).'@'.$lang, false]; |
|
503 | 501 | case 'globe-coordinate': |
504 | 502 | case 'quantity': |
505 | 503 | case 'time': |
506 | 504 | // @phan-suppress-next-line PhanUndeclaredMethod |
507 | - return [ 'wdv:' . $dataValue->getHash(), true ]; |
|
505 | + return ['wdv:'.$dataValue->getHash(), true]; |
|
508 | 506 | default: |
509 | - throw new InvalidArgumentException( 'unknown data type: ' . $dataType ); |
|
507 | + throw new InvalidArgumentException('unknown data type: '.$dataType); |
|
510 | 508 | } |
511 | 509 | } |
512 | 510 | // phpcs:enable |
@@ -515,43 +513,43 @@ discard block |
||
515 | 513 | * @throws SparqlHelperException if the query times out or some other error occurs |
516 | 514 | * @throws ConstraintParameterException if the $regex is invalid |
517 | 515 | */ |
518 | - public function matchesRegularExpression( string $text, string $regex ): bool { |
|
516 | + public function matchesRegularExpression(string $text, string $regex): bool { |
|
519 | 517 | // caching wrapper around matchesRegularExpressionWithSparql |
520 | 518 | |
521 | - $textHash = hash( 'sha256', $text ); |
|
519 | + $textHash = hash('sha256', $text); |
|
522 | 520 | $cacheKey = $this->cache->makeKey( |
523 | 521 | 'WikibaseQualityConstraints', // extension |
524 | 522 | 'regex', // action |
525 | 523 | 'WDQS-Java', // regex flavor |
526 | - hash( 'sha256', $regex ) |
|
524 | + hash('sha256', $regex) |
|
527 | 525 | ); |
528 | 526 | |
529 | 527 | $cacheMapArray = $this->cache->getWithSetCallback( |
530 | 528 | $cacheKey, |
531 | 529 | WANObjectCache::TTL_DAY, |
532 | - function ( $cacheMapArray ) use ( $text, $regex, $textHash ) { |
|
530 | + function($cacheMapArray) use ($text, $regex, $textHash) { |
|
533 | 531 | // Initialize the cache map if not set |
534 | - if ( $cacheMapArray === false ) { |
|
532 | + if ($cacheMapArray === false) { |
|
535 | 533 | $key = 'wikibase.quality.constraints.regex.cache.refresh.init'; |
536 | - $this->dataFactory->increment( $key ); |
|
534 | + $this->dataFactory->increment($key); |
|
537 | 535 | return []; |
538 | 536 | } |
539 | 537 | |
540 | 538 | $key = 'wikibase.quality.constraints.regex.cache.refresh'; |
541 | - $this->dataFactory->increment( $key ); |
|
542 | - $cacheMap = MapCacheLRU::newFromArray( $cacheMapArray, $this->cacheMapSize ); |
|
543 | - if ( $cacheMap->has( $textHash ) ) { |
|
539 | + $this->dataFactory->increment($key); |
|
540 | + $cacheMap = MapCacheLRU::newFromArray($cacheMapArray, $this->cacheMapSize); |
|
541 | + if ($cacheMap->has($textHash)) { |
|
544 | 542 | $key = 'wikibase.quality.constraints.regex.cache.refresh.hit'; |
545 | - $this->dataFactory->increment( $key ); |
|
546 | - $cacheMap->get( $textHash ); // ping cache |
|
543 | + $this->dataFactory->increment($key); |
|
544 | + $cacheMap->get($textHash); // ping cache |
|
547 | 545 | } else { |
548 | 546 | $key = 'wikibase.quality.constraints.regex.cache.refresh.miss'; |
549 | - $this->dataFactory->increment( $key ); |
|
547 | + $this->dataFactory->increment($key); |
|
550 | 548 | try { |
551 | - $matches = $this->matchesRegularExpressionWithSparql( $text, $regex ); |
|
552 | - } catch ( ConstraintParameterException $e ) { |
|
553 | - $matches = $this->serializeConstraintParameterException( $e ); |
|
554 | - } catch ( SparqlHelperException $e ) { |
|
549 | + $matches = $this->matchesRegularExpressionWithSparql($text, $regex); |
|
550 | + } catch (ConstraintParameterException $e) { |
|
551 | + $matches = $this->serializeConstraintParameterException($e); |
|
552 | + } catch (SparqlHelperException $e) { |
|
555 | 553 | // don’t cache this |
556 | 554 | return $cacheMap->toArray(); |
557 | 555 | } |
@@ -575,42 +573,42 @@ discard block |
||
575 | 573 | ] |
576 | 574 | ); |
577 | 575 | |
578 | - if ( isset( $cacheMapArray[$textHash] ) ) { |
|
576 | + if (isset($cacheMapArray[$textHash])) { |
|
579 | 577 | $key = 'wikibase.quality.constraints.regex.cache.hit'; |
580 | - $this->dataFactory->increment( $key ); |
|
578 | + $this->dataFactory->increment($key); |
|
581 | 579 | $matches = $cacheMapArray[$textHash]; |
582 | - if ( is_bool( $matches ) ) { |
|
580 | + if (is_bool($matches)) { |
|
583 | 581 | return $matches; |
584 | - } elseif ( is_array( $matches ) && |
|
585 | - $matches['type'] == ConstraintParameterException::class ) { |
|
586 | - throw $this->deserializeConstraintParameterException( $matches ); |
|
582 | + } elseif (is_array($matches) && |
|
583 | + $matches['type'] == ConstraintParameterException::class) { |
|
584 | + throw $this->deserializeConstraintParameterException($matches); |
|
587 | 585 | } else { |
588 | 586 | throw new UnexpectedValueException( |
589 | - 'Value of unknown type in object cache (' . |
|
590 | - 'cache key: ' . $cacheKey . ', ' . |
|
591 | - 'cache map key: ' . $textHash . ', ' . |
|
592 | - 'value type: ' . get_debug_type( $matches ) . ')' |
|
587 | + 'Value of unknown type in object cache ('. |
|
588 | + 'cache key: '.$cacheKey.', '. |
|
589 | + 'cache map key: '.$textHash.', '. |
|
590 | + 'value type: '.get_debug_type($matches).')' |
|
593 | 591 | ); |
594 | 592 | } |
595 | 593 | } else { |
596 | 594 | $key = 'wikibase.quality.constraints.regex.cache.miss'; |
597 | - $this->dataFactory->increment( $key ); |
|
598 | - return $this->matchesRegularExpressionWithSparql( $text, $regex ); |
|
595 | + $this->dataFactory->increment($key); |
|
596 | + return $this->matchesRegularExpressionWithSparql($text, $regex); |
|
599 | 597 | } |
600 | 598 | } |
601 | 599 | |
602 | - private function serializeConstraintParameterException( ConstraintParameterException $cpe ): array { |
|
600 | + private function serializeConstraintParameterException(ConstraintParameterException $cpe): array { |
|
603 | 601 | return [ |
604 | 602 | 'type' => ConstraintParameterException::class, |
605 | - 'violationMessage' => $this->violationMessageSerializer->serialize( $cpe->getViolationMessage() ), |
|
603 | + 'violationMessage' => $this->violationMessageSerializer->serialize($cpe->getViolationMessage()), |
|
606 | 604 | ]; |
607 | 605 | } |
608 | 606 | |
609 | - private function deserializeConstraintParameterException( array $serialization ): ConstraintParameterException { |
|
607 | + private function deserializeConstraintParameterException(array $serialization): ConstraintParameterException { |
|
610 | 608 | $message = $this->violationMessageDeserializer->deserialize( |
611 | 609 | $serialization['violationMessage'] |
612 | 610 | ); |
613 | - return new ConstraintParameterException( $message ); |
|
611 | + return new ConstraintParameterException($message); |
|
614 | 612 | } |
615 | 613 | |
616 | 614 | /** |
@@ -620,25 +618,25 @@ discard block |
||
620 | 618 | * @throws SparqlHelperException if the query times out or some other error occurs |
621 | 619 | * @throws ConstraintParameterException if the $regex is invalid |
622 | 620 | */ |
623 | - public function matchesRegularExpressionWithSparql( string $text, string $regex ): bool { |
|
624 | - $textStringLiteral = $this->stringLiteral( $text ); |
|
625 | - $regexStringLiteral = $this->stringLiteral( '^(?:' . $regex . ')$' ); |
|
621 | + public function matchesRegularExpressionWithSparql(string $text, string $regex): bool { |
|
622 | + $textStringLiteral = $this->stringLiteral($text); |
|
623 | + $regexStringLiteral = $this->stringLiteral('^(?:'.$regex.')$'); |
|
626 | 624 | |
627 | 625 | $query = <<<EOF |
628 | 626 | SELECT (REGEX($textStringLiteral, $regexStringLiteral) AS ?matches) {} |
629 | 627 | EOF; |
630 | 628 | |
631 | - $result = $this->runQuery( $query, $this->primaryEndpoint, false ); |
|
629 | + $result = $this->runQuery($query, $this->primaryEndpoint, false); |
|
632 | 630 | |
633 | 631 | $vars = $result->getArray()['results']['bindings'][0]; |
634 | - if ( array_key_exists( 'matches', $vars ) ) { |
|
632 | + if (array_key_exists('matches', $vars)) { |
|
635 | 633 | // true or false ⇒ regex okay, text matches or not |
636 | 634 | return $vars['matches']['value'] === 'true'; |
637 | 635 | } else { |
638 | 636 | // empty result: regex broken |
639 | 637 | throw new ConstraintParameterException( |
640 | - ( new ViolationMessage( 'wbqc-violation-message-parameter-regex' ) ) |
|
641 | - ->withInlineCode( $regex, Role::CONSTRAINT_PARAMETER_VALUE ) |
|
638 | + (new ViolationMessage('wbqc-violation-message-parameter-regex')) |
|
639 | + ->withInlineCode($regex, Role::CONSTRAINT_PARAMETER_VALUE) |
|
642 | 640 | ); |
643 | 641 | } |
644 | 642 | } |
@@ -646,14 +644,14 @@ discard block |
||
646 | 644 | /** |
647 | 645 | * Check whether the text content of an error response indicates a query timeout. |
648 | 646 | */ |
649 | - public function isTimeout( string $responseContent ): bool { |
|
650 | - $timeoutRegex = implode( '|', array_map( |
|
651 | - static function ( $fqn ) { |
|
652 | - return preg_quote( $fqn, '/' ); |
|
647 | + public function isTimeout(string $responseContent): bool { |
|
648 | + $timeoutRegex = implode('|', array_map( |
|
649 | + static function($fqn) { |
|
650 | + return preg_quote($fqn, '/'); |
|
653 | 651 | }, |
654 | 652 | $this->timeoutExceptionClasses |
655 | - ) ); |
|
656 | - return (bool)preg_match( '/' . $timeoutRegex . '/', $responseContent ); |
|
653 | + )); |
|
654 | + return (bool) preg_match('/'.$timeoutRegex.'/', $responseContent); |
|
657 | 655 | } |
658 | 656 | |
659 | 657 | /** |
@@ -665,17 +663,17 @@ discard block |
||
665 | 663 | * @return int|boolean the max-age (in seconds) |
666 | 664 | * or a plain boolean if no max-age can be determined |
667 | 665 | */ |
668 | - public function getCacheMaxAge( array $responseHeaders ) { |
|
666 | + public function getCacheMaxAge(array $responseHeaders) { |
|
669 | 667 | if ( |
670 | - array_key_exists( 'x-cache-status', $responseHeaders ) && |
|
671 | - preg_match( '/^hit(?:-.*)?$/', $responseHeaders['x-cache-status'][0] ) |
|
668 | + array_key_exists('x-cache-status', $responseHeaders) && |
|
669 | + preg_match('/^hit(?:-.*)?$/', $responseHeaders['x-cache-status'][0]) |
|
672 | 670 | ) { |
673 | 671 | $maxage = []; |
674 | 672 | if ( |
675 | - array_key_exists( 'cache-control', $responseHeaders ) && |
|
676 | - preg_match( '/\bmax-age=(\d+)\b/', $responseHeaders['cache-control'][0], $maxage ) |
|
673 | + array_key_exists('cache-control', $responseHeaders) && |
|
674 | + preg_match('/\bmax-age=(\d+)\b/', $responseHeaders['cache-control'][0], $maxage) |
|
677 | 675 | ) { |
678 | - return intval( $maxage[1] ); |
|
676 | + return intval($maxage[1]); |
|
679 | 677 | } else { |
680 | 678 | return true; |
681 | 679 | } |
@@ -696,34 +694,34 @@ discard block |
||
696 | 694 | * or SparlHelper::EMPTY_RETRY_AFTER if there is an empty Retry-After |
697 | 695 | * or SparlHelper::INVALID_RETRY_AFTER if there is something wrong with the format |
698 | 696 | */ |
699 | - public function getThrottling( MWHttpRequest $request ) { |
|
700 | - $retryAfterValue = $request->getResponseHeader( 'Retry-After' ); |
|
701 | - if ( $retryAfterValue === null ) { |
|
697 | + public function getThrottling(MWHttpRequest $request) { |
|
698 | + $retryAfterValue = $request->getResponseHeader('Retry-After'); |
|
699 | + if ($retryAfterValue === null) { |
|
702 | 700 | return self::NO_RETRY_AFTER; |
703 | 701 | } |
704 | 702 | |
705 | - $trimmedRetryAfterValue = trim( $retryAfterValue ); |
|
706 | - if ( $trimmedRetryAfterValue === '' ) { |
|
703 | + $trimmedRetryAfterValue = trim($retryAfterValue); |
|
704 | + if ($trimmedRetryAfterValue === '') { |
|
707 | 705 | return self::EMPTY_RETRY_AFTER; |
708 | 706 | } |
709 | 707 | |
710 | - if ( is_numeric( $trimmedRetryAfterValue ) ) { |
|
711 | - $delaySeconds = (int)$trimmedRetryAfterValue; |
|
712 | - if ( $delaySeconds >= 0 ) { |
|
713 | - return $this->getTimestampInFuture( new DateInterval( 'PT' . $delaySeconds . 'S' ) ); |
|
708 | + if (is_numeric($trimmedRetryAfterValue)) { |
|
709 | + $delaySeconds = (int) $trimmedRetryAfterValue; |
|
710 | + if ($delaySeconds >= 0) { |
|
711 | + return $this->getTimestampInFuture(new DateInterval('PT'.$delaySeconds.'S')); |
|
714 | 712 | } |
715 | 713 | } else { |
716 | - $return = strtotime( $trimmedRetryAfterValue ); |
|
717 | - if ( $return !== false ) { |
|
718 | - return new ConvertibleTimestamp( $return ); |
|
714 | + $return = strtotime($trimmedRetryAfterValue); |
|
715 | + if ($return !== false) { |
|
716 | + return new ConvertibleTimestamp($return); |
|
719 | 717 | } |
720 | 718 | } |
721 | 719 | return self::INVALID_RETRY_AFTER; |
722 | 720 | } |
723 | 721 | |
724 | - private function getTimestampInFuture( DateInterval $delta ) { |
|
722 | + private function getTimestampInFuture(DateInterval $delta) { |
|
725 | 723 | $now = new ConvertibleTimestamp(); |
726 | - return new ConvertibleTimestamp( $now->timestamp->add( $delta ) ); |
|
724 | + return new ConvertibleTimestamp($now->timestamp->add($delta)); |
|
727 | 725 | } |
728 | 726 | |
729 | 727 | /** |
@@ -738,64 +736,63 @@ discard block |
||
738 | 736 | * |
739 | 737 | * @throws SparqlHelperException if the query times out or some other error occurs |
740 | 738 | */ |
741 | - protected function runQuery( string $query, string $endpoint, bool $needsPrefixes = true ): CachedQueryResults { |
|
742 | - if ( $this->throttlingLock->isLocked( self::EXPIRY_LOCK_ID ) ) { |
|
743 | - $this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' ); |
|
739 | + protected function runQuery(string $query, string $endpoint, bool $needsPrefixes = true): CachedQueryResults { |
|
740 | + if ($this->throttlingLock->isLocked(self::EXPIRY_LOCK_ID)) { |
|
741 | + $this->dataFactory->increment('wikibase.quality.constraints.sparql.throttling'); |
|
744 | 742 | throw new TooManySparqlRequestsException(); |
745 | 743 | } |
746 | 744 | |
747 | - if ( $this->sparqlHasWikibaseSupport ) { |
|
745 | + if ($this->sparqlHasWikibaseSupport) { |
|
748 | 746 | $needsPrefixes = false; |
749 | 747 | } |
750 | 748 | |
751 | - if ( $needsPrefixes ) { |
|
752 | - $query = $this->prefixes . $query; |
|
749 | + if ($needsPrefixes) { |
|
750 | + $query = $this->prefixes.$query; |
|
753 | 751 | } |
754 | - $query = "#wbqc\n" . $query; |
|
752 | + $query = "#wbqc\n".$query; |
|
755 | 753 | |
756 | - $url = $endpoint . '?' . http_build_query( |
|
754 | + $url = $endpoint.'?'.http_build_query( |
|
757 | 755 | [ |
758 | 756 | 'query' => $query, |
759 | 757 | 'format' => 'json', |
760 | 758 | 'maxQueryTimeMillis' => $this->maxQueryTimeMillis, |
761 | 759 | ], |
762 | - '', ini_get( 'arg_separator.output' ), |
|
760 | + '', ini_get('arg_separator.output'), |
|
763 | 761 | // encode spaces with %20, not + |
764 | 762 | PHP_QUERY_RFC3986 |
765 | 763 | ); |
766 | 764 | |
767 | 765 | $options = [ |
768 | 766 | 'method' => 'GET', |
769 | - 'timeout' => (int)round( ( $this->maxQueryTimeMillis + 1000 ) / 1000 ), |
|
767 | + 'timeout' => (int) round(($this->maxQueryTimeMillis + 1000) / 1000), |
|
770 | 768 | 'connectTimeout' => 'default', |
771 | 769 | 'userAgent' => $this->defaultUserAgent, |
772 | 770 | ]; |
773 | - $request = $this->requestFactory->create( $url, $options, __METHOD__ ); |
|
774 | - $startTime = microtime( true ); |
|
771 | + $request = $this->requestFactory->create($url, $options, __METHOD__); |
|
772 | + $startTime = microtime(true); |
|
775 | 773 | $requestStatus = $request->execute(); |
776 | - $endTime = microtime( true ); |
|
774 | + $endTime = microtime(true); |
|
777 | 775 | $this->dataFactory->timing( |
778 | 776 | 'wikibase.quality.constraints.sparql.timing', |
779 | - ( $endTime - $startTime ) * 1000 |
|
777 | + ($endTime - $startTime) * 1000 |
|
780 | 778 | ); |
781 | 779 | |
782 | - $this->guardAgainstTooManyRequestsError( $request ); |
|
780 | + $this->guardAgainstTooManyRequestsError($request); |
|
783 | 781 | |
784 | - $maxAge = $this->getCacheMaxAge( $request->getResponseHeaders() ); |
|
785 | - if ( $maxAge ) { |
|
786 | - $this->dataFactory->increment( 'wikibase.quality.constraints.sparql.cached' ); |
|
782 | + $maxAge = $this->getCacheMaxAge($request->getResponseHeaders()); |
|
783 | + if ($maxAge) { |
|
784 | + $this->dataFactory->increment('wikibase.quality.constraints.sparql.cached'); |
|
787 | 785 | } |
788 | 786 | |
789 | - if ( $requestStatus->isOK() ) { |
|
787 | + if ($requestStatus->isOK()) { |
|
790 | 788 | $json = $request->getContent(); |
791 | - $jsonStatus = FormatJson::parse( $json, FormatJson::FORCE_ASSOC ); |
|
792 | - if ( $jsonStatus->isOK() ) { |
|
789 | + $jsonStatus = FormatJson::parse($json, FormatJson::FORCE_ASSOC); |
|
790 | + if ($jsonStatus->isOK()) { |
|
793 | 791 | return new CachedQueryResults( |
794 | 792 | $jsonStatus->getValue(), |
795 | 793 | Metadata::ofCachingMetadata( |
796 | 794 | $maxAge ? |
797 | - CachingMetadata::ofMaximumAgeInSeconds( $maxAge ) : |
|
798 | - CachingMetadata::fresh() |
|
795 | + CachingMetadata::ofMaximumAgeInSeconds($maxAge) : CachingMetadata::fresh() |
|
799 | 796 | ) |
800 | 797 | ); |
801 | 798 | } else { |
@@ -812,9 +809,9 @@ discard block |
||
812 | 809 | // fall through to general error handling |
813 | 810 | } |
814 | 811 | |
815 | - $this->dataFactory->increment( 'wikibase.quality.constraints.sparql.error' ); |
|
812 | + $this->dataFactory->increment('wikibase.quality.constraints.sparql.error'); |
|
816 | 813 | |
817 | - if ( $this->isTimeout( $request->getContent() ) ) { |
|
814 | + if ($this->isTimeout($request->getContent())) { |
|
818 | 815 | $this->dataFactory->increment( |
819 | 816 | 'wikibase.quality.constraints.sparql.error.timeout' |
820 | 817 | ); |
@@ -829,29 +826,29 @@ discard block |
||
829 | 826 | * @param MWHttpRequest $request |
830 | 827 | * @throws TooManySparqlRequestsException |
831 | 828 | */ |
832 | - private function guardAgainstTooManyRequestsError( MWHttpRequest $request ): void { |
|
833 | - if ( $request->getStatus() !== self::HTTP_TOO_MANY_REQUESTS ) { |
|
829 | + private function guardAgainstTooManyRequestsError(MWHttpRequest $request): void { |
|
830 | + if ($request->getStatus() !== self::HTTP_TOO_MANY_REQUESTS) { |
|
834 | 831 | return; |
835 | 832 | } |
836 | 833 | |
837 | 834 | $fallbackBlockDuration = $this->sparqlThrottlingFallbackDuration; |
838 | 835 | |
839 | - if ( $fallbackBlockDuration < 0 ) { |
|
840 | - throw new InvalidArgumentException( 'Fallback duration must be positive int but is: ' . |
|
841 | - $fallbackBlockDuration ); |
|
836 | + if ($fallbackBlockDuration < 0) { |
|
837 | + throw new InvalidArgumentException('Fallback duration must be positive int but is: '. |
|
838 | + $fallbackBlockDuration); |
|
842 | 839 | } |
843 | 840 | |
844 | - $this->dataFactory->increment( 'wikibase.quality.constraints.sparql.throttling' ); |
|
845 | - $throttlingUntil = $this->getThrottling( $request ); |
|
846 | - if ( !( $throttlingUntil instanceof ConvertibleTimestamp ) ) { |
|
847 | - $this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterInvalid( $request ); |
|
841 | + $this->dataFactory->increment('wikibase.quality.constraints.sparql.throttling'); |
|
842 | + $throttlingUntil = $this->getThrottling($request); |
|
843 | + if (!($throttlingUntil instanceof ConvertibleTimestamp)) { |
|
844 | + $this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterInvalid($request); |
|
848 | 845 | $this->throttlingLock->lock( |
849 | 846 | self::EXPIRY_LOCK_ID, |
850 | - $this->getTimestampInFuture( new DateInterval( 'PT' . $fallbackBlockDuration . 'S' ) ) |
|
847 | + $this->getTimestampInFuture(new DateInterval('PT'.$fallbackBlockDuration.'S')) |
|
851 | 848 | ); |
852 | 849 | } else { |
853 | - $this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterPresent( $throttlingUntil, $request ); |
|
854 | - $this->throttlingLock->lock( self::EXPIRY_LOCK_ID, $throttlingUntil ); |
|
850 | + $this->loggingHelper->logSparqlHelperTooManyRequestsRetryAfterPresent($throttlingUntil, $request); |
|
851 | + $this->throttlingLock->lock(self::EXPIRY_LOCK_ID, $throttlingUntil); |
|
855 | 852 | } |
856 | 853 | throw new TooManySparqlRequestsException(); |
857 | 854 | } |