Completed
Push — master ( 4f46ca...625545 )
by
unknown
14:36
created

CachingResultsSource::getResults()   B

Complexity

Conditions 9
Paths 6

Size

Total Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 39
rs 7.7404
c 0
b 0
f 0
cc 9
nc 6
nop 4
1
<?php
2
3
4
namespace WikibaseQuality\ConstraintReport\Api;
5
6
use DataValues\TimeValue;
7
use Wikibase\DataModel\Entity\EntityId;
8
use Wikibase\DataModel\Entity\EntityIdParser;
9
use Wikibase\Lib\Store\EntityRevisionLookup;
10
use Wikibase\Lib\Store\Sql\WikiPageEntityMetaDataAccessor;
11
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedCheckResults;
12
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata;
13
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\DependencyMetadata;
14
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata;
15
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\LoggingHelper;
16
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\TimeValueComparer;
17
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResult;
18
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultDeserializer;
19
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultSerializer;
20
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\NullResult;
21
22
/**
23
 * A ResultsSource that wraps another ResultsSource,
24
 * storing results in a cache
25
 * and retrieving them from there if the results are still fresh.
26
 *
27
 * @author Lucas Werkmeister
28
 * @license GPL-2.0-or-later
29
 */
30
class CachingResultsSource implements ResultsSource {
31
32
	/**
33
	 * @var ResultsSource
34
	 */
35
	private $resultsSource;
36
37
	/**
38
	 * @var ResultsCache
39
	 */
40
	private $cache;
41
42
	/**
43
	 * @var CheckResultSerializer
44
	 */
45
	private $checkResultSerializer;
46
47
	/**
48
	 * @var CheckResultDeserializer
49
	 */
50
	private $checkResultDeserializer;
51
52
	/**
53
	 * @var WikiPageEntityMetaDataAccessor
54
	 */
55
	private $wikiPageEntityMetaDataAccessor;
56
57
	/**
58
	 * @var EntityIdParser
59
	 */
60
	private $entityIdParser;
61
62
	/**
63
	 * @var int
64
	 */
65
	private $ttlInSeconds;
66
67
	/**
68
	 * @var string[]
69
	 */
70
	private $possiblyStaleConstraintTypes;
71
72
	/**
73
	 * @var int
74
	 */
75
	private $maxRevisionIds;
76
77
	/**
78
	 * @var LoggingHelper
79
	 */
80
	private $loggingHelper;
81
82
	/**
83
	 * @var TimeValueComparer
84
	 */
85
	private $timeValueComparer;
86
87
	/**
88
	 * @var callable
89
	 */
90
	private $microtime = 'microtime';
91
92
	/**
93
	 * TODO: In PHP 5.6, make this a public class constant instead,
94
	 * and also use it in CheckConstraints::getAllowedParams()
95
	 * and in some of the tests.
96
	 *
97
	 * @var string[]
98
	 */
99
	private $cachedStatuses;
100
101
	private $cachedStatusesFlipped;
102
103
	/**
104
	 * @param ResultsSource $resultsSource The ResultsSource that cache misses are delegated to.
105
	 * @param ResultsCache $cache The cache where results can be stored.
106
	 * @param CheckResultSerializer $checkResultSerializer Used to serialize check results.
107
	 * @param CheckResultDeserializer $checkResultDeserializer Used to deserialize check results.
108
	 * @param WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor Used to get the latest revision ID.
109
	 * @param EntityIdParser $entityIdParser Used to parse entity IDs in cached objects.
110
	 * @param int $ttlInSeconds Time-to-live of the cached values, in seconds.
111
	 * @param string[] $possiblyStaleConstraintTypes item IDs of constraint types
112
	 * where cached results may always be stale, regardless of invalidation logic
113
	 * @param int $maxRevisionIds The maximum number of revision IDs to check;
114
	 * if a check result depends on more entity IDs than this number, it is not cached.
115
	 * @param LoggingHelper $loggingHelper
116
	 */
117
	public function __construct(
		ResultsSource $resultsSource,
		ResultsCache $cache,
		CheckResultSerializer $checkResultSerializer,
		CheckResultDeserializer $checkResultDeserializer,
		WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor,
		EntityIdParser $entityIdParser,
		$ttlInSeconds,
		array $possiblyStaleConstraintTypes,
		$maxRevisionIds,
		LoggingHelper $loggingHelper
	) {
		// The statuses whose results are written to and served from the cache.
		// The flipped copy (status => index) allows key lookups in statusSelected().
		$statusesToCache = [
			CheckResult::STATUS_VIOLATION,
			CheckResult::STATUS_WARNING,
			CheckResult::STATUS_BAD_PARAMETERS,
		];
		$this->cachedStatuses = $statusesToCache;
		$this->cachedStatusesFlipped = array_flip( $statusesToCache );

		// Injected collaborators.
		$this->resultsSource = $resultsSource;
		$this->cache = $cache;
		$this->checkResultSerializer = $checkResultSerializer;
		$this->checkResultDeserializer = $checkResultDeserializer;
		$this->wikiPageEntityMetaDataAccessor = $wikiPageEntityMetaDataAccessor;
		$this->entityIdParser = $entityIdParser;
		$this->loggingHelper = $loggingHelper;

		// Plain configuration values.
		$this->ttlInSeconds = $ttlInSeconds;
		$this->possiblyStaleConstraintTypes = $possiblyStaleConstraintTypes;
		$this->maxRevisionIds = $maxRevisionIds;

		// Not injected: always a fresh comparer.
		$this->timeValueComparer = new TimeValueComparer();
	}
148
149
	public function getResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$collectedResults = [];
		$collectedMetadata = [];
		// Flip once so that the per-result status check is a key lookup.
		$requestedStatuses = array_flip( $statuses );

		if ( $this->canUseStoredResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			$servedFromCache = [];
			foreach ( $entityIds as $entityId ) {
				$cached = $this->getStoredResults( $entityId );
				if ( $cached === null ) {
					// Nothing usable in the cache: leave this entity for the wrapped source.
					continue;
				}

				$this->loggingHelper->logCheckConstraintsCacheHit( $entityId );
				foreach ( $cached->getArray() as $cachedResult ) {
					if ( $this->statusSelected( $requestedStatuses, $cachedResult ) ) {
						$collectedResults[] = $cachedResult;
					}
				}
				$collectedMetadata[] = $cached->getMetadata();
				$servedFromCache[] = $entityId;
			}
			// Only entities that were not served from the cache still need to be checked.
			$entityIds = array_values( array_diff( $entityIds, $servedFromCache ) );
		}

		$nothingLeftToCheck = $entityIds === [] && $claimIds === [];
		if ( !$nothingLeftToCheck ) {
			if ( $entityIds !== [] ) {
				$this->loggingHelper->logCheckConstraintsCacheMisses( $entityIds );
			}
			$fresh = $this->getAndStoreResults( $entityIds, $claimIds, $constraintIds, $statuses );
			$collectedResults = array_merge( $collectedResults, $fresh->getArray() );
			$collectedMetadata[] = $fresh->getMetadata();
		}

		// Cached and freshly computed results are returned together, with their metadata merged.
		return new CachedCheckResults(
			$collectedResults,
			Metadata::merge( $collectedMetadata )
		);
	}
188
189
	/**
190
	 * We can only use cached constraint results
191
	 * if nothing more than the problematic results of a full constraint check were requested:
192
	 * constraint checks for the full entity (not just individual statements),
193
	 * without restricting the set of constraints to check,
194
	 * and with no statuses other than 'violation', 'warning' and 'bad-parameters'.
195
	 *
196
	 * @param EntityId[] $entityIds
197
	 * @param string[] $claimIds
198
	 * @param string[]|null $constraintIds
199
	 * @param string[] $statuses
200
	 * @return bool
201
	 */
202 View Code Duplication
	private function canUseStoredResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		// Note: $entityIds is part of the signature but is not inspected by this check.
		// Stored results are usable only when no individual statements were requested,
		// the set of constraints is unrestricted, and every requested status is a cached one.
		return $claimIds === []
			&& $constraintIds === null
			&& array_diff( $statuses, $this->cachedStatuses ) === [];
	}
219
220
	/**
221
	 * Check whether a check result should be used,
222
	 * either because it has the right status
223
	 * or because it is a NullResult whose metadata should be preserved.
224
	 *
225
	 * @param string[] $statusesFlipped
226
	 * @param CheckResult $result
227
	 * @return bool
228
	 */
229
	private function statusSelected( array $statusesFlipped, CheckResult $result ) {
		// $statusesFlipped is keyed by status, so this is a key lookup.
		if ( array_key_exists( $result->getStatus(), $statusesFlipped ) ) {
			return true;
		}

		// A NullResult is kept regardless of its status.
		return $result instanceof NullResult;
	}
233
234
	/**
235
	 * @param EntityId[] $entityIds
236
	 * @param string[] $claimIds
237
	 * @param string[]|null $constraintIds
238
	 * @param string[] $statuses
239
	 * @return CachedCheckResults
240
	 */
241
	public function getAndStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		// Always ask the wrapped source; caching is a side effect on top of that.
		$freshResults = $this->resultsSource->getResults( $entityIds, $claimIds, $constraintIds, $statuses );

		if ( !$this->canStoreResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			return $freshResults;
		}

		// storeResults() receives the whole result set for each entity ID
		// and picks out the results that belong to that entity.
		foreach ( $entityIds as $entityId ) {
			$this->storeResults( $entityId, $freshResults );
		}

		return $freshResults;
	}
257
258
	/**
259
	 * We can only store constraint results
260
	 * if the set of constraints to check was not restricted
261
	 * and all the problematic results were requested.
262
	 * However, it doesn’t matter whether constraint checks on individual statements were requested:
263
	 * we only store results for the mentioned entity IDs,
264
	 * and those will be complete regardless of what’s in the statement IDs.
265
	 * And it also doesn’t matter whether the set of statuses requested
266
	 * was exactly the statuses we cache or a superset of it:
267
	 * as long as all the results we want to cache are there,
268
	 * we can filter out the extraneous ones before we serialize them.
269
	 *
270
	 * @param EntityId[] $entityIds
271
	 * @param string[] $claimIds
272
	 * @param string[]|null $constraintIds
273
	 * @param string[] $statuses
274
	 * @return bool
275
	 */
276 View Code Duplication
	private function canStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		// Note: $entityIds and $claimIds are part of the signature but are not inspected here.
		// Results can be stored when the set of constraints is unrestricted
		// and the requested statuses include every status that gets cached.
		return $constraintIds === null
			&& array_diff( $this->cachedStatuses, $statuses ) === [];
	}
290
291
	/**
292
	 * Store check results for the given entity ID in the cache, if possible.
293
	 *
294
	 * @param EntityId $entityId The entity ID.
295
	 * @param CachedCheckResults $results A collection of check results with metadata.
296
	 * May include check results for other entity IDs as well,
297
	 * or check results with statuses that we’re not interested in caching.
298
	 */
299
	private function storeResults( EntityId $entityId, CachedCheckResults $results ) {
		$dependencyMetadata = $results->getMetadata()->getDependencyMetadata();

		$latestRevisionIds = $this->getLatestRevisionIds( $dependencyMetadata->getEntityIds() );
		if ( $latestRevisionIds === null ) {
			// The revision IDs could not be determined, so nothing is cached.
			return;
		}

		// Keep only results that belong to this entity and have a status worth caching.
		$serializedEntityId = $entityId->getSerialization();
		$serializedResults = [];
		foreach ( $results->getArray() as $checkResult ) {
			if (
				$checkResult->getContextCursor()->getEntityId() === $serializedEntityId &&
				$this->statusSelected( $this->cachedStatusesFlipped, $checkResult )
			) {
				$serializedResults[] = $this->checkResultSerializer->serialize( $checkResult );
			}
		}

		$cacheValue = [
			'results' => $serializedResults,
			'latestRevisionIds' => $latestRevisionIds,
		];

		// The 'futureTime' key is only present when the results depend on a future time.
		$futureTime = $dependencyMetadata->getFutureTime();
		if ( $futureTime !== null ) {
			$cacheValue['futureTime'] = $futureTime->getArrayValue();
		}

		$this->cache->set( $entityId, $cacheValue, $this->ttlInSeconds );
	}
328
329
	/**
330
	 * @param EntityId $entityId
331
	 * @param int $forRevision Requested revision of $entityId
332
	 *            If this parameter is not zero, the results are returned if this is the latest revision,
333
	 *            otherwise null is returned, since we can't get constraints for past revisions.
334
	 * @return CachedCheckResults|null
335
	 */
336
	public function getStoredResults(
		EntityId $entityId,
		$forRevision = 0
	) {
		// $curTTL and $asOf are not set before this call;
		// presumably the cache fills them in by reference (TODO confirm against ResultsCache).
		$cachedValue = $this->cache->get( $entityId, $curTTL, [], $asOf );
		$now = call_user_func( $this->microtime, true );

		$requestedRevisions = [ $entityId->getSerialization() => $forRevision ];
		$dependencyMetadata = $this->checkDependencyMetadata( $cachedValue, $requestedRevisions );
		if ( $dependencyMetadata === null ) {
			// Cache miss, or the cached value is stale.
			return null;
		}

		// Age of the cached value, rounded up to whole seconds.
		$ageInSeconds = (int)ceil( $now - $asOf );
		if ( $ageInSeconds > 0 ) {
			$cachingMetadata = CachingMetadata::ofMaximumAgeInSeconds( $ageInSeconds );
		} else {
			$cachingMetadata = CachingMetadata::fresh();
		}

		$checkResults = [];
		foreach ( $cachedValue['results'] as $serializedResult ) {
			$checkResults[] = $this->deserializeCheckResult( $serializedResult, $cachingMetadata );
		}

		$metadata = Metadata::merge( [
			Metadata::ofCachingMetadata( $cachingMetadata ),
			Metadata::ofDependencyMetadata( $dependencyMetadata ),
		] );

		return new CachedCheckResults( $checkResults, $metadata );
	}
367
368
	/**
369
	 * Extract the dependency metadata of $value
370
	 * and check that the dependency metadata does not indicate staleness.
371
	 *
372
	 * @param array|false $value
373
	 * @param int[] $paramRevs Revisions from parameters, id => revision
374
	 *   These revisions are used instead of ones recorded in the metadata,
375
	 *   so we can serve requests specifying concrete revisions, and if they are not latest,
376
	 *   we will reject them.
377
	 * @return DependencyMetadata|null the dependency metadata,
378
	 * or null if $value should no longer be used
379
	 */
380
	private function checkDependencyMetadata( $value, $paramRevs ) {
		if ( $value === false ) {
			// Nothing was found in the cache.
			return null;
		}

		if ( !array_key_exists( 'futureTime', $value ) ) {
			$baseMetadata = DependencyMetadata::blank();
		} else {
			// Note: the static analysis of this file reports TimeValue::newFromArray()
			// as deprecated since 0.8.6, recommending DataValueDeserializer builder callbacks.
			$futureTime = TimeValue::newFromArray( $value['futureTime'] );
			if ( !$this->timeValueComparer->isFutureTime( $futureTime ) ) {
				// The recorded time is no longer in the future, so the value is stale.
				return null;
			}
			$baseMetadata = DependencyMetadata::ofFutureTime( $futureTime );
		}

		// An explicitly requested revision (> 0) overrides the recorded one if it is lower,
		// so that a request for a non-latest revision fails the comparison below.
		foreach ( $paramRevs as $serializedId => $requestedRevision ) {
			if ( $requestedRevision > 0 ) {
				$value['latestRevisionIds'][$serializedId] = min(
					$requestedRevision,
					$value['latestRevisionIds'][$serializedId] ?? PHP_INT_MAX
				);
			}
		}

		$dependedEntityIds = array_map(
			[ $this->entityIdParser, "parse" ],
			array_keys( $value['latestRevisionIds'] )
		);

		// Strict array comparison: keys, values, order and types must all match.
		if ( $value['latestRevisionIds'] !== $this->getLatestRevisionIds( $dependedEntityIds ) ) {
			return null;
		}

		// Fold every depended-on entity ID into the (possibly blank) future-time metadata.
		$dependencyMetadata = $baseMetadata;
		foreach ( $dependedEntityIds as $dependedEntityId ) {
			$dependencyMetadata = DependencyMetadata::merge( [
				$dependencyMetadata,
				DependencyMetadata::ofEntityId( $dependedEntityId )
			] );
		}

		return $dependencyMetadata;
	}
421
422
	/**
423
	 * Deserialize a check result.
424
	 * If the result might be stale after caching
425
	 * (because its dependencies cannot be fully tracked in its dependency metadata),
426
	 * also add $cachingMetadata to it.
427
	 *
428
	 * @param array $resultSerialization
429
	 * @param CachingMetadata $cachingMetadata
430
	 * @return CheckResult
431
	 */
432
	private function deserializeCheckResult(
		array $resultSerialization,
		CachingMetadata $cachingMetadata
	) {
		$checkResult = $this->checkResultDeserializer->deserialize( $resultSerialization );

		if ( !$this->isPossiblyStaleResult( $checkResult ) ) {
			return $checkResult;
		}

		// The result may be stale, so attach the caching metadata to its existing metadata.
		// NOTE(review): the return value of withMetadata() is discarded, so this relies on
		// withMetadata() modifying the result in place — confirm against CheckResult.
		$mergedMetadata = Metadata::merge( [
			$checkResult->getMetadata(),
			Metadata::ofCachingMetadata( $cachingMetadata ),
		] );
		$checkResult->withMetadata( $mergedMetadata );

		return $checkResult;
	}
447
448
	/**
449
	 * @param CheckResult $result
450
	 * @return bool
451
	 */
452
	private function isPossiblyStaleResult( CheckResult $result ) {
		// NullResults are never treated as possibly stale.
		if ( $result instanceof NullResult ) {
			return false;
		}

		// Strict comparison: constraint type item IDs are strings,
		// so no loose type juggling should take part in the membership test.
		return in_array(
			$result->getConstraint()->getConstraintTypeItemId(),
			$this->possiblyStaleConstraintTypes,
			true
		);
	}
462
463
	/**
464
	 * @param EntityId[] $entityIds
465
	 * @return int[]|null array from entity ID serializations to revision ID,
466
	 * or null to indicate that not all revision IDs could be loaded
467
	 */
468
	private function getLatestRevisionIds( array $entityIds ) {
		if ( $entityIds === [] ) {
			// No dependencies at all: log it and return an empty map.
			$this->loggingHelper->logEmptyDependencyMetadata();
			return [];
		}

		if ( count( $entityIds ) > $this->maxRevisionIds ) {
			// one of those entities will probably be edited soon, so might as well skip caching
			$this->loggingHelper->logHugeDependencyMetadata( $entityIds, $this->maxRevisionIds );
			return null;
		}

		$revisionIds = $this->wikiPageEntityMetaDataAccessor->loadLatestRevisionIds(
			$entityIds,
			EntityRevisionLookup::LATEST_FROM_REPLICA
		);

		// A false entry means a revision ID could not be loaded; report that as null.
		return $this->hasFalseElements( $revisionIds ) ? null : $revisionIds;
	}
488
489
	/**
490
	 * @param array $array
491
	 * @return bool
492
	 */
493
	private function hasFalseElements( array $array ) {
		// Identity check: only a real boolean false counts, not 0, '' or null.
		foreach ( $array as $element ) {
			if ( $element === false ) {
				return true;
			}
		}

		return false;
	}
496
497
	/**
498
	 * Set a custom function to get the current time, instead of microtime().
499
	 *
500
	 * @param callable $microtime
501
	 */
502
	public function setMicrotimeFunction( callable $microtime ) {
		// getStoredResults() calls this with a single argument, true,
		// and uses the result as the current time.
		$this->microtime = $microtime;
	}
505
506
}
507