Completed
Push — master ( f2f357...ccb861 )
by
unknown
02:21
created

CachingResultsSource::getAndStoreResults()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 16
rs 9.7333
c 0
b 0
f 0
cc 3
nc 2
nop 4
1
<?php
2
3
4
namespace WikibaseQuality\ConstraintReport\Api;
5
6
use DataValues\TimeValue;
7
use Wikibase\DataModel\Entity\EntityId;
8
use Wikibase\DataModel\Entity\EntityIdParser;
9
use Wikibase\Lib\Store\EntityRevisionLookup;
10
use Wikibase\Lib\Store\Sql\WikiPageEntityMetaDataAccessor;
11
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedCheckResults;
12
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata;
13
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\DependencyMetadata;
14
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata;
15
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\LoggingHelper;
16
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\TimeValueComparer;
17
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResult;
18
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultDeserializer;
19
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultSerializer;
20
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\NullResult;
21
22
/**
23
 * A ResultsSource that wraps another ResultsSource,
24
 * storing results in a cache
25
 * and retrieving them from there if the results are still fresh.
26
 *
27
 * @author Lucas Werkmeister
28
 * @license GPL-2.0-or-later
29
 */
30
class CachingResultsSource implements ResultsSource {
31
32
	/**
	 * @var ResultsSource the wrapped source that cache misses are delegated to
	 */
	private $resultsSource;

	/**
	 * @var ResultsCache the cache where results are stored
	 */
	private $cache;

	/**
	 * @var CheckResultSerializer turns check results into their cached form
	 */
	private $checkResultSerializer;

	/**
	 * @var CheckResultDeserializer turns cached serializations back into check results
	 */
	private $checkResultDeserializer;

	/**
	 * @var WikiPageEntityMetaDataAccessor used to load the latest revision IDs
	 */
	private $wikiPageEntityMetaDataAccessor;

	/**
	 * @var EntityIdParser used to parse entity IDs found in cached values
	 */
	private $entityIdParser;

	/**
	 * @var int time-to-live of cached values, in seconds
	 */
	private $ttlInSeconds;

	/**
	 * @var string[] item IDs of constraint types whose cached results may always be stale
	 */
	private $possiblyStaleConstraintTypes;

	/**
	 * @var int maximum number of revision IDs to check for one cached value
	 */
	private $maxRevisionIds;

	/**
	 * @var LoggingHelper
	 */
	private $loggingHelper;

	/**
	 * @var TimeValueComparer
	 */
	private $timeValueComparer;

	/**
	 * @var callable function returning the current time; replaceable via setMicrotimeFunction()
	 */
	private $microtime = 'microtime';

	/**
	 * The statuses of the check results that are kept in the cache.
	 *
	 * TODO: In PHP 5.6, make this a public class constant instead,
	 * and also use it in CheckConstraints::getAllowedParams()
	 * and in some of the tests.
	 *
	 * @var string[]
	 */
	private $cachedStatuses;

	/**
	 * $cachedStatuses passed through array_flip(),
	 * i.e. with the statuses as array keys, for key-based lookups.
	 *
	 * @var int[]
	 */
	private $cachedStatusesFlipped;
102
103
	/**
	 * @param ResultsSource $resultsSource The ResultsSource that cache misses are delegated to.
	 * @param ResultsCache $cache The cache where results can be stored.
	 * @param CheckResultSerializer $checkResultSerializer Used to serialize check results.
	 * @param CheckResultDeserializer $checkResultDeserializer Used to deserialize check results.
	 * @param WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor Used to get the latest revision ID.
	 * @param EntityIdParser $entityIdParser Used to parse entity IDs in cached objects.
	 * @param int $ttlInSeconds Time-to-live of the cached values, in seconds.
	 * @param string[] $possiblyStaleConstraintTypes item IDs of constraint types
	 * where cached results may always be stale, regardless of invalidation logic
	 * @param int $maxRevisionIds The maximum number of revision IDs to check;
	 * if a check result depends on more entity IDs than this number, it is not cached.
	 * @param LoggingHelper $loggingHelper
	 */
	public function __construct(
		ResultsSource $resultsSource,
		ResultsCache $cache,
		CheckResultSerializer $checkResultSerializer,
		CheckResultDeserializer $checkResultDeserializer,
		WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor,
		EntityIdParser $entityIdParser,
		$ttlInSeconds,
		array $possiblyStaleConstraintTypes,
		$maxRevisionIds,
		LoggingHelper $loggingHelper
	) {
		// injected collaborators
		$this->resultsSource = $resultsSource;
		$this->cache = $cache;
		$this->checkResultSerializer = $checkResultSerializer;
		$this->checkResultDeserializer = $checkResultDeserializer;
		$this->wikiPageEntityMetaDataAccessor = $wikiPageEntityMetaDataAccessor;
		$this->entityIdParser = $entityIdParser;
		$this->loggingHelper = $loggingHelper;

		// caching configuration
		$this->ttlInSeconds = $ttlInSeconds;
		$this->possiblyStaleConstraintTypes = $possiblyStaleConstraintTypes;
		$this->maxRevisionIds = $maxRevisionIds;

		// internally created helper
		$this->timeValueComparer = new TimeValueComparer();

		// only results with these statuses are cached;
		// the flipped copy allows lookups by array key
		$statusesToCache = [
			CheckResult::STATUS_VIOLATION,
			CheckResult::STATUS_WARNING,
			CheckResult::STATUS_BAD_PARAMETERS,
		];
		$this->cachedStatuses = $statusesToCache;
		$this->cachedStatusesFlipped = array_flip( $statusesToCache );
	}
148
149
	/**
	 * Get check results for the given entities and statements.
	 * Where canUseStoredResults() permits it, results for whole entities
	 * are taken from the cache; everything that is left over
	 * (cache misses and all statement IDs) goes through getAndStoreResults().
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckResults
	 */
	public function getResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$collectedResults = [];
		$collectedMetadata = [];
		$wantedStatuses = array_flip( $statuses );

		if ( $this->canUseStoredResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			$cacheHits = [];
			foreach ( $entityIds as $entityId ) {
				$cached = $this->getStoredResults( $entityId );
				if ( $cached === null ) {
					// nothing usable in the cache, this entity stays in $entityIds
					continue;
				}

				$this->loggingHelper->logCheckConstraintsCacheHit( $entityId );
				foreach ( $cached->getArray() as $checkResult ) {
					if ( $this->statusSelected( $wantedStatuses, $checkResult ) ) {
						$collectedResults[] = $checkResult;
					}
				}
				$collectedMetadata[] = $cached->getMetadata();
				$cacheHits[] = $entityId;
			}
			// only the entities without a cache hit still need to be checked
			$entityIds = array_values( array_diff( $entityIds, $cacheHits ) );
		}

		$hasEntityMisses = $entityIds !== [];
		if ( $hasEntityMisses || $claimIds !== [] ) {
			if ( $hasEntityMisses ) {
				$this->loggingHelper->logCheckConstraintsCacheMisses( $entityIds );
			}
			$fresh = $this->getAndStoreResults( $entityIds, $claimIds, $constraintIds, $statuses );
			$collectedResults = array_merge( $collectedResults, $fresh->getArray() );
			$collectedMetadata[] = $fresh->getMetadata();
		}

		return new CachedCheckResults(
			$collectedResults,
			Metadata::merge( $collectedMetadata )
		);
	}
188
189
	/**
	 * Cached results are only usable when the request asks for
	 * no more than the problematic results of a full constraint check:
	 * no individual statements, no restriction of the constraints to check,
	 * and no status outside of the cached ones
	 * ('violation', 'warning' and 'bad-parameters').
	 *
	 * @param EntityId[] $entityIds (not inspected)
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return bool
	 */
	private function canUseStoredResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		return $claimIds === []
			&& $constraintIds === null
			// every requested status must be one of the cached statuses
			&& array_diff( $statuses, $this->cachedStatuses ) === [];
	}
219
220
	/**
	 * Decide whether a check result should be kept:
	 * it is kept if its status is one of the selected statuses,
	 * and a NullResult is always kept so that its metadata is preserved.
	 *
	 * @param string[] $statusesFlipped the selected statuses, as array keys
	 * @param CheckResult $result
	 * @return bool
	 */
	private function statusSelected( array $statusesFlipped, CheckResult $result ) {
		if ( array_key_exists( $result->getStatus(), $statusesFlipped ) ) {
			return true;
		}

		return $result instanceof NullResult;
	}
233
234
	/**
	 * Get results from the wrapped ResultsSource and,
	 * if canStoreResults() allows it for this request,
	 * store them in the cache for each of the given entity IDs.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckResults
	 */
	public function getAndStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$freshResults = $this->resultsSource->getResults( $entityIds, $claimIds, $constraintIds, $statuses );

		if ( !$this->canStoreResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			return $freshResults;
		}

		// storeResults() picks the results belonging to each entity out of the full set
		foreach ( $entityIds as $entityId ) {
			$this->storeResults( $entityId, $freshResults );
		}

		return $freshResults;
	}
257
258
	/**
	 * Results can only be stored
	 * if the set of constraints to check was not restricted
	 * and all the statuses we cache were among the requested ones.
	 *
	 * Statement IDs in the request are irrelevant here:
	 * only results for the requested entity IDs are stored,
	 * and those are complete no matter which statements were also requested.
	 * Likewise, requesting a superset of the cached statuses is fine,
	 * since the extra results are filtered out before serialization.
	 *
	 * @param EntityId[] $entityIds (not inspected)
	 * @param string[] $claimIds (not inspected)
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return bool
	 */
	private function canStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		return $constraintIds === null
			// none of the cached statuses may be missing from the request
			&& array_diff( $this->cachedStatuses, $statuses ) === [];
	}
290
291
	/**
	 * Write the check results of one entity to the cache, if possible.
	 * Nothing is stored if the latest revision IDs of the entities
	 * that the results depend on cannot be determined.
	 *
	 * @param EntityId $entityId The entity ID.
	 * @param CachedCheckResults $results A collection of check results with metadata.
	 * It may contain check results for other entity IDs
	 * or with statuses that are not cached; those are skipped.
	 */
	private function storeResults( EntityId $entityId, CachedCheckResults $results ) {
		$dependencyMetadata = $results->getMetadata()->getDependencyMetadata();

		$latestRevisionIds = $this->getLatestRevisionIds( $dependencyMetadata->getEntityIds() );
		if ( $latestRevisionIds === null ) {
			return;
		}

		$ownSerialization = $entityId->getSerialization();
		$resultSerializations = [];
		foreach ( $results->getArray() as $checkResult ) {
			$belongsToEntity = $checkResult->getContextCursor()->getEntityId() === $ownSerialization;
			if ( $belongsToEntity && $this->statusSelected( $this->cachedStatusesFlipped, $checkResult ) ) {
				$resultSerializations[] = $this->checkResultSerializer->serialize( $checkResult );
			}
		}

		$value = [
			'results' => $resultSerializations,
			'latestRevisionIds' => $latestRevisionIds,
		];
		// 'futureTime' is only present in the cached value if the metadata has one
		$futureTime = $dependencyMetadata->getFutureTime();
		if ( $futureTime !== null ) {
			$value['futureTime'] = $futureTime->getArrayValue();
		}

		$this->cache->set( $entityId, $value, $this->ttlInSeconds );
	}
328
329
	/**
	 * Load the cached check results of an entity.
	 *
	 * @param EntityId $entityId
	 * @return CachedCheckResults|null the cached results, with caching and dependency metadata,
	 * or null if checkDependencyMetadata() rejects the cached value
	 */
	public function getStoredResults(
		EntityId $entityId
	) {
		// $asOf is read below, so ResultsCache::get() is expected to fill it in by reference
		$cached = $this->cache->get( $entityId, $curTTL, [], $asOf );
		$currentTime = call_user_func( $this->microtime, true );

		$dependencyMetadata = $this->checkDependencyMetadata( $cached );
		if ( $dependencyMetadata === null ) {
			return null;
		}

		// age of the cached value, rounded up to whole seconds
		$ageInSeconds = (int)ceil( $currentTime - $asOf );
		if ( $ageInSeconds > 0 ) {
			$cachingMetadata = CachingMetadata::ofMaximumAgeInSeconds( $ageInSeconds );
		} else {
			$cachingMetadata = CachingMetadata::fresh();
		}

		$checkResults = [];
		foreach ( $cached['results'] as $serialization ) {
			$checkResults[] = $this->deserializeCheckResult( $serialization, $cachingMetadata );
		}

		$metadata = Metadata::merge( [
			Metadata::ofCachingMetadata( $cachingMetadata ),
			Metadata::ofDependencyMetadata( $dependencyMetadata ),
		] );

		return new CachedCheckResults( $checkResults, $metadata );
	}
362
363
	/**
	 * Rebuild the dependency metadata of a cached value
	 * and verify that it does not indicate that the value is stale.
	 *
	 * The value is rejected if it is false,
	 * if it has a 'futureTime' that TimeValueComparer::isFutureTime() no longer accepts,
	 * or if its 'latestRevisionIds' differ from the current latest revision IDs.
	 *
	 * @param array|bool $value
	 * @return DependencyMetadata|null the dependency metadata,
	 * or null if $value should no longer be used
	 */
	private function checkDependencyMetadata( $value ) {
		if ( $value === false ) {
			return null;
		}

		if ( !array_key_exists( 'futureTime', $value ) ) {
			$dependencyMetadata = DependencyMetadata::blank();
		} else {
			$futureTime = TimeValue::newFromArray( $value['futureTime'] );
			if ( !$this->timeValueComparer->isFutureTime( $futureTime ) ) {
				return null;
			}
			$dependencyMetadata = DependencyMetadata::ofFutureTime( $futureTime );
		}

		// the keys of 'latestRevisionIds' are entity ID serializations
		$storedRevisionIds = $value['latestRevisionIds'];
		$dependedEntityIds = [];
		foreach ( array_keys( $storedRevisionIds ) as $serialization ) {
			$dependedEntityIds[] = $this->entityIdParser->parse( $serialization );
		}

		if ( $this->getLatestRevisionIds( $dependedEntityIds ) !== $storedRevisionIds ) {
			return null;
		}

		// add each depended-on entity ID to the metadata, one at a time
		foreach ( $dependedEntityIds as $dependedEntityId ) {
			$dependencyMetadata = DependencyMetadata::merge( [
				$dependencyMetadata,
				DependencyMetadata::ofEntityId( $dependedEntityId )
			] );
		}

		return $dependencyMetadata;
	}
406
407
	/**
	 * Deserialize a check result.
	 * If the result might be stale after caching
	 * (because its dependencies cannot be fully tracked in its dependency metadata),
	 * also add $cachingMetadata to it.
	 *
	 * @param array $resultSerialization
	 * @param CachingMetadata $cachingMetadata
	 * @return CheckResult
	 */
	private function deserializeCheckResult(
		array $resultSerialization,
		CachingMetadata $cachingMetadata
	) {
		$result = $this->checkResultDeserializer->deserialize( $resultSerialization );
		if ( !$this->isPossiblyStaleResult( $result ) ) {
			return $result;
		}

		$withCachingMetadata = $result->withMetadata(
			Metadata::merge( [
				$result->getMetadata(),
				Metadata::ofCachingMetadata( $cachingMetadata ),
			] )
		);

		// CheckResult::withMetadata() is defined outside this file.
		// Use the returned object if there is one, so that the caching metadata
		// is kept whether withMetadata() modifies $result in place or returns a copy;
		// otherwise fall back to the object it was called on.
		return $withCachingMetadata instanceof CheckResult ? $withCachingMetadata : $result;
	}
432
433
	/**
	 * Check whether a result belongs to one of the constraint types
	 * listed in $possiblyStaleConstraintTypes.
	 * A NullResult is never considered possibly stale.
	 *
	 * @param CheckResult $result
	 * @return bool
	 */
	private function isPossiblyStaleResult( CheckResult $result ) {
		if ( $result instanceof NullResult ) {
			return false;
		}

		// strict comparison: constraint type item IDs are compared as exact strings,
		// without PHP's loose type juggling
		return in_array(
			$result->getConstraint()->getConstraintTypeItemId(),
			$this->possiblyStaleConstraintTypes,
			true
		);
	}
447
448
	/**
	 * Load the latest revision IDs of the given entities.
	 *
	 * @param EntityId[] $entityIds
	 * @return int[]|null array from entity ID serializations to revision ID
	 * (empty if no entity IDs were given),
	 * or null to indicate that not all revision IDs could be loaded
	 * or that there are more entity IDs than $maxRevisionIds
	 */
	private function getLatestRevisionIds( array $entityIds ) {
		$entityCount = count( $entityIds );

		if ( $entityCount === 0 ) {
			$this->loggingHelper->logEmptyDependencyMetadata();
			return [];
		}

		if ( $entityCount > $this->maxRevisionIds ) {
			// one of those entities will probably be edited soon, so might as well skip caching
			$this->loggingHelper->logHugeDependencyMetadata( $entityIds, $this->maxRevisionIds );
			return null;
		}

		$revisionIds = $this->wikiPageEntityMetaDataAccessor->loadLatestRevisionIds(
			$entityIds,
			EntityRevisionLookup::LATEST_FROM_REPLICA
		);

		// a false element means that a revision ID could not be loaded
		return $this->hasFalseElements( $revisionIds ) ? null : $revisionIds;
	}
473
474
	/**
	 * Check whether at least one element of the array is exactly false
	 * (strict comparison, so other falsy values do not count).
	 *
	 * @param array $array
	 * @return bool
	 */
	private function hasFalseElements( array $array ) {
		// array_search() returns the key of the first match, or false if there is none
		return array_search( false, $array, true ) !== false;
	}
481
482
	/**
	 * Replace the function used to get the current time
	 * (microtime() by default) with a custom one.
	 * It is called with true as its only argument by getStoredResults().
	 *
	 * @param callable $microtime
	 */
	public function setMicrotimeFunction( callable $microtime ) {
		$this->microtime = $microtime;
	}
490
491
}
492