Completed
Push — master ( 7e1aa1...af1662 )
by
unknown
06:36
created

CachingResultsSource::getResults()   D

Complexity

Conditions 9
Paths 6

Size

Total Lines 38
Code Lines 28

Duplication

Lines 8
Ratio 21.05 %

Importance

Changes 0
Metric Value
dl 8
loc 38
rs 4.909
c 0
b 0
f 0
cc 9
eloc 28
nc 6
nop 4
1
<?php
2
3
4
namespace WikibaseQuality\ConstraintReport\Api;
5
6
use DataValues\TimeValue;
7
use Wikibase\DataModel\Entity\EntityId;
8
use Wikibase\DataModel\Entity\EntityIdParser;
9
use Wikibase\Lib\Store\EntityRevisionLookup;
10
use Wikibase\Lib\Store\Sql\WikiPageEntityMetaDataAccessor;
11
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedCheckResults;
12
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata;
13
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\DependencyMetadata;
14
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata;
15
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\LoggingHelper;
16
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\TimeValueComparer;
17
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResult;
18
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultDeserializer;
19
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultSerializer;
20
21
/**
22
 * A ResultsSource that wraps another ResultsSource,
23
 * storing results in a cache
24
 * and retrieving them from there if the results are still fresh.
25
 *
26
 * @author Lucas Werkmeister
27
 * @license GPL-2.0-or-later
28
 */
29
class CachingResultsSource implements ResultsSource {
30
31
	/**
32
	 * @var ResultsSource
33
	 */
34
	private $resultsSource;
35
36
	/**
37
	 * @var ResultsCache
38
	 */
39
	private $cache;
40
41
	/**
42
	 * @var CheckResultSerializer
43
	 */
44
	private $checkResultSerializer;
45
46
	/**
47
	 * @var CheckResultDeserializer
48
	 */
49
	private $checkResultDeserializer;
50
51
	/**
52
	 * @var WikiPageEntityMetaDataAccessor
53
	 */
54
	private $wikiPageEntityMetaDataAccessor;
55
56
	/**
57
	 * @var EntityIdParser
58
	 */
59
	private $entityIdParser;
60
61
	/**
62
	 * @var int
63
	 */
64
	private $ttlInSeconds;
65
66
	/**
67
	 * @var string[]
68
	 */
69
	private $possiblyStaleConstraintTypes;
70
71
	/**
72
	 * @var int
73
	 */
74
	private $maxRevisionIds;
75
76
	/**
77
	 * @var LoggingHelper
78
	 */
79
	private $loggingHelper;
80
81
	/**
82
	 * @var TimeValueComparer
83
	 */
84
	private $timeValueComparer;
85
86
	/**
87
	 * @var callable
88
	 */
89
	private $microtime = 'microtime';
90
91
	/**
92
	 * TODO: In PHP 5.6, make this a public class constant instead,
93
	 * and also use it in CheckConstraints::getAllowedParams()
94
	 * and in some of the tests.
95
	 *
96
	 * @var string[]
97
	 */
98
	private $cachedStatuses;
99
100
	/**
101
	 * @param ResultsSource $resultsSource The ResultsSource that cache misses are delegated to.
102
	 * @param ResultsCache $cache The cache where results can be stored.
103
	 * @param CheckResultSerializer $checkResultSerializer Used to serialize check results.
104
	 * @param CheckResultDeserializer $checkResultDeserializer Used to deserialize check results.
105
	 * @param WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor Used to get the latest revision ID.
106
	 * @param EntityIdParser $entityIdParser Used to parse entity IDs in cached objects.
107
	 * @param int $ttlInSeconds Time-to-live of the cached values, in seconds.
108
	 * @param string[] $possiblyStaleConstraintTypes item IDs of constraint types
109
	 * where cached results may always be stale, regardless of invalidation logic
110
	 * @param int $maxRevisionIds The maximum number of revision IDs to check;
111
	 * if a check result depends on more entity IDs than this number, it is not cached.
112
	 * @param LoggingHelper $loggingHelper
113
	 */
114
	public function __construct(
115
		ResultsSource $resultsSource,
116
		ResultsCache $cache,
117
		CheckResultSerializer $checkResultSerializer,
118
		CheckResultDeserializer $checkResultDeserializer,
119
		WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor,
120
		EntityIdParser $entityIdParser,
121
		$ttlInSeconds,
122
		array $possiblyStaleConstraintTypes,
123
		$maxRevisionIds,
124
		LoggingHelper $loggingHelper
125
	) {
126
		$this->resultsSource = $resultsSource;
127
		$this->cache = $cache;
128
		$this->checkResultSerializer = $checkResultSerializer;
129
		$this->checkResultDeserializer = $checkResultDeserializer;
130
		$this->wikiPageEntityMetaDataAccessor = $wikiPageEntityMetaDataAccessor;
131
		$this->entityIdParser = $entityIdParser;
132
		$this->ttlInSeconds = $ttlInSeconds;
133
		$this->possiblyStaleConstraintTypes = $possiblyStaleConstraintTypes;
134
		$this->maxRevisionIds = $maxRevisionIds;
135
		$this->loggingHelper = $loggingHelper;
136
		$this->timeValueComparer = new TimeValueComparer();
137
138
		$this->cachedStatuses = [
0 ignored issues
show
Documentation Bug introduced by
It seems like array(\WikibaseQuality\C...:STATUS_BAD_PARAMETERS) of type array<integer,?> is incompatible with the declared type array<integer,string> of property $cachedStatuses.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
139
			CheckResult::STATUS_VIOLATION,
140
			CheckResult::STATUS_WARNING,
141
			CheckResult::STATUS_BAD_PARAMETERS,
142
		];
143
	}
144
145
	/**
	 * Get check results, serving them from the cache where possible
	 * and delegating everything else to getAndStoreResults().
	 *
	 * The cache is only consulted when canUseStoredResults() accepts the request.
	 * Entity IDs for which getStoredResults() returned a value are removed
	 * from the list that is passed on, and cached results whose status
	 * is not contained in $statuses are dropped.
	 * The metadata of all cached and freshly fetched responses is merged
	 * into the returned object.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckResults
	 */
	public function getResults(
146
		array $entityIds,
147
		array $claimIds,
148
		array $constraintIds = null,
149
		array $statuses
150
	) {
151
		$results = [];
152
		$metadatas = [];
153
		if ( $this->canUseStoredResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
0 ignored issues
show
Bug introduced by
It seems like $constraintIds defined by parameter $constraintIds on line 148 can also be of type array; however, WikibaseQuality\Constrai...::canUseStoredResults() does only seem to accept null|array<integer,string>, maybe add an additional type check?

This check looks at variables that have been passed in as parameters and are passed out again to other methods.

If the outgoing method call has stricter type requirements than the method itself, an issue is raised.

An additional type check may prevent trouble.

Loading history...
154
			$storedEntityIds = [];
155
			foreach ( $entityIds as $entityId ) {
156
				$storedResults = $this->getStoredResults( $entityId );
157
				if ( $storedResults !== null ) {
158
					$this->loggingHelper->logCheckConstraintsCacheHit( $entityId );
159
					foreach ( $storedResults->getArray() as $checkResult ) {
160
						if ( in_array( $checkResult->getStatus(), $statuses ) ) {
161
							$results[] = $checkResult;
162
						}
163
					}
164
					$metadatas[] = $storedResults->getMetadata();
165
					$storedEntityIds[] = $entityId;
166
				}
167
			}
168
			$entityIds = array_values( array_diff( $entityIds, $storedEntityIds ) );
169
		}
170 View Code Duplication
		if ( $entityIds !== [] || $claimIds !== [] ) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
171
			if ( $entityIds !== [] ) {
172
				$this->loggingHelper->logCheckConstraintsCacheMisses( $entityIds );
173
			}
174
			$response = $this->getAndStoreResults( $entityIds, $claimIds, $constraintIds, $statuses );
0 ignored issues
show
Bug introduced by
It seems like $constraintIds defined by parameter $constraintIds on line 148 can also be of type array; however, WikibaseQuality\Constrai...e::getAndStoreResults() does only seem to accept null|array<integer,string>, maybe add an additional type check?

This check looks at variables that have been passed in as parameters and are passed out again to other methods.

If the outgoing method call has stricter type requirements than the method itself, an issue is raised.

An additional type check may prevent trouble.

Loading history...
175
			$results = array_merge( $results, $response->getArray() );
176
			$metadatas[] = $response->getMetadata();
177
		}
178
		return new CachedCheckResults(
179
			$results,
180
			Metadata::merge( $metadatas )
181
		);
182
	}
183
184
	/**
185
	 * We can only use cached constraint results
186
	 * if nothing more than the problematic results of a full constraint check were requested:
187
	 * constraint checks for the full entity (not just individual statements),
188
	 * without restricting the set of constraints to check,
189
	 * and with no statuses other than 'violation', 'warning' and 'bad-parameters'.
190
	 *
191
	 * @param EntityId[] $entityIds
192
	 * @param string[] $claimIds
193
	 * @param string[]|null $constraintIds
194
	 * @param string[] $statuses
195
	 * @return bool
196
	 */
197 View Code Duplication
	private function canUseStoredResults(
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
198
		array $entityIds,
0 ignored issues
show
Unused Code introduced by
The parameter $entityIds is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
199
		array $claimIds,
200
		array $constraintIds = null,
201
		array $statuses
202
	) {
203
		if ( $claimIds !== [] ) {
204
			return false;
205
		}
206
		if ( $constraintIds !== null ) {
207
			return false;
208
		}
209
		if ( array_diff( $statuses, $this->cachedStatuses ) !== [] ) {
210
			return false;
211
		}
212
		return true;
213
	}
214
215
	/**
	 * Fetch results from the wrapped results source
	 * and, where permitted, store them in the cache for each requested entity ID.
	 *
	 * Storing is skipped entirely when canStoreResults() rejects the request;
	 * the freshly fetched results are returned to the caller either way.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckResults
	 */
	public function getAndStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$freshResults = $this->resultsSource->getResults( $entityIds, $claimIds, $constraintIds, $statuses );

		if ( !$this->canStoreResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			return $freshResults;
		}

		// the same result collection is handed to storeResults() for every entity ID
		foreach ( $entityIds as $id ) {
			$this->storeResults( $id, $freshResults );
		}

		return $freshResults;
	}
238
239
	/**
240
	 * We can only store constraint results
241
	 * if the set of constraints to check was not restricted
242
	 * and all the problematic results were requested.
243
	 * However, it doesn’t matter whether constraint checks on individual statements were requested:
244
	 * we only store results for the mentioned entity IDs,
245
	 * and those will be complete regardless of what’s in the statement IDs.
246
	 * And it also doesn’t matter whether the set of statuses requested
247
	 * was exactly the statuses we cache or a superset of it:
248
	 * as long as all the results we want to cache are there,
249
	 * we can filter out the extraneous ones before we serialize them.
250
	 *
251
	 * @param EntityId[] $entityIds
252
	 * @param string[] $claimIds
253
	 * @param string[]|null $constraintIds
254
	 * @param string[] $statuses
255
	 * @return bool
256
	 */
257 View Code Duplication
	private function canStoreResults(
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
258
		array $entityIds,
0 ignored issues
show
Unused Code introduced by
The parameter $entityIds is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
259
		array $claimIds,
0 ignored issues
show
Unused Code introduced by
The parameter $claimIds is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
260
		array $constraintIds = null,
261
		array $statuses
262
	) {
263
		if ( $constraintIds !== null ) {
264
			return false;
265
		}
266
		if ( array_diff( $this->cachedStatuses, $statuses ) !== [] ) {
267
			return false;
268
		}
269
		return true;
270
	}
271
272
	/**
	 * Store check results for the given entity ID in the cache, if possible.
	 *
	 * Nothing is stored when getLatestRevisionIds() returns null
	 * for the entity IDs listed in the results’ dependency metadata.
	 * The cached value holds the serialized results, the latest revision IDs
	 * and, if the dependency metadata has one, the future time.
	 *
	 * @param EntityId $entityId The entity ID.
	 * @param CachedCheckResults $results A collection of check results with metadata.
	 * May include check results for other entity IDs as well,
	 * or check results with statuses that we’re not interested in caching.
	 */
	private function storeResults( EntityId $entityId, CachedCheckResults $results ) {
		$latestRevisionIds = $this->getLatestRevisionIds(
			$results->getMetadata()->getDependencyMetadata()->getEntityIds()
		);
		if ( $latestRevisionIds === null ) {
			return;
		}

		// loop-invariant, so only serialize the entity ID once
		$entityIdSerialization = $entityId->getSerialization();

		$resultSerializations = [];
		foreach ( $results->getArray() as $checkResult ) {
			// skip results that belong to a different entity
			if ( $checkResult->getContextCursor()->getEntityId() !== $entityIdSerialization ) {
				continue;
			}
			// skip statuses we do not cache; strict comparison avoids type juggling
			if ( !in_array( $checkResult->getStatus(), $this->cachedStatuses, true ) ) {
				continue;
			}
			$resultSerializations[] = $this->checkResultSerializer->serialize( $checkResult );
		}

		$value = [
			'results' => $resultSerializations,
			'latestRevisionIds' => $latestRevisionIds,
		];
		$futureTime = $results->getMetadata()->getDependencyMetadata()->getFutureTime();
		if ( $futureTime !== null ) {
			// only present when set; checkDependencyMetadata() tests for the key
			$value['futureTime'] = $futureTime->getArrayValue();
		}

		$this->cache->set( $entityId, $value, $this->ttlInSeconds );
	}
310
311
	/**
	 * Load the cached check results for an entity, if there are usable ones.
	 *
	 * The cached value is discarded (null is returned)
	 * when checkDependencyMetadata() returns null for it.
	 * Otherwise the results are deserialized and returned together with
	 * caching metadata derived from the age of the cache entry
	 * and the dependency metadata of the cached value.
	 *
	 * @param EntityId $entityId
	 * @return CachedCheckResults|null
	 */
	public function getStoredResults(
		EntityId $entityId
	) {
		// $curTTL and $asOf are not set before this call;
		// presumably they are by-reference output parameters of the cache — confirm
		$cached = $this->cache->get( $entityId, $curTTL, [], $asOf );
		$currentTime = call_user_func( $this->microtime, true );

		$dependencyMetadata = $this->checkDependencyMetadata( $cached );
		if ( $dependencyMetadata === null ) {
			return null;
		}

		// round the age up to whole seconds; a non-positive age counts as fresh
		$age = (int)ceil( $currentTime - $asOf );
		if ( $age > 0 ) {
			$cachingMetadata = CachingMetadata::ofMaximumAgeInSeconds( $age );
		} else {
			$cachingMetadata = CachingMetadata::fresh();
		}

		$checkResults = [];
		foreach ( $cached['results'] as $serialization ) {
			$checkResults[] = $this->deserializeCheckResult( $serialization, $cachingMetadata );
		}

		$metadata = Metadata::merge( [
			Metadata::ofCachingMetadata( $cachingMetadata ),
			Metadata::ofDependencyMetadata( $dependencyMetadata ),
		] );

		return new CachedCheckResults( $checkResults, $metadata );
	}
344
345
	/**
346
	 * Extract the dependency metadata of $value
347
	 * and check that the dependency metadata does not indicate staleness.
348
	 *
349
	 * @param array|bool $value
350
	 * @return DependencyMetadata|null the dependency metadata,
351
	 * or null if $value should no longer be used
352
	 */
353
	private function checkDependencyMetadata( $value ) {
354
		// false means there is no array to inspect (presumably a cache miss — confirm against the cache)
		if ( $value === false ) {
355
			return null;
356
		}
357
358 View Code Duplication
		// 'futureTime' is only present when storeResults() had a future time to store
		if ( array_key_exists( 'futureTime', $value ) ) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
359
			$futureTime = TimeValue::newFromArray( $value['futureTime'] );
360
			// once the comparer no longer reports the stored time as future, the value is unusable
			if ( !$this->timeValueComparer->isFutureTime( $futureTime ) ) {
361
				return null;
362
			}
363
			$futureTimeDependencyMetadata = DependencyMetadata::ofFutureTime( $futureTime );
364
		} else {
365
			$futureTimeDependencyMetadata = DependencyMetadata::blank();
366
		}
367
368
		// the keys of 'latestRevisionIds' are entity ID serializations; parse them back into EntityId objects
		$dependedEntityIds = array_map(
369
			[ $this->entityIdParser, "parse" ],
370
			array_keys( $value['latestRevisionIds'] )
371
		);
372
373
		// any difference from the current revision IDs (including a null result) makes the value unusable
		if ( $value['latestRevisionIds'] !== $this->getLatestRevisionIds( $dependedEntityIds ) ) {
374
			return null;
375
		}
376
377
		// fold every depended-on entity ID into the metadata, starting from the future-time metadata
		return array_reduce(
378
			$dependedEntityIds,
379
			function( DependencyMetadata $metadata, EntityId $entityId ) {
380
				return DependencyMetadata::merge( [
381
					$metadata,
382
					DependencyMetadata::ofEntityId( $entityId )
383
				] );
384
			},
385
			$futureTimeDependencyMetadata
386
		);
387
	}
388
389
	/**
	 * Turn a serialized check result back into a CheckResult.
	 *
	 * Results for which isPossiblyStaleResult() returns true
	 * (their dependencies cannot be fully tracked in the dependency metadata)
	 * additionally get $cachingMetadata merged into their own metadata;
	 * all other results are returned exactly as deserialized.
	 *
	 * @param array $resultSerialization
	 * @param CachingMetadata $cachingMetadata
	 * @return CheckResult
	 */
	private function deserializeCheckResult(
		array $resultSerialization,
		CachingMetadata $cachingMetadata
	) {
		$checkResult = $this->checkResultDeserializer->deserialize( $resultSerialization );

		if ( !$this->isPossiblyStaleResult( $checkResult ) ) {
			return $checkResult;
		}

		$combinedMetadata = Metadata::merge( [
			$checkResult->getMetadata(),
			Metadata::ofCachingMetadata( $cachingMetadata ),
		] );
		// NOTE(review): the return value is discarded, so this relies on
		// withMetadata() modifying $checkResult in place — confirm
		$checkResult->withMetadata( $combinedMetadata );

		return $checkResult;
	}
414
415
	/**
	 * Check whether the constraint type of a result
	 * is one of the configured possibly-stale constraint types.
	 *
	 * @param CheckResult $result
	 * @return bool true if the result’s constraint type item ID
	 * is contained in $this->possiblyStaleConstraintTypes
	 */
	private function isPossiblyStaleResult( CheckResult $result ) {
		$constraintTypeItemId = $result->getConstraint()->getConstraintTypeItemId();

		// strict comparison, as in hasFalseElements(): item IDs are compared as-is, without type juggling
		return in_array( $constraintTypeItemId, $this->possiblyStaleConstraintTypes, true );
	}
425
426
	/**
427
	 * @param EntityId[] $entityIds
428
	 * @return int[]|null array from entity ID serializations to revision ID,
429
	 * or null to indicate that not all revision IDs could be loaded
430
	 */
431 View Code Duplication
	private function getLatestRevisionIds( array $entityIds ) {
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
432
		if ( $entityIds === [] ) {
433
			$this->loggingHelper->logEmptyDependencyMetadata();
434
			return [];
0 ignored issues
show
Bug Best Practice introduced by
The return type of return array(); (array) is incompatible with the return type documented by WikibaseQuality\Constrai...e::getLatestRevisionIds of type integer[]|null.

If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.

Let’s take a look at an example:

class Author {
    private $name;

    public function __construct($name) {
        $this->name = $name;
    }

    public function getName() {
        return $this->name;
    }
}

abstract class Post {
    public function getAuthor() {
        return 'Johannes';
    }
}

class BlogPost extends Post {
    public function getAuthor() {
        return new Author('Johannes');
    }
}

class ForumPost extends Post { /* ... */ }

function my_function(Post $post) {
    echo strtoupper($post->getAuthor());
}

Our function my_function expects a Post object, and outputs the author of the post. The base class Post returns a simple string and outputting a simple string will work just fine. However, the child class BlogPost which is a sub-type of Post instead decided to return an object, and is therefore violating the SOLID principles. If a BlogPost were passed to my_function, PHP would not complain, but ultimately fail when executing the strtoupper call in its body.

Loading history...
435
		}
436
		if ( count( $entityIds ) > $this->maxRevisionIds ) {
437
			// one of those entities will probably be edited soon, so might as well skip caching
438
			$this->loggingHelper->logHugeDependencyMetadata( $entityIds, $this->maxRevisionIds );
439
			return null;
440
		}
441
442
		$latestRevisionIds = $this->wikiPageEntityMetaDataAccessor->loadLatestRevisionIds(
443
			$entityIds,
444
			EntityRevisionLookup::LATEST_FROM_REPLICA
445
		);
446
		if ( $this->hasFalseElements( $latestRevisionIds ) ) {
447
			return null;
448
		}
449
		return $latestRevisionIds;
450
	}
451
452
	/**
	 * Tell whether at least one element of the array is the boolean false.
	 * Elements are compared by identity, so 0, '' or null do not count.
	 *
	 * @param array $array
	 * @return bool
	 */
	private function hasFalseElements( array $array ) {
		foreach ( $array as $element ) {
			if ( $element === false ) {
				return true;
			}
		}
		return false;
	}
459
460
	/**
	 * Replace the function used to get the current time (microtime() by default).
	 *
	 * The callable is invoked with the single argument true
	 * when stored results are loaded, and its return value
	 * is used to compute the age of the cache entry.
	 *
	 * @param callable $microtime
	 */
	public function setMicrotimeFunction( callable $microtime ) {
		$this->microtime = $microtime;
	}
468
469
}
470