1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace WikibaseQuality\ConstraintReport\Api; |
4
|
|
|
|
5
|
|
|
use DataValues\TimeValue; |
6
|
|
|
use Wikibase\DataModel\Entity\EntityId; |
7
|
|
|
use Wikibase\DataModel\Entity\EntityIdParser; |
8
|
|
|
use Wikibase\Lib\Store\EntityRevisionLookup; |
9
|
|
|
use Wikibase\Lib\Store\Sql\WikiPageEntityMetaDataAccessor; |
10
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedCheckConstraintsResponse; |
11
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata; |
12
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\DependencyMetadata; |
13
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata; |
14
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\LoggingHelper; |
15
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\TimeValueComparer; |
16
|
|
|
use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResult; |
17
|
|
|
|
18
|
|
|
/**
 * A wrapper around another ResultsBuilder that caches results in a ResultsCache.
 *
 * Results are cached independently per entity,
 * and the outermost level of the response returned by the wrapped ResultsBuilder
 * must be an array from entity ID serialization to results for that entity.
 * Apart from that, the array structure does not matter.
 *
 * However, if the response for an entity is an array
 * which contains 'cached' keys anywhere (possibly nested),
 * the corresponding value is assumed to be CachingMetadata in array form,
 * and updated with the age of the value in the WANObjectCache;
 * and if the response contains arrays with a 'constraint' key (also possibly nested),
 * these arrays are assumed to be a CheckResult in array form
 * (as converted by CheckingResultsBuilder::checkResultToArray),
 * and if their 'type' is in the list of $possiblyStaleConstraintTypes,
 * their 'cached' information is also updated.
 *
 * @author Lucas Werkmeister
 * @license GPL-2.0-or-later
 */
class CachingResultsBuilder implements ResultsBuilder {

	/**
	 * The wrapped builder that cache misses are delegated to.
	 *
	 * @var ResultsBuilder
	 */
	private $resultsBuilder;

	/**
	 * The cache in which per-entity results are stored.
	 *
	 * @var ResultsCache
	 */
	private $cache;

	/**
	 * Used to look up the latest revision IDs of the entities a result depends on.
	 *
	 * @var WikiPageEntityMetaDataAccessor
	 */
	private $wikiPageEntityMetaDataAccessor;

	/**
	 * Used to parse the entity ID serializations found in cached values.
	 *
	 * @var EntityIdParser
	 */
	private $entityIdParser;

	/**
	 * Time-to-live passed to the cache when storing a value, in seconds.
	 *
	 * @var int
	 */
	private $ttlInSeconds;

	/**
	 * Constraint types whose cached check results always get 'cached' information attached.
	 *
	 * @var string[]
	 */
	private $possiblyStaleConstraintTypes;

	/**
	 * Upper limit for the number of revision IDs that are looked up for one result.
	 *
	 * @var int
	 */
	private $maxRevisionIds;

	/**
	 * @var LoggingHelper
	 */
	private $loggingHelper;

	/**
	 * @var TimeValueComparer
	 */
	private $timeValueComparer;

	/**
	 * Function used to get the current time; it is called with `true` as its only argument.
	 * Can be replaced via setMicrotimeFunction().
	 *
	 * @var callable
	 */
	private $microtime = 'microtime';

	/**
	 * The exact list of statuses for which results are read from and written to the cache.
	 *
	 * TODO: In PHP 5.6, make this a public class constant instead,
	 * and also use it in CheckConstraints::getAllowedParams()
	 * and in some of the tests.
	 *
	 * @var string[]
	 */
	private $cachedStatuses;

	/**
	 * @param ResultsBuilder $resultsBuilder The ResultsBuilder that cache misses are delegated to.
	 * @param ResultsCache $cache The cache where results can be stored.
	 * @param WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor Used to get the latest revision ID.
	 * @param EntityIdParser $entityIdParser Used to parse entity IDs in cached objects.
	 * @param int $ttlInSeconds Time-to-live of the cached values, in seconds.
	 * @param string[] $possiblyStaleConstraintTypes item IDs of constraint types
	 * where cached results may always be stale, regardless of invalidation logic
	 * @param int $maxRevisionIds The maximum number of revision IDs to check;
	 * if a check result depends on more entity IDs than this number, it is not cached.
	 * @param LoggingHelper $loggingHelper
	 */
	public function __construct(
		ResultsBuilder $resultsBuilder,
		ResultsCache $cache,
		WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor,
		EntityIdParser $entityIdParser,
		$ttlInSeconds,
		array $possiblyStaleConstraintTypes,
		$maxRevisionIds,
		LoggingHelper $loggingHelper
	) {
		$this->resultsBuilder = $resultsBuilder;
		$this->cache = $cache;
		$this->wikiPageEntityMetaDataAccessor = $wikiPageEntityMetaDataAccessor;
		$this->entityIdParser = $entityIdParser;
		$this->ttlInSeconds = $ttlInSeconds;
		$this->possiblyStaleConstraintTypes = $possiblyStaleConstraintTypes;
		$this->maxRevisionIds = $maxRevisionIds;
		$this->loggingHelper = $loggingHelper;
		$this->timeValueComparer = new TimeValueComparer();

		$this->cachedStatuses = [
			CheckResult::STATUS_VIOLATION,
			CheckResult::STATUS_WARNING,
			CheckResult::STATUS_BAD_PARAMETERS,
		];
	}

	/**
	 * Get the results for the given entities and statements,
	 * serving as many entities as possible from the cache
	 * and delegating the rest to the wrapped ResultsBuilder.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckConstraintsResponse
	 */
	public function getResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$results = [];
		$metadatas = [];

		if ( $this->canUseStoredResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
			$storedEntityIds = [];
			foreach ( $entityIds as $entityId ) {
				$storedResults = $this->getStoredResults( $entityId );
				if ( $storedResults !== null ) {
					$this->loggingHelper->logCheckConstraintsCacheHit( $entityId );
					$results += $storedResults->getArray();
					$metadatas[] = $storedResults->getMetadata();
					$storedEntityIds[] = $entityId;
				}
			}
			// only the entities that were not found in the cache still need to be checked
			$entityIds = array_values( array_diff( $entityIds, $storedEntityIds ) );
		}

		if ( $entityIds !== [] || $claimIds !== [] ) {
			if ( $entityIds !== [] ) {
				$this->loggingHelper->logCheckConstraintsCacheMisses( $entityIds );
			}
			$response = $this->getAndStoreResults( $entityIds, $claimIds, $constraintIds, $statuses );
			$results += $response->getArray();
			$metadatas[] = $response->getMetadata();
		}

		return new CachedCheckConstraintsResponse(
			$results,
			Metadata::merge( $metadatas )
		);
	}

	/**
	 * We can only use cached constraint results
	 * if exactly the problematic results of a full constraint check were requested:
	 * constraint checks for the full entity (not just individual statements),
	 * without restricting the set of constraints to check,
	 * and with exactly the 'violation', 'warning' and 'bad-parameters' statuses.
	 *
	 * (In theory, we could also use results for requests
	 * that asked for a subset of these result statuses,
	 * but removing the extra results from the cached value is tricky,
	 * especially if you consider that they might have added qualifier contexts to the output
	 * that should not only be empty, but completely absent.)
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return bool
	 */
	private function canUseStoredResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		if ( $claimIds !== [] ) {
			return false;
		}
		if ( $constraintIds !== null ) {
			return false;
		}
		if ( $statuses != $this->cachedStatuses ) {
			return false;
		}
		return true;
	}

	/**
	 * Get the results from the wrapped ResultsBuilder
	 * and, if possible, store the results of each entity in the cache.
	 *
	 * Each cached value holds the results of one entity
	 * together with the latest revision IDs of all entities the response depends on
	 * and, if the response has one, the future time from its dependency metadata.
	 * Nothing is stored if the latest revision IDs cannot be determined.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return CachedCheckConstraintsResponse
	 */
	public function getAndStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		$results = $this->resultsBuilder->getResults( $entityIds, $claimIds, $constraintIds, $statuses );

		if (
			$entityIds === [] ||
			!$this->canStoreResults( $entityIds, $claimIds, $constraintIds, $statuses )
		) {
			return $results;
		}

		// The dependency metadata belongs to the whole response, not to a single entity,
		// so the revision IDs only need to be looked up (and logged about) once.
		$dependencyMetadata = $results->getMetadata()->getDependencyMetadata();
		$latestRevisionIds = $this->getLatestRevisionIds( $dependencyMetadata->getEntityIds() );
		if ( $latestRevisionIds === null ) {
			return $results;
		}

		$resultsArray = $results->getArray();
		$futureTime = $dependencyMetadata->getFutureTime();

		foreach ( $entityIds as $entityId ) {
			$value = [
				'results' => $resultsArray[$entityId->getSerialization()],
				'latestRevisionIds' => $latestRevisionIds,
			];
			if ( $futureTime !== null ) {
				$value['futureTime'] = $futureTime->getArrayValue();
			}
			$this->cache->set( $entityId, $value, $this->ttlInSeconds );
		}

		return $results;
	}

	/**
	 * We can only store constraint results
	 * if the set of constraints to check was not restricted
	 * and exactly the problematic results were requested.
	 * However, it doesn’t matter whether constraint checks on individual statements were requested:
	 * we only store results for the mentioned entity IDs,
	 * and those will be complete regardless of what’s in the statement IDs.
	 *
	 * (In theory, we could also store results of checks that requested extra result statuses,
	 * but removing the extra results before caching the value is tricky,
	 * especially if you consider that they might have added qualifier contexts to the output
	 * that should not only be empty, but completely absent.)
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $claimIds
	 * @param string[]|null $constraintIds
	 * @param string[] $statuses
	 * @return bool
	 */
	private function canStoreResults(
		array $entityIds,
		array $claimIds,
		array $constraintIds = null,
		array $statuses
	) {
		if ( $constraintIds !== null ) {
			return false;
		}
		if ( $statuses != $this->cachedStatuses ) {
			return false;
		}
		return true;
	}

	/**
	 * Get the cached results for one entity, if they are still usable.
	 *
	 * A cached value is not used if the latest revision IDs of the entities it depends on
	 * differ from the ones recorded in the value (or can no longer be determined),
	 * or if the value recorded a future time
	 * which the TimeValueComparer no longer considers to be in the future.
	 *
	 * @param EntityId $entityId
	 * @return CachedCheckConstraintsResponse|null the cached response, with its caching metadata
	 * updated to the age of the cached value, or null if there is no usable cached value
	 */
	public function getStoredResults(
		EntityId $entityId
	) {
		// $curTTL and $asOf are filled in by the cache
		$value = $this->cache->get( $entityId, $curTTL, [], $asOf );
		$now = call_user_func( $this->microtime, true );

		if ( $value === false ) {
			return null;
		}

		// round up, so that any non-fresh value has an age of at least one second
		$ageInSeconds = (int)ceil( $now - $asOf );

		$dependedEntityIds = array_map(
			[ $this->entityIdParser, 'parse' ],
			array_keys( $value['latestRevisionIds'] )
		);

		if ( $value['latestRevisionIds'] !== $this->getLatestRevisionIds( $dependedEntityIds ) ) {
			return null;
		}

		if ( array_key_exists( 'futureTime', $value ) ) {
			$futureTime = TimeValue::newFromArray( $value['futureTime'] );
			if ( !$this->timeValueComparer->isFutureTime( $futureTime ) ) {
				return null;
			}
			$futureTimeDependencyMetadata = DependencyMetadata::ofFutureTime( $futureTime );
		} else {
			$futureTimeDependencyMetadata = DependencyMetadata::blank();
		}

		$cachingMetadata = $ageInSeconds > 0 ?
			CachingMetadata::ofMaximumAgeInSeconds( $ageInSeconds ) :
			CachingMetadata::fresh();

		if ( is_array( $value['results'] ) ) {
			array_walk( $value['results'], [ $this, 'updateCachingMetadata' ], $cachingMetadata );
		}

		return new CachedCheckConstraintsResponse(
			[ $entityId->getSerialization() => $value['results'] ],
			$this->mergeStoredMetadata( $cachingMetadata, $dependedEntityIds, $futureTimeDependencyMetadata )
		);
	}

	/**
	 * Build the Metadata of a cached response
	 * from its caching metadata and all of its dependencies.
	 *
	 * @param CachingMetadata $cachingMetadata
	 * @param EntityId[] $dependedEntityIds
	 * @param DependencyMetadata|null $futureTimeDependencyMetadata
	 * dependency metadata to start from; null is treated like blank dependency metadata
	 * @return Metadata
	 */
	private function mergeStoredMetadata(
		CachingMetadata $cachingMetadata,
		array $dependedEntityIds,
		DependencyMetadata $futureTimeDependencyMetadata = null
	) {
		// The reducer below requires a DependencyMetadata as its carry,
		// so an omitted (null) argument must be replaced before reducing.
		if ( $futureTimeDependencyMetadata === null ) {
			$futureTimeDependencyMetadata = DependencyMetadata::blank();
		}

		$dependencyMetadata = array_reduce(
			$dependedEntityIds,
			function( DependencyMetadata $metadata, EntityId $entityId ) {
				return DependencyMetadata::merge( [
					$metadata,
					DependencyMetadata::ofEntityId( $entityId )
				] );
			},
			$futureTimeDependencyMetadata
		);

		return Metadata::merge( [
			Metadata::ofCachingMetadata( $cachingMetadata ),
			Metadata::ofDependencyMetadata( $dependencyMetadata )
		] );
	}

	/**
	 * @param EntityId[] $entityIds
	 * @return int[]|null array from entity ID serializations to revision ID,
	 * or null to indicate that not all revision IDs could be loaded
	 */
	private function getLatestRevisionIds( array $entityIds ) {
		if ( $entityIds === [] ) {
			$this->loggingHelper->logEmptyDependencyMetadata();
			return [];
		}
		if ( count( $entityIds ) > $this->maxRevisionIds ) {
			// one of those entities will probably be edited soon, so might as well skip caching
			$this->loggingHelper->logHugeDependencyMetadata( $entityIds, $this->maxRevisionIds );
			return null;
		}

		$latestRevisionIds = $this->wikiPageEntityMetaDataAccessor->loadLatestRevisionIds(
			$entityIds,
			EntityRevisionLookup::LATEST_FROM_REPLICA
		);
		if ( $this->hasFalseElements( $latestRevisionIds ) ) {
			return null;
		}
		return $latestRevisionIds;
	}

	/**
	 * Check whether the array contains the value false (compared strictly).
	 *
	 * @param array $array
	 * @return bool
	 */
	private function hasFalseElements( array $array ) {
		return in_array( false, $array, true );
	}

	/**
	 * Callback for array_walk() which recursively updates the caching information
	 * of one element of a cached result array.
	 *
	 * The value of any 'cached' key is merged with the given caching metadata,
	 * and any array with a 'constraint' key whose 'type'
	 * is one of the possibly stale constraint types
	 * gets a 'cached' entry merged from the given caching metadata
	 * and its previous 'cached' entry, if any.
	 *
	 * @param mixed &$element the array element, updated in place
	 * @param int|string $key the key of the element
	 * @param CachingMetadata $cachingMetadata the caching metadata to merge in
	 */
	public function updateCachingMetadata( &$element, $key, CachingMetadata $cachingMetadata ) {
		if ( $key === 'cached' ) {
			$element = CachingMetadata::merge( [
				$cachingMetadata,
				CachingMetadata::ofArray( $element ),
			] )->toArray();
		}
		if (
			is_array( $element ) &&
			// isset() also covers a 'constraint' value that has no 'type'
			isset( $element['constraint']['type'] ) &&
			in_array( $element['constraint']['type'], $this->possiblyStaleConstraintTypes, true )
		) {
			$element['cached'] = CachingMetadata::merge( [
				$cachingMetadata,
				CachingMetadata::ofArray(
					array_key_exists( 'cached', $element ) ? $element['cached'] : null
				),
			] )->toArray();
		}
		if ( is_array( $element ) ) {
			array_walk( $element, [ $this, __FUNCTION__ ], $cachingMetadata );
		}
	}

	/**
	 * Set a custom function to get the current time, instead of microtime().
	 *
	 * @param callable $microtime
	 */
	public function setMicrotimeFunction( callable $microtime ) {
		$this->microtime = $microtime;
	}

}
433
|
|
|
|
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error, or the assigned type should be added to the documentation/type hint for that property.