|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace WikibaseQuality\ExternalValidation\CrossCheck; |
|
4
|
|
|
|
|
5
|
|
|
use InvalidArgumentException; |
|
6
|
|
|
use Wikibase\DataModel\Services\Lookup\EntityLookup; |
|
7
|
|
|
use Wikibase\DataModel\Services\Statement\StatementGuidParser; |
|
8
|
|
|
use Wikibase\DataModel\Statement\StatementListProvider; |
|
9
|
|
|
use Wikimedia\Assert\Assert; |
|
10
|
|
|
use Wikibase\DataModel\Entity\Entity; |
|
11
|
|
|
use Wikibase\DataModel\Entity\EntityId; |
|
12
|
|
|
use Wikibase\DataModel\Entity\PropertyId; |
|
13
|
|
|
use Wikibase\DataModel\Statement\StatementList; |
|
14
|
|
|
use WikibaseQuality\ExternalValidation\CrossCheck\Result\CrossCheckResultList; |
|
15
|
|
|
|
|
16
|
|
|
/**
 * Helper class for interacting with CrossChecker. It makes it possible to run cross-checks for
 * various parameter types and combinations (entity ids, entities, statement lists, statement
 * GUIDs, optionally restricted to a set of property ids), since the CrossChecker only accepts
 * statements.
 *
 * Methods that work on several entities return an array keyed by the serialization of the
 * entity id.
 *
 * @package WikibaseQuality\ExternalValidation\CrossCheck
 * @author BP2014N1
 * @license GNU GPL v2+
 */
class CrossCheckInteractor {

	/**
	 * @var EntityLookup
	 */
	private $entityLookup;

	/**
	 * @var StatementGuidParser
	 */
	private $statementGuidParser;

	/**
	 * @var CrossChecker
	 */
	private $crossChecker;

	/**
	 * @param EntityLookup $entityLookup Used to load entities that are referenced by id.
	 * @param StatementGuidParser $statementGuidParser Used to find the entity id a statement GUID belongs to.
	 * @param CrossChecker $crossChecker Performs the actual cross-check on statement lists.
	 */
	public function __construct(
		EntityLookup $entityLookup,
		StatementGuidParser $statementGuidParser,
		CrossChecker $crossChecker
	) {
		$this->entityLookup = $entityLookup;
		$this->statementGuidParser = $statementGuidParser;
		$this->crossChecker = $crossChecker;
	}

	/**
	 * Runs cross-check for all statements of a single entity represented by its id.
	 *
	 * @param EntityId $entityId
	 *
	 * @return CrossCheckResultList|null Null if the lookup does not yield a StatementListProvider.
	 */
	public function crossCheckEntityById( EntityId $entityId ) {
		$statements = $this->lookupStatements( $entityId );

		if ( $statements === null ) {
			return null;
		}

		return $this->crossCheckStatements( $statements );
	}

	/**
	 * Runs cross-check for all statements of multiple entities represented by their ids.
	 *
	 * @param EntityId[] $entityIds
	 *
	 * @return (CrossCheckResultList|null)[] Keyed by entity id serialization. An entry is null
	 *  if the lookup for that id does not yield a StatementListProvider.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckEntitiesByIds( array $entityIds ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\EntityId', $entityIds, '$entityIds' );

		$results = array();
		foreach ( $entityIds as $entityId ) {
			$results[$entityId->getSerialization()] = $this->crossCheckEntityById( $entityId );
		}

		return $results;
	}

	/**
	 * Runs cross-check for all statements of a single entity.
	 *
	 * @param StatementList $statements
	 *
	 * @return CrossCheckResultList
	 */
	public function crossCheckStatements( StatementList $statements ) {
		// The same list is passed twice: presumably once as the entity's full set of statements
		// and once as the subset to check (compare crossCheckStatementsWithProperties).
		return $this->crossChecker->crossCheckStatements( $statements, $statements );
	}

	/**
	 * Runs cross-check for all statements of multiple entities.
	 *
	 * @param Entity[] $entities
	 *
	 * @return CrossCheckResultList[] Keyed by entity id serialization. Entities that are not
	 *  a StatementListProvider have no entry.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckEntities( array $entities ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\Entity', $entities, '$entities' );

		return $this->crossCheckEachEntity( $entities );
	}

	/**
	 * Runs cross-check for all statements with any of the given property ids of a single entity
	 * represented by its id.
	 *
	 * @param EntityId $entityId
	 * @param PropertyId[] $propertyIds
	 *
	 * @return CrossCheckResultList|null Null if the lookup does not yield a StatementListProvider.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckEntityByIdWithProperties( EntityId $entityId, array $propertyIds ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\PropertyId', $propertyIds, '$propertyIds' );

		$statements = $this->lookupStatements( $entityId );

		if ( $statements === null ) {
			return null;
		}

		return $this->crossCheckStatementsWithProperties( $statements, $propertyIds );
	}

	/**
	 * Runs cross-check for all statements with any of the given property ids of multiple
	 * entities represented by their ids.
	 *
	 * @param EntityId[] $entityIds
	 * @param PropertyId[] $propertyIds
	 *
	 * @return (CrossCheckResultList|null)[] Keyed by entity id serialization. An entry is null
	 *  if the lookup for that id does not yield a StatementListProvider.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckEntitiesByIdWithProperties( array $entityIds, array $propertyIds ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\EntityId', $entityIds, '$entityIds' );
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\PropertyId', $propertyIds, '$propertyIds' );

		$results = array();
		foreach ( $entityIds as $entityId ) {
			$results[$entityId->getSerialization()] =
				$this->crossCheckEntityByIdWithProperties( $entityId, $propertyIds );
		}

		return $results;
	}

	/**
	 * Runs cross-check for all statements with any of the given property ids of a single entity.
	 *
	 * @param StatementList $entityStatements All statements of the entity.
	 * @param PropertyId[] $propertyIds Only statements with one of these property ids are checked.
	 *
	 * @return CrossCheckResultList
	 * @throws InvalidArgumentException
	 */
	public function crossCheckStatementsWithProperties( StatementList $entityStatements, array $propertyIds ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\PropertyId', $propertyIds, '$propertyIds' );

		$statements = new StatementList();
		foreach ( $entityStatements->toArray() as $statement ) {
			// Deliberately non-strict: the elements are objects, and a strict in_array() would
			// compare object identity instead of comparing the ids by value.
			if ( in_array( $statement->getPropertyId(), $propertyIds ) ) {
				$statements->addStatement( $statement );
			}
		}

		return $this->crossChecker->crossCheckStatements( $entityStatements, $statements );
	}

	/**
	 * Runs cross-check for all statements with any of the given property ids of multiple entities.
	 *
	 * @param Entity[] $entities
	 * @param PropertyId[] $propertyIds
	 *
	 * @return CrossCheckResultList[] Keyed by entity id serialization. Entities that are not
	 *  a StatementListProvider have no entry.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckEntitiesWithProperties( array $entities, array $propertyIds ) {
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\Entity', $entities, '$entities' );
		Assert::parameterElementType( 'Wikibase\DataModel\Entity\PropertyId', $propertyIds, '$propertyIds' );

		return $this->crossCheckEachEntity( $entities, $propertyIds );
	}

	/**
	 * Runs cross-check for a single statement.
	 *
	 * @param string $guid
	 *
	 * @return CrossCheckResultList|null Null if the entity the GUID refers to does not resolve
	 *  to a StatementListProvider.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckStatementByGuid( $guid ) {
		$this->assertIsString( $guid, '$guid' );

		$resultList = $this->crossCheckStatementsByGuids( array( $guid ) );

		// Exactly one GUID was passed, so there is exactly one (per-entity) entry.
		return reset( $resultList );
	}

	/**
	 * Runs cross-check for multiple statements.
	 *
	 * The GUIDs are grouped by the entity they belong to, so that one cross-check is run per
	 * entity.
	 *
	 * @param string[] $guids
	 *
	 * @return (CrossCheckResultList|null)[] Keyed by entity id serialization. An entry is null
	 *  if the lookup for that entity does not yield a StatementListProvider.
	 * @throws InvalidArgumentException
	 */
	public function crossCheckStatementsByGuids( array $guids ) {
		$this->assertIsArrayOfStrings( $guids, '$guids' );

		$entityIds = array();
		$groupedStatementGuids = array();
		foreach ( $guids as $guid ) {
			// NOTE(review): parse() presumably throws on malformed GUIDs; that is not handled here.
			$entityId = $this->statementGuidParser->parse( $guid )->getEntityId();
			$idSerialization = $entityId->getSerialization();

			$entityIds[$idSerialization] = $entityId;
			$groupedStatementGuids[$idSerialization][] = $guid;
		}

		$resultLists = array();
		foreach ( $groupedStatementGuids as $idSerialization => $guidsOfEntity ) {
			$resultLists[$idSerialization] = $this->crossCheckClaimsOfEntity(
				$entityIds[$idSerialization],
				$guidsOfEntity
			);
		}

		return $resultLists;
	}

	/**
	 * Runs cross-check for those statements of the given entity whose GUID is in $guids.
	 *
	 * @param EntityId $entityId
	 * @param string[] $guids
	 *
	 * @return CrossCheckResultList|null Null if the lookup does not yield a StatementListProvider.
	 */
	private function crossCheckClaimsOfEntity( EntityId $entityId, array $guids ) {
		$entityStatements = $this->lookupStatements( $entityId );

		if ( $entityStatements === null ) {
			return null;
		}

		$statements = new StatementList();
		foreach ( $entityStatements->toArray() as $statement ) {
			// Strict comparison: GUIDs are plain strings and must never match through
			// PHP's loose type juggling.
			if ( in_array( $statement->getGuid(), $guids, true ) ) {
				$statements->addStatement( $statement );
			}
		}

		return $this->crossChecker->crossCheckStatements( $entityStatements, $statements );
	}

	/**
	 * Looks up the entity with the given id and returns its statements.
	 *
	 * @param EntityId $entityId
	 *
	 * @return StatementList|null Null if the lookup result is not a StatementListProvider.
	 */
	private function lookupStatements( EntityId $entityId ) {
		$entity = $this->entityLookup->getEntity( $entityId );

		if ( $entity instanceof StatementListProvider ) {
			return $entity->getStatements();
		}

		return null;
	}

	/**
	 * Shared loop of crossCheckEntities() and crossCheckEntitiesWithProperties(). Callers are
	 * responsible for validating the element types of $entities.
	 *
	 * @param Entity[] $entities
	 * @param PropertyId[]|null $propertyIds Restricts the check to statements with these property
	 *  ids; null checks all statements.
	 *
	 * @return CrossCheckResultList[] Keyed by entity id serialization. Entities that are not
	 *  a StatementListProvider have no entry.
	 */
	private function crossCheckEachEntity( array $entities, $propertyIds = null ) {
		$results = array();

		foreach ( $entities as $entity ) {
			$entityId = $entity->getId()->getSerialization();

			if ( !( $entity instanceof StatementListProvider ) ) {
				continue;
			}

			if ( $propertyIds === null ) {
				$results[$entityId] = $this->crossCheckStatements( $entity->getStatements() );
			} else {
				$results[$entityId] = $this->crossCheckStatementsWithProperties(
					$entity->getStatements(),
					$propertyIds
				);
			}
		}

		return $results;
	}

	/**
	 * @param string $string Value to validate.
	 * @param string $parameterName Name used in the exception message.
	 *
	 * @throws InvalidArgumentException If $string is not a string.
	 */
	private function assertIsString( $string, $parameterName ) {
		if ( !is_string( $string ) ) {
			throw new InvalidArgumentException( "$parameterName must be string." );
		}
	}

	/**
	 * @param array $strings Values to validate.
	 * @param string $parameterName Name used in the exception message.
	 *
	 * @throws InvalidArgumentException If any element of $strings is not a string.
	 */
	private function assertIsArrayOfStrings( array $strings, $parameterName ) {
		foreach ( $strings as $string ) {
			if ( !is_string( $string ) ) {
				throw new InvalidArgumentException( "Each element of $parameterName must be string." );
			}
		}
	}

}
|
305
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.