1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Samwilson\SimpleWikidata; |
4
|
|
|
|
5
|
|
|
use DateInterval; |
6
|
|
|
use Exception; |
7
|
|
|
use Mediawiki\Api\MediawikiApi; |
8
|
|
|
use Mediawiki\Api\SimpleRequest; |
9
|
|
|
use Nayjest\StrCaseConverter\Str; |
10
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
11
|
|
|
use Samwilson\SimpleWikidata\Properties\Time; |
12
|
|
|
use Symfony\Component\DomCrawler\Crawler; |
13
|
|
|
|
14
|
|
|
class Item { |
15
|
|
|
|
16
|
|
|
/** Wikidata property ID for 'instance of'; factory() reads this property to pick a subclass. */
const PROP_INSTANCE_OF = 'P31'; |
17
|
|
|
/** Wikidata property ID for 'title'. */
const PROP_TITLE = 'P1476'; |
18
|
|
|
/** Wikidata property ID for 'image'. */
const PROP_IMAGE = 'P18'; |
19
|
|
|
/** Wikidata property ID for 'author'. */
const PROP_AUTHOR = 'P50'; |
20
|
|
|
|
21
|
|
|
/** @var string The entity ID this object was constructed with. */ |
22
|
|
|
protected $id; |
23
|
|
|
|
24
|
|
|
/** @var MediawikiApi Client for the Wikidata API, created in the constructor. */ |
25
|
|
|
protected $wdApi; |
26
|
|
|
|
27
|
|
|
/** @var string The language code used when selecting labels and text values. */ |
28
|
|
|
protected $lang; |
29
|
|
|
|
30
|
|
|
/** @var CacheItemPoolInterface The cache pool used for entity data and other fetched results. */ |
31
|
|
|
protected $cache; |
32
|
|
|
|
33
|
|
|
/** @var string The base URL of Wikidata, with trailing slash. */ |
34
|
|
|
protected $wikidataUrlBase = 'https://www.wikidata.org/wiki/'; |
35
|
|
|
|
36
|
1 |
|
/**
 * Create an Item for the given Wikidata entity ID.
 *
 * The constructor is private; instances are obtained through Item::factory().
 *
 * @param string $id The entity ID: 'Q' or 'P' (case-insensitive) followed by one or more digits.
 * @param string $lang The language code.
 * @param CacheItemPoolInterface $cache The cache to use.
 * @throws Exception If $id is not a string or is not a well-formed entity ID.
 */
private function __construct( $id, $lang, CacheItemPoolInterface $cache ) {
	// The pattern is anchored at both ends and requires at least one digit. An unanchored
	// '[QP][0-9]*' would accept any string that merely contains a 'Q' or a 'P'.
	if ( !is_string( $id ) || preg_match( '/\A[QP][0-9]+\z/i', $id ) !== 1 ) {
		throw new Exception( "Not a valid ID: " . var_export( $id, true ) );
	}
	$this->id = $id;
	$this->wdApi = new MediawikiApi( 'https://www.wikidata.org/w/api.php' );
	// NOTE(review): 'entities' is not a declared property and is not read in this class;
	// the assignment is kept in case subclasses rely on it — confirm and declare or remove.
	$this->entities = [];
	$this->lang = $lang;
	$this->cache = $cache;
}
46
|
|
|
|
47
|
|
|
/**
 * Build an Item, using a more specific subclass when one matches an 'instance of' value.
 *
 * For each 'instance of' statement, the label of the referenced item is converted to camel
 * case and looked up as a class in the Items sub-namespace. The first class that exists is
 * instantiated and returned; otherwise a plain Item is returned.
 *
 * @param string $id The item ID (Q-number).
 * @param string $lang The language code.
 * @param CacheItemPoolInterface $cache The cache to use.
 * @return Item
 */
public static function factory( $id, $lang, CacheItemPoolInterface $cache ) {
	$genericItem = new Item( $id, $lang, $cache );
	$instanceOfStatements = $genericItem->getPropertyOfTypeItem( self::PROP_INSTANCE_OF );
	foreach ( $instanceOfStatements as $statement ) {
		// Try to find a class matching the 'instance of' name.
		$targetLabel = $statement->getItem()->getLabel();
		$candidateClass = __NAMESPACE__ . '\\Items\\' . Str::toCamelCase( $targetLabel );
		if ( !class_exists( $candidateClass ) ) {
			continue;
		}
		// The entity data was cached by the lookup above, so this should not re-request it.
		return new $candidateClass( $id, $lang, $cache );
	}

	// No specialised class was found: fall back to the basic Item.
	$genericItem->setCache( $cache );
	return $genericItem;
}
71
|
|
|
|
72
|
|
|
/**
 * Replace the cache pool used by this item.
 *
 * @param CacheItemPoolInterface $cache The cache to use.
 */
public function setCache( CacheItemPoolInterface $cache ) {
	$this->cache = $cache;
}
78
|
|
|
|
79
|
|
|
/**
 * Get the ID (Q-number) of this item, as reported in the entity data.
 *
 * @return string|bool The 'id' value of the entity data, or false if the data has none
 * (which includes the case where the entity data could not be loaded).
 */
public function getId() {
	$entityData = $this->getEntity( $this->id );
	if ( isset( $entityData['id'] ) ) {
		return $entityData['id'];
	}
	return false;
}
87
|
|
|
|
88
|
|
|
/**
 * Get this item's label in the current language.
 *
 * @return string The label if there is a non-empty one for the language, otherwise the ID.
 */
public function getLabel() {
	$entity = $this->getEntity( $this->id );
	if ( !empty( $entity['labels'][ $this->lang ]['value'] ) ) {
		// Use the label if there is one.
		return $entity['labels'][ $this->lang ]['value'];
	}
	// Or just use the ID. getEntity() returns false when the entity could not be loaded,
	// so fall back to the ID this object was built with rather than indexing into false.
	return isset( $entity['id'] ) ? $entity['id'] : $this->id;
}
101
|
|
|
|
102
|
|
|
/**
 * Build the URL of this item's page by appending its ID to the Wikidata base URL.
 *
 * @return string The Wikidata.org URL for this item.
 */
public function getWikidataUrl() {
	$url = $this->wikidataUrlBase . $this->id;
	return $url;
}
108
|
|
|
|
109
|
|
|
/**
 * Get the properties that a WikiProject lists for a given type of item.
 *
 * The rendered WikiProject page is fetched from Wikidata, and the link texts in the second
 * column of the first table after the "<Type>_item_properties" heading are taken as
 * property IDs. WikiProjects list their properties like this:
 *
 *     {{List of properties/Header}}
 *     {{List of properties/Row|id=31|example-subject=Q923767|example-object=Q3331189}}
 *     </table>
 *
 * The list of IDs is cached for one hour, separately for each WikiProject and type.
 *
 * @param string $wikiProject The name of the WikiProject (must exist as a Wikidata page e.g.
 * [[Wikidata:$wikiProject]]).
 * @param string $type Either 'work' or 'edition'; anything other than 'work' is treated as 'edition'.
 * @return Item[] One Item per listed property; empty if the page could not be fetched.
 */
public function getStandardProperties( $wikiProject = 'WikiProject_Books', $type = 'work' ) {
	if ( $type !== 'work' ) {
		$type = 'edition';
	}
	// The WikiProject is part of the key so that different projects don't share one cached
	// list. It is hashed because project names may contain characters not valid in cache keys.
	$cacheKey = $type . '_item_property_IDs_' . md5( $wikiProject );
	if ( $this->cache->hasItem( $cacheKey ) ) {
		$propIds = $this->cache->getItem( $cacheKey )->get();
	} else {
		$wikiProjectUrl = 'https://www.wikidata.org/wiki/Wikidata:' . $wikiProject;
		$html = file_get_contents( $wikiProjectUrl );
		if ( $html === false ) {
			// The page could not be fetched. Report no properties, and don't cache the
			// failure so that the next call tries again.
			return [];
		}
		$domCrawler = new Crawler();
		$domCrawler->addHtmlContent( $html );
		$propAnchors = "//h3/span[@id='" . ucfirst( $type ) . "_item_properties']/../following-sibling::table[1]//td[2]/a";
		// each() collects the closure's return values into an array.
		$propIds = $domCrawler->filterXPath( $propAnchors )->each( function ( Crawler $node ) {
			return $node->text();
		} );
		$cacheItem = $this->cache->getItem( $cacheKey )
			->expiresAfter( new DateInterval( 'PT1H' ) )
			->set( $propIds );
		$this->cache->save( $cacheItem );
	}

	$workProperties = [];
	foreach ( $propIds as $propId ) {
		$workProperties[] = self::factory( $propId, $this->lang, $this->cache );
	}

	return $workProperties;
}
151
|
|
|
|
152
|
|
|
/**
 * Get the time-valued statements of one of this item's properties.
 *
 * @param string $propertyId The property ID.
 * @return Time[] One Time object per claim; empty if the entity data has no claims for the property.
 */
public function getPropertyOfTypeTime( $propertyId ) {
	$entity = $this->getEntity();
	if ( !isset( $entity['claims'][$propertyId] ) ) {
		// No statements for this property.
		return [];
	}
	$result = [];
	foreach ( $entity['claims'][$propertyId] as $timeClaim ) {
		$result[] = new Time( $timeClaim, $this->lang, $this->cache );
	}
	return $result;
}
180
|
|
|
|
181
|
|
|
/**
 * Get the item-valued statements of one of this item's properties.
 *
 * @param string $propertyId The property ID.
 *
 * @return \Samwilson\SimpleWikidata\Properties\Item[] One object per claim; empty if the
 * entity data has no claims for the property.
 */
public function getPropertyOfTypeItem( $propertyId ) {
	$entity = $this->getEntity( $this->id );
	$found = [];
	if ( isset( $entity['claims'][$propertyId] ) ) {
		foreach ( $entity['claims'][$propertyId] as $claim ) {
			$found[] = new Properties\Item( $claim, $this->lang, $this->cache );
		}
	}

	return $found;
}
200
|
|
|
|
201
|
|
|
/**
 * Prepare an item-valued property change for this item and drop its cached entity data.
 *
 * If the property already has claims, the first one is re-pointed at $itemId (or nothing at
 * all is done when it already has that value); otherwise parameters for a new claim are built.
 *
 * NOTE: the API parameters are built but not sent anywhere yet (saving is still a TODO), so
 * this method does not currently change anything on Wikidata.
 *
 * @param string $property The property ID.
 * @param string $itemId The ID of the item to use as the value. Everything after its first
 * character is used as the numeric ID.
 */
public function setPropertyOfTypeItem( $property, $itemId ) {
	$numericPart = substr( $itemId, 1 );

	$entity = $this->getEntity( $this->id );
	if ( empty( $entity['claims'][$property] ) ) {
		// There is no existing claim to modify, so a new one is needed.
		$apiParams = [
			'action' => 'wbcreateclaim',
			'entity' => $this->getId(),
			'property' => $property,
			'snaktype' => 'value',
			'value' => json_encode( [ 'entity-type' => 'item', 'numeric-id' => $numericPart ] ),
		];
	} else {
		// Take the first existing claim, and update it if necessary.
		$firstClaim = array_shift( $entity['claims'][$property] );
		if ( $firstClaim['mainsnak']['datavalue']['value']['id'] == $itemId ) {
			// Already is the required value; return without touching the cache.
			return;
		}
		$firstClaim['mainsnak']['datavalue']['value']['id'] = $itemId;
		$firstClaim['mainsnak']['datavalue']['value']['numeric-id'] = $numericPart;
		$apiParams = [
			'action' => 'wbsetclaim',
			'claim' => json_encode( $firstClaim ),
		];
	}

	// @TODO Save the property.

	// Clear the cache.
	$this->cache->deleteItem( $this->getEntityCacheKey( $this->id ) );
}
237
|
|
|
|
238
|
|
|
/**
 * Get the main-snak values of a property of the given entity.
 *
 * @param string $entityId The ID of the entity to read from.
 * @param string $propertyId The property ID.
 * @return array|bool The value of each claim's main snak, or false if the entity data has no
 * claims for the property.
 */
public function getPropertyOfTypeUrl( $entityId, $propertyId ) {
	$entity = $this->getEntity( $entityId );
	if ( isset( $entity['claims'][$propertyId] ) ) {
		$values = [];
		foreach ( $entity['claims'][$propertyId] as $claim ) {
			$values[] = $claim['mainsnak']['datavalue']['value'];
		}

		return $values;
	}
	return false;
}
250
|
|
|
|
251
|
|
|
/**
 * Get the values of a property of the given entity, together with their qualifiers.
 *
 * Each returned element has a 'value' key (the claim's main-snak value) and a 'qualifiers'
 * key: an array mapping the label of each qualifier property to a list of qualifier values.
 *
 * @param string $entityId The ID of the entity to read from.
 * @param string $propertyId The property ID.
 * @return array[]|bool The identifiers, or false if the entity data has no claims for the property.
 */
public function getPropertyOfTypeExternalIdentifier( $entityId, $propertyId ) {
	$entity = $this->getEntity( $entityId );
	if ( !isset( $entity['claims'][$propertyId] ) ) {
		return false;
	}
	$identifiers = [];
	foreach ( $entity['claims'][$propertyId] as $claim ) {
		if ( !isset( $claim['qualifiers'] ) ) {
			// NOTE(review): claims without qualifiers are left out of the result
			// entirely — confirm that this is intended.
			continue;
		}
		$qualifiersByLabel = [];
		foreach ( $claim['qualifiers'] as $snaks ) {
			foreach ( $snaks as $snak ) {
				// Group the qualifier values under the label of the qualifier's property.
				$label = self::factory( $snak['property'], $this->lang, $this->cache )->getLabel();
				$qualifiersByLabel[$label][] = $snak['datavalue']['value'];
			}
		}
		$identifiers[] = [
			'qualifiers' => $qualifiersByLabel,
			'value' => $claim['mainsnak']['datavalue']['value'],
		];
	}

	return $identifiers;
}
280
|
|
|
|
281
|
|
|
/**
 * Get a single-valued text property in the current language.
 *
 * @param string $property One of the PROP_* constants.
 * @return string|bool The text of the first claim whose value is in the current language and
 * is not empty, or false if there is none.
 */
public function getPropertyOfTypeText( $property ) {
	$entity = $this->getEntity( $this->id );
	if ( !isset( $entity['claims'][$property] ) ) {
		return false;
	}
	foreach ( $entity['claims'][$property] as $claim ) {
		if ( !isset( $claim['mainsnak']['datavalue']['value']['language'] ) ) {
			// The value carries no language (or there is no value at all), so it can't
			// match. Skip it; library code must not dump output and end the process.
			continue;
		}
		$value = $claim['mainsnak']['datavalue']['value'];
		if ( $value['language'] == $this->lang && !empty( $value['text'] ) ) {
			return $value['text'];
		}
	}
	return false;
}
304
|
|
|
|
305
|
|
|
/**
 * Get the quantity values of one of this item's properties.
 *
 * Literal data field for a quantity that relates to some kind of well-defined unit. The
 * actual unit goes in the data values that is entered.
 *   - amount – implicit part of the string (mapping of unit prefix is unclear)
 *   - unit – implicit part of the string that defaults to "1" (mapping to standardizing body is unclear)
 *   - upperbound - quantity's upper bound
 *   - lowerbound - quantity's lower bound
 *
 * When the unit is a URL ending in an entity ID, it is replaced by the Item for that entity.
 * Any other unit (such as "1") is returned unchanged. Claims that have no value are skipped.
 *
 * @param string $property The property ID.
 * @return mixed[]|bool If it's not false it's a list of arrays with 'amount', 'unit', etc.
 */
public function getPropertyOfTypeQuantity( $property ) {
	$entity = $this->getEntity( $this->id );
	if ( !isset( $entity['claims'][$property] ) ) {
		return false;
	}
	$quantities = [];
	foreach ( $entity['claims'][$property] as $claim ) {
		if ( !isset( $claim['mainsnak']['datavalue']['value'] ) ) {
			// No value to report for this claim.
			continue;
		}
		$quantity = $claim['mainsnak']['datavalue']['value'];
		// Take the ID from the last path segment of the unit URL instead of relying on a
		// fixed offset, which silently depends on the exact URL prefix and yields an invalid
		// ID (and so an exception from factory()) for units that are not URLs.
		if ( isset( $quantity['unit'] )
			&& is_string( $quantity['unit'] )
			&& preg_match( '~/([QP][0-9]+)\z~i', $quantity['unit'], $matches ) === 1
		) {
			$quantity['unit'] = self::factory( $matches[1], $this->lang, $this->cache );
		}
		$quantities[] = $quantity;
	}
	return $quantities;
}
328
|
|
|
|
329
|
|
|
/**
 * Set a single-valued text property in the current language.
 *
 * An existing claim in the current language has its text replaced; if several claims share
 * the language, the last one is the one that is submitted. If there is no such claim, a new
 * one is created. The request is sent with WdWpOauth::makeCall() and this item's cached
 * entity data is then deleted.
 *
 * NOTE(review): WdWpOauth is not among this file's visible imports, so it resolves to the
 * current namespace — confirm that the class exists there.
 *
 * @param string $property One of the PROP_* constants.
 * @param string $value The value.
 */
public function setPropertyOfTypeText( $property, $value ) {
	$request = null;

	// First see if this property already has a claim in this language.
	$entity = $this->getEntity( $this->id );
	if ( !empty( $entity['claims'][$property] ) ) {
		foreach ( $entity['claims'][$property] as $existing ) {
			if ( $existing['mainsnak']['datavalue']['value']['language'] != $this->lang ) {
				continue;
			}
			// Modify this claim's text value ($existing is a per-iteration copy).
			$existing['mainsnak']['datavalue']['value']['text'] = $value;
			$request = [
				'action' => 'wbsetclaim',
				'claim' => \GuzzleHttp\json_encode( $existing ),
			];
		}
	}

	// If no claim was found (and modified) above, create a new claim.
	if ( $request === null ) {
		$request = [
			'action' => 'wbcreateclaim',
			'entity' => $this->getId(),
			'property' => $property,
			'snaktype' => 'value',
			'value' => \GuzzleHttp\json_encode( [ 'text' => $value, 'language' => $this->lang ] ),
		];
	}

	// Save the property.
	$wdWpOauth = new WdWpOauth();
	$wdWpOauth->makeCall( $request, true );

	// Clear the cache.
	$this->cache->deleteItem( $this->getEntityCacheKey( $this->id ) );
}
371
|
|
|
|
372
|
|
|
/**
 * Does this item exist? It is considered to exist when getId() yields anything but false.
 *
 * @return bool
 */
public function exists() {
	$reportedId = $this->getId();
	return $reportedId !== false;
}
379
|
|
|
|
380
|
|
|
/**
 * Get the introductory extract of this item's Wikipedia article in the current language.
 *
 * The entity's sitelinks are searched for the site "<lang>wiki". When one is found, the
 * intro extract of the linked page is requested from that Wikipedia's API and the result is
 * cached for one day.
 *
 * @return array An array with 'title' and 'html' keys, or an empty array if the entity data
 * has no sitelinks or none for the current language's Wikipedia.
 */
public function getWikipediaIntro() {
	$cacheKey = 'wikipedia-intro-' . $this->id . $this->lang;
	if ( $this->cache->hasItem( $cacheKey ) ) {
		return $this->cache->getItem( $cacheKey )->get();
	}

	$entity = $this->getEntity( $this->id );
	if ( !isset( $entity['sitelinks'] ) ) {
		return [];
	}

	$wantedSite = $this->lang . 'wiki';
	foreach ( $entity['sitelinks'] as $sitelink ) {
		if ( $sitelink['site'] != $wantedSite ) {
			continue;
		}
		$wikipediaApi = new MediawikiApi( 'https://' . $this->lang . '.wikipedia.org/w/api.php' );
		$query = new SimpleRequest( 'query', [
			'prop' => 'extracts',
			'exintro' => true,
			'titles' => $sitelink['title'],
		] );
		$result = $wikipediaApi->getRequest( $query );
		// Only one title was requested, so the first page in the result is used.
		$firstPage = array_shift( $result['query']['pages'] );
		$intro = [
			'title' => $firstPage['title'],
			'html' => $firstPage['extract'],
		];
		$cacheItem = $this->cache->getItem( $cacheKey )
			->expiresAfter( new DateInterval( 'P1D' ) )
			->set( $intro );
		$this->cache->save( $cacheItem );

		return $intro;
	}

	return [];
}
414
|
|
|
|
415
|
|
|
/**
 * Get the raw entity data from the 'wbgetentities' API call.
 *
 * Data fetched from the API is stored in the cache for ten minutes.
 *
 * @param string|null $id The Q-number. This item's own ID is used when none is given.
 * @param bool $ignoreCache Whether to skip reading from the cache (the fetched data is still
 * written to it).
 * @return array|bool The entity data, or false if the API response does not report success
 * or does not contain the requested entity.
 */
public function getEntity( $id = null, $ignoreCache = false ) {
	$idActual = $id ?: $this->id;
	$cacheKey = $this->getEntityCacheKey( $idActual );
	if ( !$ignoreCache && $this->cache->hasItem( $cacheKey ) ) {
		return $this->cache->getItem( $cacheKey )->get();
	}
	$metadataRequest = new SimpleRequest( 'wbgetentities', [ 'ids' => $idActual ] );
	$itemResult = $this->wdApi->getRequest( $metadataRequest );
	// Look the entity up under the ID that was actually requested. Checking $id here would
	// always fail for callers that rely on the default (null) ID.
	if ( !isset( $itemResult['success'] ) || !isset( $itemResult['entities'][$idActual] ) ) {
		return false;
	}
	$metadata = $itemResult['entities'][$idActual];
	$cacheItem = $this->cache->getItem( $cacheKey )
		->expiresAfter( new DateInterval( 'PT10M' ) )
		->set( $metadata );
	$this->cache->save( $cacheItem );
	return $metadata;
}
439
|
|
|
|
440
|
|
|
/**
 * Build the cache key under which an entity's data is stored.
 *
 * @param string $id The entity ID.
 *
 * @return string The ID prefixed with 'entities'.
 */
protected function getEntityCacheKey( $id ) {
	$prefix = 'entities';
	return $prefix . $id;
}
448
|
|
|
} |
449
|
|
|
|