Passed
Push — master ( aeb35e...5f0582 )
by Sam
04:41
created

Item::getPropertyOfTypeExternalIdentifier()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 28
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
cc 7
eloc 19
nc 7
nop 2
dl 0
loc 28
ccs 0
cts 20
cp 0
crap 56
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
namespace Samwilson\SimpleWikidata;
4
5
use DateInterval;
6
use Exception;
7
use Mediawiki\Api\MediawikiApi;
8
use Mediawiki\Api\SimpleRequest;
9
use Nayjest\StrCaseConverter\Str;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Samwilson\SimpleWikidata\Properties\Time;
12
use Symfony\Component\DomCrawler\Crawler;
13
14
class Item {
15
16
	const PROP_INSTANCE_OF = 'P31';
17
	const PROP_TITLE = 'P1476';
18
	const PROP_IMAGE = 'P18';
19
	const PROP_AUTHOR = 'P50';
20
21
	/** @var string */
22
	protected $id;
23
24
	/** @var MediawikiApi */
25
	protected $wdApi;
26
27
	/** @var string */
28
	protected $lang;
29
30
	/** @var CacheItemPoolInterface */
31
	protected $cache;
32
33
	/** @var string The base URL of Wikidata, with trailing slash. */
34
	protected $wikidataUrlBase = 'https://www.wikidata.org/wiki/';
35
36 1
	/**
	 * Validate the entity ID and store the API client, language and cache.
	 *
	 * @param string $id The entity ID: 'Q' or 'P' followed by one or more digits (case-insensitive).
	 * @param string $lang The language code.
	 * @param CacheItemPoolInterface $cache The cache to use.
	 * @throws Exception If $id is not a string or is not a well-formed entity ID.
	 */
	private function __construct( $id, $lang, CacheItemPoolInterface $cache ) {
		// The pattern is anchored at both ends and requires at least one digit; an unanchored
		// pattern with '*' would accept any string that merely contains a 'Q' or a 'P'.
		// The D modifier stops '$' from also matching before a trailing newline.
		if ( !is_string( $id ) || preg_match( '/^[QP][0-9]+$/iD', $id ) !== 1 ) {
			throw new Exception( "Not a valid ID: " . var_export( $id, true ) );
		}
		$this->id = $id;
		$this->wdApi = new MediawikiApi( 'https://www.wikidata.org/w/api.php' );
		// NOTE(review): $entities is not declared as a property of this class and nothing in
		// this class reads it; kept in case a subclass relies on it — confirm and declare/remove.
		$this->entities = [];
		$this->lang = $lang;
		$this->cache = $cache;
	}
46
47
	/**
	 * Build an Item for the given ID, preferring a more specific class where one exists.
	 *
	 * For each 'instance of' value, the label of the referenced item is converted with
	 * Str::toCamelCase() and looked up as a class in the Items sub-namespace. The first class
	 * that exists is instantiated and returned; otherwise a plain Item is returned.
	 *
	 * @param string $id The item ID (Q-number).
	 * @param string $lang The language code.
	 * @param CacheItemPoolInterface $cache The cache to use.
	 * @return Item
	 */
	public static function factory( $id, $lang, CacheItemPoolInterface $cache ) {
		$baseItem = new Item( $id, $lang, $cache );
		foreach ( $baseItem->getPropertyOfTypeItem( self::PROP_INSTANCE_OF ) as $instanceOfClaim ) {
			// Try to find a class matching the 'instance of' name.
			$candidateBaseName = Str::toCamelCase( $instanceOfClaim->getItem()->getLabel() );
			$candidateClass = __NAMESPACE__ . '\\Items\\' . $candidateBaseName;
			if ( !class_exists( $candidateClass ) ) {
				continue;
			}
			// The entity data is looked up through the same cache, so this is not
			// expected to trigger another metadata request.
			return new $candidateClass( $id, $lang, $cache );
		}

		// No specific class was found: fall back to the basic Item.
		$baseItem->setCache( $cache );
		return $baseItem;
	}
72
73
	/**
	 * Replace the cache pool used by this item.
	 *
	 * @param CacheItemPoolInterface $cache The cache to use from now on.
	 */
	public function setCache( CacheItemPoolInterface $cache ) {
		$this->cache = $cache;
	}
79
80
	/**
	 * Get the ID of this item as reported in its entity data.
	 *
	 * @return string|bool The 'id' field of the entity data, or false if the entity data
	 * has no such field (including when the entity could not be retrieved).
	 */
	public function getId() {
		$entityData = $this->getEntity( $this->id );
		if ( isset( $entityData['id'] ) ) {
			return $entityData['id'];
		}
		return false;
	}
88
89
	/**
	 * Get this item's label in the current language.
	 *
	 * @return string The label if the entity has a non-empty one in this item's language,
	 * otherwise the entity's ID.
	 */
	public function getLabel() {
		$entity = $this->getEntity( $this->id );
		if ( !empty( $entity['labels'][ $this->lang ]['value'] ) ) {
			// Use the label if there is one.
			return $entity['labels'][ $this->lang ]['value'];
		}
		// Or just use the ID. getEntity() returns false when the lookup fails, in which case
		// there is no 'id' field to read, so fall back to the ID this object was created with.
		return isset( $entity['id'] ) ? $entity['id'] : $this->id;
	}
102
103
	/**
	 * Build the URL of this item's page by appending its ID to the Wikidata base URL.
	 *
	 * @return string The Wikidata.org URL for this item.
	 */
	public function getWikidataUrl() {
		$baseUrl = $this->wikidataUrlBase;
		return $baseUrl . $this->id;
	}
109
110
	/**
	 * Get the properties that a WikiProject lists as standard for a type of item.
	 *
	 * Wikiprojects list their properties like this:
	 *
	 *     {{List of properties/Header}}
	 *     {{List of properties/Row|id=31|example-subject=Q923767|example-object=Q3331189}}
	 *     </table>
	 *
	 * The rendered WikiProject page is fetched and the link text in the second column of the
	 * table that follows the "<Type>_item_properties" heading is used as the list of property
	 * IDs. The list is cached for one hour.
	 *
	 * @param string $wikiProject The name of the WikiProject (must exist as a Wikidata page e.g.
	 * [[Wikidata:$wikiProject]]).
	 * @param string $type Either 'work' or 'edition'; anything other than 'work' is treated
	 * as 'edition'.
	 * @return Item[] One item per listed property; empty if the page could not be fetched.
	 */
	public function getStandardProperties( $wikiProject = 'WikiProject_Books', $type = 'work' ) {
		if ( $type !== 'work' ) {
			$type = 'edition';
		}
		// The key must vary with the WikiProject, otherwise the list cached for one project is
		// returned for every other project. The name is hashed so that characters which are
		// not permitted in cache keys cannot end up in the key.
		$cacheKey = $type . '_item_property_IDs_' . md5( $wikiProject );
		if ( $this->cache->hasItem( $cacheKey ) ) {
			$propIds = $this->cache->getItem( $cacheKey )->get();
		} else {
			$wikiProjectUrl = 'https://www.wikidata.org/wiki/Wikidata:' . $wikiProject;
			$html = file_get_contents( $wikiProjectUrl );
			if ( $html === false ) {
				// The page could not be fetched. Return without caching, so that a transient
				// failure does not leave an empty list cached for the next hour.
				return [];
			}
			$domCrawler = new Crawler();
			$domCrawler->addHtmlContent( $html );
			$propAnchors = "//h3/span[@id='" . ucfirst( $type ) . "_item_properties']/../following-sibling::table[1]//td[2]/a";
			$propCells = $domCrawler->filterXPath( $propAnchors );
			$propIds = [];
			$propCells->each( function ( Crawler $node ) use ( &$propIds ) {
				$propIds[] = $node->text();
			} );
			$cacheItem = $this->cache->getItem( $cacheKey )
				->expiresAfter( new DateInterval( 'PT1H' ) )
				->set( $propIds );
			$this->cache->save( $cacheItem );
		}
		$workProperties = [];
		foreach ( $propIds as $propId ) {
			$workProperties[] = self::factory( $propId, $this->lang, $this->cache );
		}

		return $workProperties;
	}
152
153
	/**
	 * Get the statements of a time-valued property, each wrapped in a Time object.
	 *
	 * @param string $propertyId The property ID to look up in the entity's claims.
	 * @return Time[] One Time per claim; an empty array if there are no statements
	 * for this property.
	 */
	public function getPropertyOfTypeTime( $propertyId ) {
		$entityData = $this->getEntity();
		if ( !isset( $entityData['claims'][$propertyId] ) ) {
			// No statements for this property.
			return [];
		}
		$wrapped = [];
		foreach ( $entityData['claims'][$propertyId] as $timeClaim ) {
			$wrapped[] = new Time( $timeClaim, $this->lang, $this->cache );
		}
		return $wrapped;
	}
181
182
	/**
	 * Get the statements of an item-valued property of this item.
	 *
	 * Each claim found for the property is wrapped in a Properties\Item object.
	 *
	 * @param string $propertyId The property ID to look up in the entity's claims.
	 *
	 * @return \Samwilson\SimpleWikidata\Properties\Item[] Empty if there are no statements
	 * for this property.
	 */
	public function getPropertyOfTypeItem( $propertyId ) {
		$wrapped = [];
		$entityData = $this->getEntity( $this->id );
		if ( isset( $entityData['claims'][$propertyId] ) ) {
			foreach ( $entityData['claims'][$propertyId] as $itemClaim ) {
				$wrapped[] = new Properties\Item( $itemClaim, $this->lang, $this->cache );
			}
		}

		return $wrapped;
	}
201
202
	/**
	 * Prepare an update of an item-valued property of this item.
	 *
	 * If the property already has claims, the first one is pointed at $itemId (unless it
	 * already has that value, in which case nothing happens); otherwise parameters for a new
	 * claim are built.
	 *
	 * NOTE: sending the request is not implemented yet (see the TODO below). At present this
	 * method only builds the API parameters and then removes this entity from the cache.
	 *
	 * @param string $property The property ID.
	 * @param string $itemId The ID of the item to set as the value; everything after its
	 * first character is used as the numeric ID.
	 */
	public function setPropertyOfTypeItem( $property, $itemId ) {
		// Cast so that the value is JSON-encoded as a number rather than as a string.
		$itemIdNumeric = (int)substr( $itemId, 1 );

		// First see if this property already exists, and that it is different from what's being set.
		$entity = $this->getEntity( $this->id );
		if ( !empty( $entity['claims'][$property] ) ) {
			// Get the first claim, and update it if necessary.
			$claim = array_shift( $entity['claims'][$property] );
			// A claim might carry no value at all, so read the current ID defensively.
			$currentId = isset( $claim['mainsnak']['datavalue']['value']['id'] )
				? $claim['mainsnak']['datavalue']['value']['id']
				: null;
			if ( $currentId === $itemId ) {
				// Already is the required value, no need to change.
				return;
			}
			$claim['mainsnak']['datavalue']['value']['id'] = $itemId;
			$claim['mainsnak']['datavalue']['value']['numeric-id'] = $itemIdNumeric;
			$apiParams = [
				'action' => 'wbsetclaim',
				'claim' => json_encode( $claim ),
			];
		} else {
			// No existing claim was found, so create a new one.
			$apiParams = [
				'action' => 'wbcreateclaim',
				'entity' => $this->getId(),
				'property' => $property,
				'snaktype' => 'value',
				'value' => json_encode( [ 'entity-type' => 'item', 'numeric-id' => $itemIdNumeric ] ),
			];
		}

		// @TODO Save the property: $apiParams is built above but is not sent anywhere yet.

		// Clear the cache.
		$this->cache->deleteItem( $this->getEntityCacheKey( $this->id ) );
	}
238
239
	/**
	 * Get the main values of every claim of a property of the given entity.
	 *
	 * @param string $entityId The ID of the entity to read.
	 * @param string $propertyId The property ID to look up in the entity's claims.
	 * @return array|bool The main-snak value of each claim, or false if the entity has no
	 * claims for the property.
	 */
	public function getPropertyOfTypeUrl( $entityId, $propertyId ) {
		$entityData = $this->getEntity( $entityId );
		if ( !isset( $entityData['claims'][$propertyId] ) ) {
			return false;
		}
		$values = [];
		foreach ( $entityData['claims'][$propertyId] as $urlClaim ) {
			$mainValue = $urlClaim['mainsnak']['datavalue']['value'];
			$values[] = $mainValue;
		}

		return $values;
	}
251
252
	/**
	 * Get the values of a property of the given entity, together with their qualifiers.
	 *
	 * Every claim is returned, including those that have no qualifiers (for which the
	 * 'qualifiers' element is an empty array).
	 *
	 * @param string $entityId The ID of the entity to read.
	 * @param string $propertyId The property ID to look up in the entity's claims.
	 * @return array[]|bool False if the entity has no claims for the property. Otherwise one
	 * element per claim, each an array with:
	 *   - 'qualifiers': qualifier values grouped by the label of the qualifier's property
	 *   - 'value': the claim's main value (null if the claim has none)
	 */
	public function getPropertyOfTypeExternalIdentifier( $entityId, $propertyId ) {
		$entity = $this->getEntity( $entityId );
		if ( !isset( $entity['claims'][$propertyId] ) ) {
			return false;
		}
		$idents = [];
		foreach ( $entity['claims'][$propertyId] as $claim ) {
			$qualifiers = [];
			// A claim without qualifiers is still a value of the property, so it is kept
			// (with an empty qualifier list) rather than being skipped.
			$claimQualifiers = isset( $claim['qualifiers'] ) ? $claim['qualifiers'] : [];
			foreach ( $claimQualifiers as $qualsInfo ) {
				foreach ( $qualsInfo as $qualInfo ) {
					$qualProp = self::factory( $qualInfo['property'], $this->lang, $this->cache );
					$propLabel = $qualProp->getLabel();
					if ( !isset( $qualifiers[$propLabel] ) ) {
						$qualifiers[$propLabel] = [];
					}
					$qualifiers[$propLabel][] = isset( $qualInfo['datavalue']['value'] )
						? $qualInfo['datavalue']['value']
						: null;
				}
			}
			$idents[] = [
				'qualifiers' => $qualifiers,
				'value' => isset( $claim['mainsnak']['datavalue']['value'] )
					? $claim['mainsnak']['datavalue']['value']
					: null,
			];
		}

		return $idents;
	}
281
282
	/**
	 * Get a single-valued text property.
	 *
	 * Returns the text of the first claim whose value is in this item's language and has
	 * non-empty text. Claims whose value carries no language are ignored.
	 *
	 * @param string $property One of the PROP_* constants.
	 * @return string|bool The value, or false if it can't be found.
	 */
	public function getPropertyOfTypeText( $property ) {
		$entity = $this->getEntity( $this->id );
		if ( !isset( $entity['claims'][$property] ) ) {
			return false;
		}
		foreach ( $entity['claims'][$property] as $claim ) {
			$value = isset( $claim['mainsnak']['datavalue']['value'] )
				? $claim['mainsnak']['datavalue']['value']
				: null;
			if ( !is_array( $value ) || !isset( $value['language'] ) ) {
				// Not a text-with-language value. Skip it; a library method must not dump
				// debugging output and terminate the whole process.
				continue;
			}
			if ( $value['language'] == $this->lang && !empty( $value['text'] ) ) {
				return $value['text'];
			}
		}
		return false;
	}
305
306
	/**
	 * Get the values of a quantity property of this item.
	 *
	 * Literal data field for a quantity that relates to some kind of well-defined unit. The
	 * actual unit goes in the data values that is entered.
	 *   - amount – implicit part of the string (mapping of unit prefix is unclear)
	 *   - unit – implicit part of the string that defaults to "1" (mapping to standardizing body is unclear)
	 *   - upperbound - quantity's upper bound
	 *   - lowerbound - quantity's lower bound
	 *
	 * When a value's 'unit' ends in '/' followed by an entity ID it is replaced by the
	 * corresponding Item; any other unit (such as "1") is left as it is.
	 *
	 * @param string $property The property ID.
	 * @return mixed[]|bool If it's not false it's an array of quantities, each an array with
	 * 'amount', 'unit', etc.
	 */
	public function getPropertyOfTypeQuantity( $property ) {
		$quantities = [];
		$entity = $this->getEntity( $this->id );
		if ( !isset( $entity['claims'][$property] ) ) {
			return false;
		}
		foreach ( $entity['claims'][$property] as $t ) {
			if ( !isset( $t['mainsnak']['datavalue']['value'] ) ) {
				// No value to report for this claim.
				continue;
			}
			$quantity = $t['mainsnak']['datavalue']['value'];
			// Take the entity ID from the end of the unit URI instead of cutting at a fixed
			// offset derived from an unrelated URL. A unit with no trailing entity ID is not
			// resolved, because passing it to factory() would throw.
			if ( isset( $quantity['unit'] )
				&& is_string( $quantity['unit'] )
				&& preg_match( '#/([QP][0-9]+)$#D', $quantity['unit'], $unitMatch ) === 1
			) {
				$quantity['unit'] = self::factory( $unitMatch[1], $this->lang, $this->cache );
			}
			$quantities[] = $quantity;
		}
		return $quantities;
	}
329
330
	/**
	 * Set a single-valued text property.
	 *
	 * If the property already has a claim in this item's language, the first such claim has
	 * its text replaced; otherwise a new claim is created. The cached entity is then removed
	 * so that the next read fetches fresh data.
	 *
	 * @param string $property One of the PROP_* constants.
	 * @param string $value The value.
	 */
	public function setPropertyOfTypeText( $property, $value ) {
		// First see if this property already exists, and that it is different from what's being set.
		$entity = $this->getEntity( $this->id );
		if ( !empty( $entity['claims'][$property] ) ) {
			// Find this language's claim (if there is one).
			foreach ( $entity['claims'][$property] as $claim ) {
				if ( !isset( $claim['mainsnak']['datavalue']['value']['language'] ) ) {
					// Not a text-with-language value; it cannot be this language's claim.
					continue;
				}
				if ( $claim['mainsnak']['datavalue']['value']['language'] == $this->lang ) {
					// Modify this claim's text value.
					$titleClaim = $claim;
					$titleClaim['mainsnak']['datavalue']['value']['text'] = $value;
					$setTitleParams = [
						'action' => 'wbsetclaim',
						'claim' => \GuzzleHttp\json_encode( $titleClaim ),
					];
					// Stop at the first match; there is nothing more to look for.
					break;
				}
			}
		}

		// If no claim was found (and modified) above, create a new claim.
		if ( !isset( $setTitleParams ) ) {
			$setTitleParams = [
				'action' => 'wbcreateclaim',
				'entity' => $this->getId(),
				'property' => $property,
				'snaktype' => 'value',
				'value' => \GuzzleHttp\json_encode( [ 'text' => $value, 'language' => $this->lang ] ),
			];
		}

		// Save the property.
		// NOTE(review): WdWpOauth is neither imported nor defined in this file, so it resolves
		// to this namespace — confirm that the class exists and is autoloadable.
		$wdWpOauth = new WdWpOauth();
		$wdWpOauth->makeCall( $setTitleParams, true );

		// Clear the cache.
		$this->cache->deleteItem( $this->getEntityCacheKey( $this->id ) );
	}
372
373
	/**
	 * Get the first 'instance of' statement of this item.
	 *
	 * @return \Samwilson\SimpleWikidata\Properties\Item|null The first statement, or null if
	 * the item has none.
	 */
	public function getInstanceOf() {
		// getPropertyOfTypeItem() takes only the property ID; it always reads this item.
		$instancesOf = $this->getPropertyOfTypeItem( self::PROP_INSTANCE_OF );
		return array_shift( $instancesOf );
	}
377
378
	/**
	 * Does this item exist? It does if an ID can be determined for it via getId().
	 *
	 * @return bool
	 */
	public function exists() {
		$resolvedId = $this->getId();
		return $resolvedId !== false;
	}
385
386
	/**
	 * Get the introductory extract of this item's Wikipedia article in the current language.
	 *
	 * The article is found through the entity's sitelink for "<lang>wiki" and the extract is
	 * requested from that Wikipedia's API. A successful result is cached for one day.
	 *
	 * @return array An array with 'title' and 'html' elements, or an empty array if the entity
	 * has no sitelinks or none for this language's Wikipedia.
	 */
	public function getWikipediaIntro() {
		$introCacheKey = 'wikipedia-intro-' . $this->id . $this->lang;
		if ( $this->cache->hasItem( $introCacheKey ) ) {
			return $this->cache->getItem( $introCacheKey )->get();
		}
		$entityData = $this->getEntity( $this->id );
		if ( !isset( $entityData['sitelinks'] ) ) {
			return [];
		}
		foreach ( $entityData['sitelinks'] as $link ) {
			if ( $link['site'] != $this->lang . 'wiki' ) {
				// Not the Wikipedia of this item's language.
				continue;
			}
			$wikipediaApi = new MediawikiApi( 'https://' . $this->lang . '.wikipedia.org/w/api.php' );
			$extractRequest = new SimpleRequest( 'query', [
				'prop' => 'extracts',
				'exintro' => true,
				'titles' => $link['title'],
			] );
			$apiResult = $wikipediaApi->getRequest( $extractRequest );
			// Only the first page of the response is used.
			$firstPage = array_shift( $apiResult['query']['pages'] );
			$intro = [
				'title' => $firstPage['title'],
				'html' => $firstPage['extract'],
			];
			$introCacheItem = $this->cache->getItem( $introCacheKey )
				->expiresAfter( new DateInterval( 'P1D' ) )
				->set( $intro );
			$this->cache->save( $introCacheItem );

			return $intro;
		}

		return [];
	}
420
421
	/**
	 * Get the raw entity data from the 'wbgetentities' API call.
	 *
	 * The data is served from the cache when it is there (unless $ignoreCache is set), and a
	 * freshly fetched result is cached for ten minutes.
	 *
	 * @param string|null $id The entity ID; this item's own ID is used if this is empty.
	 * @param bool $ignoreCache Whether to bypass the cache lookup and always query the API.
	 * @return array|bool The entity data, or false if the API response does not contain it.
	 */
	public function getEntity( $id = null, $ignoreCache = false ) {
		$idActual = $id ?: $this->id;
		$cacheKey = $this->getEntityCacheKey( $idActual );
		if ( !$ignoreCache && $this->cache->hasItem( $cacheKey ) ) {
			return $this->cache->getItem( $cacheKey )->get();
		}
		$metadataRequest = new SimpleRequest( 'wbgetentities', [ 'ids' => $idActual ] );
		$itemResult = $this->wdApi->getRequest( $metadataRequest );
		// Check for the ID that was actually requested: $id itself is empty when the caller
		// relies on the default, and would then never be found in the response.
		if ( !isset( $itemResult['success'] ) || !isset( $itemResult['entities'][$idActual] ) ) {
			return false;
		}
		$metadata = $itemResult['entities'][$idActual];
		$cacheItem = $this->cache->getItem( $cacheKey )
			->expiresAfter( new DateInterval( 'PT10M' ) )
			->set( $metadata );
		$this->cache->save( $cacheItem );
		return $metadata;
	}
445
446
	/**
	 * Build the cache key under which an entity's data is stored.
	 *
	 * @param string $id The entity ID.
	 *
	 * @return string The ID prefixed with 'entities'.
	 */
	protected function getEntityCacheKey( $id ) {
		$prefix = 'entities';
		return $prefix . $id;
	}
454
}
455