EntityDataRequestHandler::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 31
rs 9.424
c 0
b 0
f 0
cc 1
nc 1
nop 14

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
namespace Wikibase\Repo\LinkedData;
4
5
use HtmlCacheUpdater;
6
use HttpError;
7
use OutputPage;
8
use Psr\Log\LoggerInterface;
9
use WebRequest;
10
use WebResponse;
11
use Wikibase\DataModel\Entity\EntityId;
12
use Wikibase\DataModel\Entity\EntityIdParser;
13
use Wikibase\DataModel\Entity\EntityIdParsingException;
14
use Wikibase\DataModel\Entity\EntityRedirect;
15
use Wikibase\DataModel\Services\Lookup\EntityRedirectLookup;
16
use Wikibase\DataModel\Services\Lookup\EntityRedirectLookupException;
17
use Wikibase\Lib\Store\BadRevisionException;
18
use Wikibase\Lib\Store\EntityRevision;
19
use Wikibase\Lib\Store\EntityRevisionLookup;
20
use Wikibase\Lib\Store\EntityTitleLookup;
21
use Wikibase\Lib\Store\RedirectRevision;
22
use Wikibase\Lib\Store\RevisionedUnresolvedRedirectException;
23
use Wikibase\Lib\Store\StorageException;
24
use Wikimedia\Http\HttpAcceptNegotiator;
25
use Wikimedia\Http\HttpAcceptParser;
26
27
/**
28
 * Request handler implementing a linked data interface for Wikibase entities.
29
 *
30
 * @license GPL-2.0-or-later
31
 * @author Daniel Kinzler
32
 * @author Thomas Pellissier Tanon
33
 * @author Anja Jentzsch < [email protected] >
34
 */
35
class EntityDataRequestHandler {
36
37
	/**
	 * Lower and upper bound, in seconds, that outputData() applies to the "max-age=..." and
	 * "s-maxage=..." cache control values.
	 *
	 * @todo Hard maximum could be configurable somehow.
	 */
	const MINIMUM_MAX_AGE = 0;
	const MAXIMUM_MAX_AGE = 2678400; // 31 days

	/** @var EntityDataSerializationService Produces the serialized data and its content type. */
	private $serializationService;

	/** @var EntityDataUriManager Parses document names and builds document and cache URLs. */
	private $uriManager;

	/** @var EntityIdParser Turns the requested ID string into an EntityId. */
	private $entityIdParser;

	/** @var EntityRevisionLookup Used to load the requested entity revision. */
	private $entityRevisionLookup;

	/** @var EntityRedirectLookup Used to find redirects pointing at an entity. */
	private $entityRedirectLookup;

	/** @var EntityTitleLookup Set by the constructor; not read by any method of this class. */
	private $entityTitleLookup;

	/** @var EntityDataFormatProvider Knows format names, MIME types and which formats are RDF. */
	private $entityDataFormatProvider;

	/** @var HtmlCacheUpdater Used by purgeWebCache() to purge URLs. */
	private $htmlCacheUpdater;

	/** @var LoggerInterface Receives debug messages about failed lookups. */
	private $logger;

	/** @var string The format, as a file extension or MIME type, preferred when no Accept header is sent. */
	private $defaultFormat;

	/** @var int Number of seconds to cache entity data. */
	private $maxAge;

	/** @var bool Whether web caches are configured. Set by the constructor; not read by any method of this class. */
	private $useCdn;

	/** @var string|null Value for the X-Frame-Options header; no header is sent if null or empty. */
	private $frameOptionsHeader;

	/** @var string[] Entity types for which RDF formats are refused. */
	private $entityTypesWithoutRdfOutput;
115
116
	/**
	 * Stores the collaborating services and the configuration values on the instance.
	 * No other work is done here.
	 *
	 * @param EntityDataUriManager $uriManager
	 * @param HtmlCacheUpdater $htmlCacheUpdater
	 * @param EntityTitleLookup $entityTitleLookup
	 * @param EntityIdParser $entityIdParser
	 * @param EntityRevisionLookup $entityRevisionLookup
	 * @param EntityRedirectLookup $entityRedirectLookup
	 * @param EntityDataSerializationService $serializationService
	 * @param EntityDataFormatProvider $entityDataFormatProvider
	 * @param LoggerInterface $logger
	 * @param string[] $entityTypesWithoutRdfOutput entity types for which RDF formats are refused
	 * @param string $defaultFormat The format as a file extension or MIME type.
	 * @param int $maxAge number of seconds to cache entity data
	 * @param bool $useCdn do we have web caches configured?
	 * @param string|null $frameOptionsHeader for X-Frame-Options
	 */
	public function __construct(
		EntityDataUriManager $uriManager,
		HtmlCacheUpdater $htmlCacheUpdater,
		EntityTitleLookup $entityTitleLookup,
		EntityIdParser $entityIdParser,
		EntityRevisionLookup $entityRevisionLookup,
		EntityRedirectLookup $entityRedirectLookup,
		EntityDataSerializationService $serializationService,
		EntityDataFormatProvider $entityDataFormatProvider,
		LoggerInterface $logger,
		array $entityTypesWithoutRdfOutput,
		$defaultFormat,
		$maxAge,
		$useCdn,
		$frameOptionsHeader
	) {
		// Parsing and lookup services.
		$this->entityIdParser = $entityIdParser;
		$this->entityTitleLookup = $entityTitleLookup;
		$this->entityRevisionLookup = $entityRevisionLookup;
		$this->entityRedirectLookup = $entityRedirectLookup;

		// URL handling, serialization, cache purging and logging.
		$this->uriManager = $uriManager;
		$this->entityDataFormatProvider = $entityDataFormatProvider;
		$this->serializationService = $serializationService;
		$this->htmlCacheUpdater = $htmlCacheUpdater;
		$this->logger = $logger;

		// Plain configuration values.
		$this->defaultFormat = $defaultFormat;
		$this->entityTypesWithoutRdfOutput = $entityTypesWithoutRdfOutput;
		$this->frameOptionsHeader = $frameOptionsHeader;
		$this->maxAge = $maxAge;
		$this->useCdn = $useCdn;
	}
163
164
	/**
	 * Tells whether the request carries enough information to be answered with entity data.
	 *
	 * That is the case when a non-empty document name is given, or else when the "id"
	 * request parameter is non-empty. Validity is not checked here, so a positive answer
	 * does not mean the request will produce a successful response.
	 *
	 * @param string|null $doc Document name, e.g. Q5 or Q5.json or Q5:33.xml
	 * @param WebRequest $request
	 *
	 * @return bool
	 */
	public function canHandleRequest( $doc, WebRequest $request ) {
		$hasDocName = $doc !== null && $doc !== '';

		// Without a document name, the entity ID has to come from the "id" parameter.
		return $hasDocName || $request->getText( 'id', '' ) !== '';
	}
185
186
	/**
	 * Main method for handling requests.
	 *
	 * Determines entity ID, format and revision from the document name and the request
	 * parameters, and then either sets a redirect on the output page (content negotiation,
	 * non-canonical document name, HTML format, forced redirect) or outputs the entity data
	 * via showData().
	 *
	 * @param string $doc Document name, e.g. Q5 or Q5.json or Q5:33.xml
	 * @param WebRequest $request The request parameters. Known parameters are:
	 *        - id: the entity ID
	 *        - format: the format
	 *        - revision: the revision ID
	 *        - redirect: if set to "force", reply with a redirect to the document URL
	 *        - action=purge: to purge cached data from (web) caches
	 * @param OutputPage $output
	 *
	 * @note Instead of an output page, a WebResponse could be sufficient, but
	 *        redirect logic is currently implemented in OutputPage.
	 *
	 * @throws HttpError 400 if the entity ID is missing or cannot be parsed, 406 if an RDF
	 *         format is requested for an entity type without RDF output; further errors
	 *         may come from getCanonicalFormat(), httpContentNegotiation() and showData().
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	public function handleRequest( $doc, WebRequest $request, OutputPage $output ) {
		// No matter what: The response is always public
		$output->getRequest()->response()->header( 'Access-Control-Allow-Origin: *' );

		$revision = 0;

		list( $id, $format ) = $this->uriManager->parseDocName( $doc );

		// Read entity id, format and revision from the request parameters; the values
		// parsed from the document name are passed as the defaults.
		$format = $request->getText( 'format', $format );
		$id = $request->getText( 'id', $id );
		$revision = $request->getInt( 'revision', $revision );
		$redirectMode = $request->getText( 'redirect' );
		//TODO: malformed revision IDs should trigger a code 400

		// If there is no ID, fail
		if ( $id === null || $id === '' ) {
			//TODO: different error message?
			throw new HttpError( 400, $output->msg( 'wikibase-entitydata-bad-id', $id ) );
		}

		try {
			$entityId = $this->entityIdParser->parse( $id );
		} catch ( EntityIdParsingException $ex ) {
			throw new HttpError( 400, $output->msg( 'wikibase-entitydata-bad-id', $id ) );
		}

		// Entity type names are strings, so compare strictly instead of relying on
		// PHP's loose comparison rules.
		if ( $this->entityDataFormatProvider->isRdfFormat( $format ) &&
			in_array( $entityId->getEntityType(), $this->entityTypesWithoutRdfOutput, true )
		) {
			throw new HttpError( 406, $output->msg( 'wikibase-entitydata-rdf-not-available', $entityId->getEntityType() ) );
		}

		//XXX: allow for logged in users only?
		if ( $request->getText( 'action' ) === 'purge' ) {
			$this->purgeWebCache( $entityId, $revision );
			//XXX: Now what? Proceed to show the data?
		}

		if ( $format === null || $format === '' ) {
			// if no format is given, apply content negotiation and return.
			$this->httpContentNegotiation( $request, $output, $entityId, $revision );
			return;
		}

		//NOTE: will trigger a 415 if the format is not supported
		$format = $this->getCanonicalFormat( $format );

		if ( $doc !== null && $doc !== '' ) {
			// if subpage syntax is used, always enforce the canonical form
			$canonicalDoc = $this->uriManager->getDocName( $entityId, $format );

			if ( $doc !== $canonicalDoc ) {
				$url = $this->uriManager->getDocUrl( $entityId, $format, $revision );
				$output->redirect( $url, 301 );
				return;
			}
		}

		// if the format is HTML, redirect to the entity's wiki page
		if ( $format === 'html' ) {
			$url = $this->uriManager->getDocUrl( $entityId, 'html', $revision );
			$output->redirect( $url, 303 );
			return;
		}

		// if redirection was forced, redirect
		if ( $redirectMode === 'force' ) {
			$url = $this->uriManager->getDocUrl( $entityId, $format, $revision );
			$output->redirect( $url, 303 );
			return;
		}

		$this->showData( $request, $output, $format, $entityId, $revision );
	}
278
279
	/**
	 * Maps a format given as a name, file extension or MIME type to its canonical name.
	 *
	 * The input is lower-cased first. "html", "htm" and "text/html" always map to "html"
	 * without consulting the format provider; everything else is resolved by the provider.
	 *
	 * @param string $format
	 *
	 * @return string
	 * @throws HttpError code 415 if the format is not supported.
	 */
	public function getCanonicalFormat( $format ) {
		$normalized = strtolower( $format );

		// HTML is always supported, since it is handled by the entity's wiki page
		// (note that HTML may not be known to the format provider).
		if ( in_array( $normalized, [ 'html', 'htm', 'text/html' ], true ) ) {
			return 'html';
		}

		$canonicalName = $this->entityDataFormatProvider->getFormatName( $normalized );
		if ( $canonicalName !== null ) {
			return $canonicalName;
		}

		$unsupported = wfMessage( 'wikibase-entitydata-unsupported-format', $normalized );
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped
		throw new HttpError( 415, $unsupported );
	}
306
307
	/**
	 * Purges the data of the given entity from HTTP caches.
	 *
	 * The URLs to purge are obtained from the URI manager. If it reports none, the cache
	 * updater is not called at all.
	 *
	 * @param EntityId $id The entity ID for which to purge all data.
	 * @param int $revision The revision ID (0 for current/unspecified)
	 */
	public function purgeWebCache( EntityId $id, int $revision ) {
		$cachedUrls = $this->uriManager->getPotentiallyCachedUrls( $id, $revision );

		if ( $cachedUrls === [] ) {
			// Nothing to purge.
			return;
		}

		$this->htmlCacheUpdater->purgeUrls( $cachedUrls );
	}
320
321
	/**
	 * Applies HTTP content negotiation.
	 *
	 * If the negotiation is successful, this method sets a 303 redirect to the document URL
	 * for the negotiated format on the OutputPage object and returns. Otherwise, an
	 * HttpError is thrown.
	 *
	 * @param WebRequest $request
	 * @param OutputPage $output
	 * @param EntityId $id The ID of the entity to show
	 * @param int $revision The desired revision
	 *
	 * @throws HttpError code 406 if none of the supported MIME types is acceptable.
	 */
	public function httpContentNegotiation( WebRequest $request, OutputPage $output, EntityId $id, $revision = 0 ) {
		$requestHeaders = $request->getAllHeaders();

		if ( !isset( $requestHeaders['ACCEPT'] ) ) {
			// No Accept header was sent, so anything goes.
			$weights = [
				'*' => 0.1 // just to make extra sure
			];

			$fallbackFormat = $this->entityDataFormatProvider->getFormatName( $this->defaultFormat );
			$fallbackMime = $this->entityDataFormatProvider->getMimeType( $fallbackFormat );

			// Give the configured default the highest weight, if it has a MIME type.
			if ( $fallbackMime != null ) {
				$weights[$fallbackMime] = 1;
			}
		} else {
			$acceptParser = new HttpAcceptParser();
			$weights = $acceptParser->parseWeights( $requestHeaders['ACCEPT'] );
		}

		$candidates = $this->entityDataFormatProvider->getSupportedMimeTypes();
		$candidates[] = 'text/html'; // HTML is handled by the normal page URL

		$negotiator = new HttpAcceptNegotiator( $candidates );
		$bestMatch = $negotiator->getBestSupportedKey( $weights, null );

		if ( $bestMatch === null ) {
			// The error message lists the provider's MIME types only, without text/html.
			$supportedList = implode( ', ', $this->entityDataFormatProvider->getSupportedMimeTypes() );
			$notAcceptable = $output->msg( 'wikibase-entitydata-not-acceptable', $supportedList );
			// @phan-suppress-next-line SecurityCheck-DoubleEscaped
			throw new HttpError( 406, $notAcceptable );
		}

		$canonicalFormat = $this->getCanonicalFormat( $bestMatch );

		$output->redirect( $this->uriManager->getDocUrl( $id, $canonicalFormat, $revision ), 303 );
	}
371
372
	/**
	 * Loads the requested Entity. Redirects are resolved if no specific revision
	 * is requested or they are explicitly allowed by $allowRedirects.
	 *
	 * When a redirect is resolved, the target is loaded at its current revision (0) by
	 * calling this method again, and the second element of the returned list describes
	 * the redirect that was followed. Otherwise the second element is null.
	 *
	 * @param EntityId $id
	 * @param int $revision The revision ID (use 0 for the current revision).
	 * @param bool $allowRedirects Can we fetch redirects when revision is set?
	 *
	 * @return array list( EntityRevision, RedirectRevision|null )
	 * @throws HttpError 404 if the entity or the revision cannot be found, 400 if the
	 *         requested revision is a redirect that may not be resolved, 500 on other
	 *         storage errors.
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	private function getEntityRevision( EntityId $id, $revision, $allowRedirects = false ) {
		$prefixedId = $id->getSerialization();
		$redirectRevision = null;

		try {
			$entityRevision = $this->entityRevisionLookup->getEntityRevision( $id, $revision );

			if ( $entityRevision === null ) {
				$this->logger->debug(
					'{method}: entity not found: {prefixedId}',
					[
						'method' => __METHOD__,
						'prefixedId' => $prefixedId,
					]
				);

				$msg = wfMessage( 'wikibase-entitydata-not-found', $prefixedId );
				throw new HttpError( 404, $msg );
			}
		} catch ( RevisionedUnresolvedRedirectException $ex ) {
			// Remember which redirect was hit, so the caller can report it.
			$redirectRevision = new RedirectRevision(
				new EntityRedirect( $id, $ex->getRedirectTargetId() ),
				$ex->getRevisionId(), $ex->getRevisionTimestamp()
			);

			if ( $revision === 0 || $allowRedirects ) {
				// If no specific revision is requested or redirects are explicitly allowed, resolve the redirect.
				// Only the entity revision of the nested result is used; the redirect
				// reported to the caller stays the one that was hit first.
				list( $entityRevision, ) = $this->getEntityRevision( $ex->getRedirectTargetId(), 0 );
			} else {
				// The requested revision is a redirect
				$this->logger->debug(
					'{method}: revision {revision} of {prefixedId} is a redirect: {exMsg}',
					[
						'method' => __METHOD__,
						'revision' => $revision,
						'prefixedId' => $prefixedId,
						'exMsg' => strval( $ex ),
					]
				);

				$msg = wfMessage( 'wikibase-entitydata-bad-revision', $prefixedId, $revision );
				throw new HttpError( 400, $msg );
			}
		} catch ( BadRevisionException $ex ) {
			$this->logger->debug(
				'{method}: could not load revision {revision} of {prefixedId}: {exMsg}',
				[
					'method' => __METHOD__,
					'revision' => $revision,
					'prefixedId' => $prefixedId,
					'exMsg' => strval( $ex ),
				]
			);

			$msg = wfMessage( 'wikibase-entitydata-bad-revision', $prefixedId, $revision );
			throw new HttpError( 404, $msg );
		} catch ( StorageException $ex ) {
			$this->logger->debug(
				'{method}: failed to load {prefixedId}: {exMsg} (revision {revision})',
				[
					'method' => __METHOD__,
					'prefixedId' => $prefixedId,
					'exMsg' => strval( $ex ),
					'revision' => $revision,
				]
			);

			$msg = wfMessage( 'wikibase-entitydata-storage-error', $prefixedId, $revision );
			throw new HttpError( 500, $msg );
		}

		return [ $entityRevision, $redirectRevision ];
	}
457
458
	/**
	 * Loads the IDs of the redirects that point to the given entity ID.
	 *
	 * A failing lookup is not treated as an error: it is logged at debug level and an
	 * empty list is returned instead.
	 *
	 * @param EntityId $id
	 *
	 * @return EntityId[]
	 */
	private function getIncomingRedirects( EntityId $id ) {
		try {
			$redirectIds = $this->entityRedirectLookup->getRedirectIds( $id );
		} catch ( EntityRedirectLookupException $lookupError ) {
			$this->logger->debug(
				'{method}: failed to load incoming redirects of {prefixedId}: {exMsg}',
				[
					'method' => __METHOD__,
					'prefixedId' => $id->getSerialization(),
					'exMsg' => strval( $lookupError ),
				]
			);

			$redirectIds = [];
		}

		return $redirectIds;
	}
483
484
	/**
	 * Output entity data.
	 *
	 * Loads the entity revision, replies with a 304 status if an If-Modified-Since header
	 * is present and the revision's timestamp is not later than it, and otherwise
	 * serializes the entity and hands the result to outputData().
	 *
	 * @param WebRequest $request
	 * @param OutputPage $output
	 * @param string $format The name (mime type or file extension) of the format to use
	 * @param EntityId $id The entity ID
	 * @param int $revision The revision ID (use 0 for the current revision).
	 *
	 * @throws HttpError
	 */
	public function showData( WebRequest $request, OutputPage $output, $format, EntityId $id, $revision ) {
		$flavor = $request->getRawVal( 'flavor' );
		$isDump = $flavor === 'dump';

		/** @var EntityRevision $entityRevision */
		/** @var RedirectRevision $followedRedirectRevision */
		// In dump flavor, redirects may be fetched by revision, since the content of the
		// target revision will not be dumped.
		list( $entityRevision, $followedRedirectRevision ) = $this->getEntityRevision( $id, $revision, $isDump );

		// handle If-Modified-Since
		$ifModifiedSince = $request->getHeader( 'IF-MODIFIED-SINCE' );
		if ( $ifModifiedSince !== false ) {
			$clientTimestamp = wfTimestamp( TS_MW, $ifModifiedSince );

			if ( $entityRevision->getTimestamp() <= $clientTimestamp ) {
				$output->getRequest()->response()->header( 'Status: 304', true, 304 );
				$output->setArticleBodyOnly( true );
				return;
			}
		}

		// In dump mode and when fetching a specific revision, don't include incoming
		// redirects. Otherwise look them up for the entity that was actually loaded
		// (if we followed a redirect, that is the target id).
		$incomingRedirects = ( $isDump || $revision > 0 )
			? []
			: $this->getIncomingRedirects( $entityRevision->getEntity()->getId() );

		list( $data, $contentType ) = $this->serializationService->getSerializedData(
			$format,
			$entityRevision,
			$followedRedirectRevision,
			$incomingRedirects,
			$flavor
		);

		$output->disable();

		$webResponse = $output->getRequest()->response();
		$this->outputData(
			$request,
			$id,
			$revision,
			$webResponse,
			$data,
			$contentType,
			$entityRevision->getTimestamp()
		);
	}
544
545
	/**
	 * Output the entity data and set the appropriate HTTP response headers.
	 *
	 * The "maxage" and "smaxage" request parameters (defaulting to the configured max age)
	 * are clamped to the range MINIMUM_MAX_AGE..MAXIMUM_MAX_AGE. They are only used when
	 * the full request URL is one of the cacheable URLs reported by the URI manager; any
	 * other URL gets a private, no-cache Cache-Control header.
	 *
	 * @param WebRequest  $request
	 * @param EntityId    $requestId       the original entity ID of the request
	 * @param int         $requestRevision the original revision ID of the request (0 for latest)
	 * @param WebResponse $response
	 * @param string      $data        the data to output
	 * @param string      $contentType the data's mime type
	 * @param string      $lastModified timestamp for the Last-Modified header; the header
	 *                                  is skipped if this is empty
	 */
	public function outputData(
		WebRequest $request,
		EntityId $requestId,
		int $requestRevision,
		WebResponse $response,
		string $data,
		string $contentType,
		string $lastModified
	) {
		// NOTE: similar code as in RawAction::onView, keep in sync.

		$maxAge = $request->getInt( 'maxage', $this->maxAge );
		$sMaxAge = $request->getInt( 'smaxage', $this->maxAge );

		$maxAge  = max( self::MINIMUM_MAX_AGE, min( self::MAXIMUM_MAX_AGE, $maxAge ) );
		$sMaxAge = max( self::MINIMUM_MAX_AGE, min( self::MAXIMUM_MAX_AGE, $sMaxAge ) );

		$response->header( 'Content-Type: ' . $contentType . '; charset=UTF-8' );

		if ( $lastModified ) {
			$response->header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $lastModified ) );
		}

		//Set X-Frame-Options API results (bug T41180)
		if ( $this->frameOptionsHeader !== null && $this->frameOptionsHeader !== '' ) {
			$response->header( "X-Frame-Options: $this->frameOptionsHeader" );
		}

		// URLs are strings, so compare strictly instead of relying on PHP's loose
		// comparison rules.
		$cacheableUrls = $this->uriManager->getCacheableUrls( $requestId, $requestRevision );
		if ( in_array( $request->getFullRequestURL(), $cacheableUrls, true ) ) {
			$response->header( 'Cache-Control: public, s-maxage=' . $sMaxAge . ', max-age=' . $maxAge );
		} else {
			$response->header( 'Cache-Control: private, no-cache, s-maxage=0' );
		}

		// Remove anything that might already be in the output buffer. ob_clean() raises
		// a notice when no buffer is active, so only call it if there is one.
		if ( ob_get_level() > 0 ) {
			ob_clean();
		}

		print $data;

		// exit normally here, keeping all levels of output buffering.
	}
597
598
}
599