EntityDataRequestHandler::handleRequest()   C
last analyzed

Complexity

Conditions 14
Paths 19

Size

Total Lines 75

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 75
rs 5.5587
c 0
b 0
f 0
cc 14
nc 19
nop 3

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Wikibase\Repo\LinkedData;
4
5
use HtmlCacheUpdater;
6
use HttpError;
7
use OutputPage;
8
use Psr\Log\LoggerInterface;
9
use WebRequest;
10
use WebResponse;
11
use Wikibase\DataModel\Entity\EntityId;
12
use Wikibase\DataModel\Entity\EntityIdParser;
13
use Wikibase\DataModel\Entity\EntityIdParsingException;
14
use Wikibase\DataModel\Entity\EntityRedirect;
15
use Wikibase\DataModel\Services\Lookup\EntityRedirectLookup;
16
use Wikibase\DataModel\Services\Lookup\EntityRedirectLookupException;
17
use Wikibase\Lib\Store\BadRevisionException;
18
use Wikibase\Lib\Store\EntityRevision;
19
use Wikibase\Lib\Store\EntityRevisionLookup;
20
use Wikibase\Lib\Store\EntityTitleLookup;
21
use Wikibase\Lib\Store\RedirectRevision;
22
use Wikibase\Lib\Store\RevisionedUnresolvedRedirectException;
23
use Wikibase\Lib\Store\StorageException;
24
use Wikimedia\Http\HttpAcceptNegotiator;
25
use Wikimedia\Http\HttpAcceptParser;
26
27
/**
28
 * Request handler implementing a linked data interface for Wikibase entities.
29
 *
30
 * @license GPL-2.0-or-later
31
 * @author Daniel Kinzler
32
 * @author Thomas Pellissier Tanon
33
 * @author Anja Jentzsch < [email protected] >
34
 */
35
class EntityDataRequestHandler {
36
37
	/**
	 * Lower and upper bound, in seconds, that outputData() enforces on the values it emits
	 * for the "max-age=..." and "s-maxage=..." cache control parameters.
	 *
	 * @todo Hard maximum could be configurable somehow.
	 */
	const MINIMUM_MAX_AGE = 0;
	const MAXIMUM_MAX_AGE = 31 * 24 * 60 * 60; // 31 days, i.e. 2678400 seconds
45
46
	/** @var EntityDataSerializationService Turns an entity revision into serialized data plus its content type. */
	private $serializationService;

	/** @var EntityDataUriManager Parses document names and builds document names and (cacheable) URLs. */
	private $uriManager;

	/** @var EntityIdParser Parses the entity ID string taken from the request. */
	private $entityIdParser;

	/** @var EntityRevisionLookup Loads the requested entity revision. */
	private $entityRevisionLookup;

	/** @var EntityRedirectLookup Looks up the IDs of redirects pointing at an entity. */
	private $entityRedirectLookup;

	/** @var EntityTitleLookup Injected via the constructor; not read by the methods of this class. */
	private $entityTitleLookup;

	/** @var EntityDataFormatProvider Knows the supported formats, their names and MIME types. */
	private $entityDataFormatProvider;

	/** @var HtmlCacheUpdater Receives the URLs to purge in purgeWebCache(). */
	private $htmlCacheUpdater;

	/** @var LoggerInterface Receives debug messages about failed lookups. */
	private $logger;

	/** @var string Format preferred by content negotiation when the request has no Accept header. */
	private $defaultFormat;

	/** @var int Number of seconds to cache entity data. */
	private $maxAge;

	/** @var bool Injected via the constructor; not read by the methods of this class. */
	private $useCdn;

	/** @var string|null Value for the X-Frame-Options header; no header is sent for null or ''. */
	private $frameOptionsHeader;

	/** @var string[] Entity types for which RDF formats are answered with a 406 error. */
	private $entityTypesWithoutRdfOutput;
115
116
	/**
	 * Stores the injected services and settings; performs no other work.
	 *
	 * @param EntityDataUriManager $uriManager
	 * @param HtmlCacheUpdater $htmlCacheUpdater
	 * @param EntityTitleLookup $entityTitleLookup
	 * @param EntityIdParser $entityIdParser
	 * @param EntityRevisionLookup $entityRevisionLookup
	 * @param EntityRedirectLookup $entityRedirectLookup
	 * @param EntityDataSerializationService $serializationService
	 * @param EntityDataFormatProvider $entityDataFormatProvider
	 * @param LoggerInterface $logger
	 * @param string[] $entityTypesWithoutRdfOutput
	 * @param string $defaultFormat The format as a file extension or MIME type.
	 * @param int $maxAge number of seconds to cache entity data
	 * @param bool $useCdn do we have web caches configured?
	 * @param string|null $frameOptionsHeader for X-Frame-Options
	 */
	public function __construct(
		EntityDataUriManager $uriManager,
		HtmlCacheUpdater $htmlCacheUpdater,
		EntityTitleLookup $entityTitleLookup,
		EntityIdParser $entityIdParser,
		EntityRevisionLookup $entityRevisionLookup,
		EntityRedirectLookup $entityRedirectLookup,
		EntityDataSerializationService $serializationService,
		EntityDataFormatProvider $entityDataFormatProvider,
		LoggerInterface $logger,
		array $entityTypesWithoutRdfOutput,
		$defaultFormat,
		$maxAge,
		$useCdn,
		$frameOptionsHeader
	) {
		// Lookups and parsers used to resolve the requested entity.
		$this->entityIdParser = $entityIdParser;
		$this->entityTitleLookup = $entityTitleLookup;
		$this->entityRevisionLookup = $entityRevisionLookup;
		$this->entityRedirectLookup = $entityRedirectLookup;

		// Services used to build URLs and produce the response.
		$this->uriManager = $uriManager;
		$this->serializationService = $serializationService;
		$this->entityDataFormatProvider = $entityDataFormatProvider;
		$this->htmlCacheUpdater = $htmlCacheUpdater;
		$this->logger = $logger;

		// Plain settings.
		$this->entityTypesWithoutRdfOutput = $entityTypesWithoutRdfOutput;
		$this->defaultFormat = $defaultFormat;
		$this->maxAge = $maxAge;
		$this->useCdn = $useCdn;
		$this->frameOptionsHeader = $frameOptionsHeader;
	}
163
164
	/**
	 * Tells whether the request carries enough information to be answered with entity data:
	 * either a non-empty document name, or a non-empty "id" request parameter.
	 *
	 * This is a completeness check only. It does not validate the request, so a request
	 * accepted here can still end in an error response.
	 *
	 * @param string|null $doc Document name, e.g. Q5 or Q5.json or Q5:33.xml
	 * @param WebRequest $request
	 *
	 * @return bool
	 */
	public function canHandleRequest( $doc, WebRequest $request ) {
		$hasDocName = $doc !== null && $doc !== '';

		// The "id" parameter is only consulted when there is no document name.
		return $hasDocName || $request->getText( 'id', '' ) !== '';
	}
185
186
	/**
	 * Main method for handling requests.
	 *
	 * Depending on the request, this either throws an HttpError, sets a redirect on $output
	 * (content negotiation, non-canonical document name, HTML format, or redirect=force),
	 * or hands over to showData() to output the serialized entity.
	 *
	 * @param string $doc Document name, e.g. Q5 or Q5.json or Q5:33.xml
	 * @param WebRequest $request The request parameters. Known parameters are:
	 *        - id: the entity ID (used instead of the ID from $doc when given)
	 *        - format: the format (used instead of the format from $doc when given)
	 *        - revision: the revision ID (0 if absent)
	 *        - redirect=force: answer with a 303 redirect to the document URL
	 *        - action=purge: to purge cached data from (web) caches
	 * @param OutputPage $output
	 *
	 * @note Instead of an output page, a WebResponse could be sufficient, but
	 *        redirect logic is currently implemented in OutputPage.
	 *
	 * @throws HttpError
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	public function handleRequest( $doc, WebRequest $request, OutputPage $output ) {
		// No matter what: The response is always public
		$output->getRequest()->response()->header( 'Access-Control-Allow-Origin: *' );

		list( $id, $format ) = $this->uriManager->parseDocName( $doc );

		// get entity id and format from request parameter
		$format = $request->getText( 'format', $format );
		$id = $request->getText( 'id', $id );
		$revision = $request->getInt( 'revision', 0 );
		$redirectMode = $request->getText( 'redirect' );
		//TODO: malformed revision IDs should trigger a code 400

		$entityId = $this->parseRequestedEntityId( $id, $output );
		$this->assertRdfOutputAvailable( $entityId, $format, $output );

		//XXX: allow for logged in users only?
		if ( $request->getText( 'action' ) === 'purge' ) {
			$this->purgeWebCache( $entityId, $revision );
			//XXX: Now what? Proceed to show the data?
		}

		if ( $format === null || $format === '' ) {
			// if no format is given, apply content negotiation and return.
			$this->httpContentNegotiation( $request, $output, $entityId, $revision );
			return;
		}

		//NOTE: will trigger a 415 if the format is not supported
		$format = $this->getCanonicalFormat( $format );

		$redirectStatus = $this->getRedirectStatus( $doc, $entityId, $format, $redirectMode );
		if ( $redirectStatus !== null ) {
			// Every redirect case targets the same document URL; only the status differs.
			$url = $this->uriManager->getDocUrl( $entityId, $format, $revision );
			$output->redirect( $url, $redirectStatus );
			return;
		}

		$this->showData( $request, $output, $format, $entityId, $revision );
	}

	/**
	 * Parses the requested entity ID, failing with a 400 error if it is missing or malformed.
	 *
	 * @param string|null $id The raw ID from the document name or the "id" parameter
	 * @param OutputPage $output Used to build the error message
	 *
	 * @return EntityId
	 * @throws HttpError code 400
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	private function parseRequestedEntityId( $id, OutputPage $output ) {
		// If there is no ID, fail
		if ( $id === null || $id === '' ) {
			//TODO: different error message?
			throw new HttpError( 400, $output->msg( 'wikibase-entitydata-bad-id', $id ) );
		}

		try {
			return $this->entityIdParser->parse( $id );
		} catch ( EntityIdParsingException $ex ) {
			throw new HttpError( 400, $output->msg( 'wikibase-entitydata-bad-id', $id ) );
		}
	}

	/**
	 * Fails with a 406 error if an RDF format was requested for an entity type that is
	 * listed in $entityTypesWithoutRdfOutput.
	 *
	 * @param EntityId $entityId
	 * @param string|null $format The requested format, not yet canonicalized
	 * @param OutputPage $output Used to build the error message
	 *
	 * @throws HttpError code 406
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	private function assertRdfOutputAvailable( EntityId $entityId, $format, OutputPage $output ) {
		if ( !$this->entityDataFormatProvider->isRdfFormat( $format ) ) {
			return;
		}

		$entityType = $entityId->getEntityType();
		// Strict comparison: entity types are plain strings, no type juggling wanted.
		if ( in_array( $entityType, $this->entityTypesWithoutRdfOutput, true ) ) {
			throw new HttpError(
				406,
				$output->msg( 'wikibase-entitydata-rdf-not-available', $entityType )
			);
		}
	}

	/**
	 * Decides whether the request must be answered with a redirect to the document URL.
	 *
	 * @param string|null $doc The document name as requested
	 * @param EntityId $entityId
	 * @param string $format The canonical format name
	 * @param string $redirectMode Value of the "redirect" request parameter
	 *
	 * @return int|null The HTTP status to redirect with, or null to serve the data directly
	 */
	private function getRedirectStatus( $doc, EntityId $entityId, $format, $redirectMode ) {
		// if subpage syntax is used, always enforce the canonical form
		if ( $doc !== null && $doc !== '' ) {
			if ( $doc !== $this->uriManager->getDocName( $entityId, $format ) ) {
				return 301;
			}
		}

		// if the format is HTML, redirect to the entity's wiki page
		if ( $format === 'html' ) {
			return 303;
		}

		// if redirection was forced, redirect
		if ( $redirectMode === 'force' ) {
			return 303;
		}

		return null;
	}
278
279
	/**
	 * Maps a format given as name, file extension or MIME type to its canonical format name.
	 *
	 * The input is lower-cased first. "html", "htm" and "text/html" always map to "html";
	 * everything else is resolved through the EntityDataFormatProvider.
	 *
	 * @param string $format
	 *
	 * @return string
	 * @throws HttpError code 415 if the format is not supported.
	 */
	public function getCanonicalFormat( $format ) {
		$normalized = strtolower( $format );

		// we always support html, it's handled by the entity's wiki page.
		if ( in_array( $normalized, [ 'html', 'htm', 'text/html' ], true ) ) {
			return 'html';
		}

		// normalize format name (note that HTML may not be known to the service)
		$knownName = $this->entityDataFormatProvider->getFormatName( $normalized );
		if ( $knownName !== null ) {
			return $knownName;
		}

		$msg = wfMessage( 'wikibase-entitydata-unsupported-format', $normalized );
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped
		throw new HttpError( 415, $msg );
	}
306
307
	/**
	 * Asks the HtmlCacheUpdater to purge every URL under which data for the given entity
	 * and revision may have been cached.
	 *
	 * Nothing is sent when the URI manager reports no such URLs. This method does not look
	 * at the CDN setting itself; whether a purge has any effect is up to the HtmlCacheUpdater.
	 *
	 * @param EntityId $id The entity ID for which to purge all data.
	 * @param int $revision The revision ID (0 for current/unspecified)
	 */
	public function purgeWebCache( EntityId $id, int $revision ) {
		$cachedUrls = $this->uriManager->getPotentiallyCachedUrls( $id, $revision );

		if ( $cachedUrls === [] ) {
			return;
		}

		$this->htmlCacheUpdater->purgeUrls( $cachedUrls );
	}
320
321
	/**
	 * Applies HTTP content negotiation.
	 *
	 * The client's Accept header is matched against the supported MIME types plus text/html.
	 * Without an Accept header, anything is accepted and the configured default format is
	 * preferred. If the negotiation is successful, a 303 redirect to the document URL of the
	 * chosen format is set on the OutputPage object. Otherwise, an HttpError is thrown.
	 *
	 * @param WebRequest $request
	 * @param OutputPage $output
	 * @param EntityId $id The ID of the entity to show
	 * @param int      $revision The desired revision
	 *
	 * @throws HttpError code 406 if none of the accepted types is supported
	 */
	public function httpContentNegotiation( WebRequest $request, OutputPage $output, EntityId $id, $revision = 0 ) {
		$requestHeaders = $request->getAllHeaders();

		if ( !isset( $requestHeaders['ACCEPT'] ) ) {
			// anything goes
			$weights = [
				'*' => 0.1 // just to make extra sure
			];

			$fallbackName = $this->entityDataFormatProvider->getFormatName( $this->defaultFormat );
			$fallbackMime = $this->entityDataFormatProvider->getMimeType( $fallbackName );

			// prefer the default
			if ( $fallbackMime != null ) {
				$weights[$fallbackMime] = 1;
			}
		} else {
			$weights = ( new HttpAcceptParser() )->parseWeights( $requestHeaders['ACCEPT'] );
		}

		$candidates = $this->entityDataFormatProvider->getSupportedMimeTypes();
		$candidates[] = 'text/html'; // HTML is handled by the normal page URL

		$best = ( new HttpAcceptNegotiator( $candidates ) )->getBestSupportedKey( $weights, null );

		if ( $best === null ) {
			// The error lists the provider's MIME types only, without the extra text/html.
			$supportedList = implode( ', ', $this->entityDataFormatProvider->getSupportedMimeTypes() );
			$msg = $output->msg( 'wikibase-entitydata-not-acceptable', $supportedList );
			// @phan-suppress-next-line SecurityCheck-DoubleEscaped
			throw new HttpError( 406, $msg );
		}

		$url = $this->uriManager->getDocUrl( $id, $this->getCanonicalFormat( $best ), $revision );
		$output->redirect( $url, 303 );
	}
371
372
	/**
	 * Loads the requested Entity. Redirects are resolved if no specific revision
	 * is requested or they are explicitly allowed by $allowRedirects.
	 *
	 * When a redirect is resolved, the target is loaded at its current revision by calling
	 * this method again. The returned RedirectRevision always describes the redirect found
	 * for $id itself; redirects met while loading the target are followed but not reported.
	 *
	 * @param EntityId $id
	 * @param int $revision The revision ID (use 0 for the current revision).
	 * @param bool $allowRedirects Can we fetch redirects when revision is set?
	 *
	 * @return array list( EntityRevision, RedirectRevision|null )
	 * @throws HttpError code 404 if the entity or revision cannot be found, code 400 if the
	 *         requested revision is a redirect that may not be resolved, code 500 on
	 *         other storage errors.
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	private function getEntityRevision( EntityId $id, $revision, $allowRedirects = false ) {
		$prefixedId = $id->getSerialization();
		$redirectRevision = null;

		try {
			$entityRevision = $this->entityRevisionLookup->getEntityRevision( $id, $revision );

			if ( $entityRevision === null ) {
				$this->logger->debug(
					'{method}: entity not found: {prefixedId}',
					[
						'method' => __METHOD__,
						'prefixedId' => $prefixedId,
					]
				);

				$msg = wfMessage( 'wikibase-entitydata-not-found', $prefixedId );
				throw new HttpError( 404, $msg );
			}
		} catch ( RevisionedUnresolvedRedirectException $ex ) {
			$redirectRevision = new RedirectRevision(
				new EntityRedirect( $id, $ex->getRedirectTargetId() ),
				$ex->getRevisionId(), $ex->getRevisionTimestamp()
			);

			if ( $revision !== 0 && !$allowRedirects ) {
				// The requested revision is a redirect
				$this->logger->debug(
					'{method}: revision {revision} of {prefixedId} is a redirect: {exMsg}',
					[
						'method' => __METHOD__,
						'revision' => $revision,
						'prefixedId' => $prefixedId,
						'exMsg' => strval( $ex ),
					]
				);

				$msg = wfMessage( 'wikibase-entitydata-bad-revision', $prefixedId, $revision );
				throw new HttpError( 400, $msg );
			}

			// If no specific revision is requested or redirects are explicitly allowed, resolve the redirect.
			list( $entityRevision, ) = $this->getEntityRevision( $ex->getRedirectTargetId(), 0 );
		} catch ( BadRevisionException $ex ) {
			$this->logger->debug(
				'{method}: could not load revision {revision} of {prefixedId}: {exMsg}',
				[
					'method' => __METHOD__,
					'revision' => $revision,
					'prefixedId' => $prefixedId,
					'exMsg' => strval( $ex ),
				]
			);

			$msg = wfMessage( 'wikibase-entitydata-bad-revision', $prefixedId, $revision );
			throw new HttpError( 404, $msg );
		} catch ( StorageException $ex ) {
			$this->logger->debug(
				'{method}: failed to load {prefixedId}: {exMsg} (revision {revision})',
				[
					'method' => __METHOD__,
					'prefixedId' => $prefixedId,
					'exMsg' => strval( $ex ),
					'revision' => $revision,
				]
			);

			$msg = wfMessage( 'wikibase-entitydata-storage-error', $prefixedId, $revision );
			throw new HttpError( 500, $msg );
		}

		return [ $entityRevision, $redirectRevision ];
	}
457
458
	/**
	 * Loads the IDs of the redirects that point at the given entity.
	 *
	 * This is best effort: if the lookup fails, the failure is logged at debug level and
	 * an empty list is returned instead of an error.
	 *
	 * @param EntityId $id
	 *
	 * @return EntityId[]
	 */
	private function getIncomingRedirects( EntityId $id ) {
		try {
			$redirectIds = $this->entityRedirectLookup->getRedirectIds( $id );
		} catch ( EntityRedirectLookupException $lookupFailure ) {
			$this->logger->debug(
				'{method}: failed to load incoming redirects of {prefixedId}: {exMsg}',
				[
					'method' => __METHOD__,
					'prefixedId' => $id->getSerialization(),
					'exMsg' => (string)$lookupFailure,
				]
			);

			$redirectIds = [];
		}

		return $redirectIds;
	}
483
484
	/**
	 * Output entity data.
	 *
	 * Loads the entity revision, answers with a 304 status if the client's If-Modified-Since
	 * timestamp is not older than the revision, and otherwise serializes the entity and
	 * passes the result to outputData(). The "flavor" request parameter is handed to the
	 * serialization service; with flavor=dump, or when a specific revision is requested,
	 * incoming redirects are left out.
	 *
	 * @param WebRequest $request
	 * @param OutputPage $output
	 * @param string $format The name (mime type of file extension) of the format to use
	 * @param EntityId $id The entity ID
	 * @param int $revision The revision ID (use 0 for the current revision).
	 *
	 * @throws HttpError
	 */
	public function showData( WebRequest $request, OutputPage $output, $format, EntityId $id, $revision ) {
		$flavor = $request->getRawVal( 'flavor' );
		$isDump = $flavor === 'dump';

		/** @var EntityRevision $entityRevision */
		/** @var RedirectRevision $followedRedirectRevision */
		// If flavor is "dump", we allow fetching redirects by revision, since we won't
		// be dumping the content of the target revision.
		list( $entityRevision, $followedRedirectRevision ) = $this->getEntityRevision( $id, $revision, $isDump );

		// handle If-Modified-Since
		$imsHeader = $request->getHeader( 'IF-MODIFIED-SINCE' );
		if ( $imsHeader !== false ) {
			$clientTimestamp = wfTimestamp( TS_MW, $imsHeader );

			if ( $entityRevision->getTimestamp() <= $clientTimestamp ) {
				$output->getRequest()->response()->header( 'Status: 304', true, 304 );
				$output->setArticleBodyOnly( true );
				return;
			}
		}

		// In dump mode and when fetching a specific revision, don't include incoming redirects.
		// Otherwise get them for the loaded entity (if we followed a redirect, that is the target).
		$incomingRedirects = ( $isDump || $revision > 0 )
			? []
			: $this->getIncomingRedirects( $entityRevision->getEntity()->getId() );

		list( $data, $contentType ) = $this->serializationService->getSerializedData(
			$format,
			$entityRevision,
			$followedRedirectRevision,
			$incomingRedirects,
			$flavor
		);

		$output->disable();

		$response = $output->getRequest()->response();
		$lastModified = $entityRevision->getTimestamp();
		$this->outputData( $request, $id, $revision, $response, $data, $contentType, $lastModified );
	}
544
545
	/**
	 * Output the entity data and set the appropriate HTTP response headers.
	 *
	 * Sets Content-Type, Last-Modified (if $lastModified is non-empty), X-Frame-Options
	 * (if configured) and Cache-Control. The response is declared publicly cacheable only
	 * if the full request URL is one of the cacheable URLs for the requested entity and
	 * revision; max-age and s-maxage can be set via the "maxage" and "smaxage" request
	 * parameters and are clamped to MINIMUM_MAX_AGE..MAXIMUM_MAX_AGE. Finally the output
	 * buffer is cleaned and $data is printed.
	 *
	 * @param WebRequest  $request
	 * @param EntityId    $requestId       the original entity ID of the request
	 * @param int         $requestRevision the original revision ID of the request (0 for latest)
	 * @param WebResponse $response
	 * @param string      $data        the data to output
	 * @param string      $contentType the data's mime type
	 * @param string      $lastModified
	 */
	public function outputData(
		WebRequest $request,
		EntityId $requestId,
		int $requestRevision,
		WebResponse $response,
		string $data,
		string $contentType,
		string $lastModified
	) {
		// NOTE: similar code as in RawAction::onView, keep in sync.

		$maxAge = $request->getInt( 'maxage', $this->maxAge );
		$sMaxAge = $request->getInt( 'smaxage', $this->maxAge );

		$maxAge  = max( self::MINIMUM_MAX_AGE, min( self::MAXIMUM_MAX_AGE, $maxAge ) );
		$sMaxAge = max( self::MINIMUM_MAX_AGE, min( self::MAXIMUM_MAX_AGE, $sMaxAge ) );

		$response->header( 'Content-Type: ' . $contentType . '; charset=UTF-8' );

		if ( $lastModified ) {
			$response->header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $lastModified ) );
		}

		//Set X-Frame-Options API results (bug T41180)
		if ( $this->frameOptionsHeader !== null && $this->frameOptionsHeader !== '' ) {
			$response->header( "X-Frame-Options: $this->frameOptionsHeader" );
		}

		$cacheableUrls = $this->uriManager->getCacheableUrls( $requestId, $requestRevision );
		// Strict comparison: URLs are compared as exact strings, without type juggling.
		if ( in_array( $request->getFullRequestURL(), $cacheableUrls, true ) ) {
			$response->header( 'Cache-Control: public, s-maxage=' . $sMaxAge . ', max-age=' . $maxAge );
		} else {
			$response->header( 'Cache-Control: private, no-cache, s-maxage=0' );
		}

		// remove anything that might already be in the output buffer.
		// ob_clean() raises a notice when no buffer is active, so only call it if there is one.
		if ( ob_get_level() > 0 ) {
			ob_clean();
		}

		print $data;

		// exit normally here, keeping all levels of output buffering.
	}
597
598
}
599