Completed
Branch master (939199)
by
unknown
39:35
created

includes/content/ContentHandler.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5
/**
6
 * Base class for content handling.
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License along
19
 * with this program; if not, write to the Free Software Foundation, Inc.,
20
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 * http://www.gnu.org/copyleft/gpl.html
22
 *
23
 * @since 1.21
24
 *
25
 * @file
26
 * @ingroup Content
27
 *
28
 * @author Daniel Kinzler
29
 */
30
31
/**
32
 * Exception representing a failure to serialize or unserialize a content object.
33
 *
34
 * @ingroup Content
35
 */
36
class MWContentSerializationException extends MWException {
37
}
38
39
/**
40
 * Exception thrown when an unregistered content model is requested. This error
41
 * can be triggered by user input, so a separate exception class is provided so
42
 * callers can substitute a context-specific, internationalised error message.
43
 *
44
 * @ingroup Content
45
 * @since 1.27
46
 */
47
class MWUnknownContentModelException extends MWException {
	/** @var string The name of the unknown content model */
	private $modelId;

	/**
	 * Builds the exception with a message naming the unregistered model and
	 * remembers the model ID so callers can produce their own error message.
	 *
	 * @param string $modelId ID of the content model that has no registered handler
	 */
	public function __construct( $modelId ) {
		parent::__construct( "The content model '$modelId' is not registered on this wiki.\n" .
			'See https://www.mediawiki.org/wiki/Content_handlers to find out which extensions ' .
			'handle this content model.' );
		$this->modelId = $modelId;
	}

	/**
	 * @return string The model ID that was passed to the constructor
	 */
	public function getModelId() {
		return $this->modelId;
	}
}
64
65
/**
66
 * A content handler knows how to deal with a specific type of content on a wiki
67
 * page. Content is stored in the database in a serialized form (using a
68
 * serialization format a.k.a. MIME type) and is unserialized into its native
69
 * PHP representation (the content model), which is wrapped in an instance of
70
 * the appropriate subclass of Content.
71
 *
72
 * ContentHandler instances are stateless singletons that serve, among other
73
 * things, as a factory for Content objects. Generally, there is one subclass
74
 * of ContentHandler and one subclass of Content for every type of content model.
75
 *
76
 * Some content types have a flat model, that is, their native representation
77
 * is the same as their serialized form. Examples would be JavaScript and CSS
78
 * code. As of now, this also applies to wikitext (MediaWiki's default content
79
 * type), but wikitext content may be represented by a DOM or AST structure in
80
 * the future.
81
 *
82
 * @ingroup Content
83
 */
84
abstract class ContentHandler {
85
	/**
86
	 * Convenience function for getting flat text from a Content object. This
87
	 * should only be used in the context of backwards compatibility with code
88
	 * that is not yet able to handle Content objects!
89
	 *
90
	 * If $content is null, this method returns the empty string.
91
	 *
92
	 * If $content is an instance of TextContent, this method returns the flat
93
	 * text as returned by $content->getNativeData().
94
	 *
95
	 * If $content is not a TextContent object, the behavior of this method
96
	 * depends on the global $wgContentHandlerTextFallback:
97
	 * - If $wgContentHandlerTextFallback is 'fail' and $content is not a
98
	 *   TextContent object, an MWException is thrown.
99
	 * - If $wgContentHandlerTextFallback is 'serialize' and $content is not a
100
	 *   TextContent object, $content->serialize() is called to get a string
101
	 *   form of the content.
102
	 * - If $wgContentHandlerTextFallback is 'ignore' and $content is not a
103
	 *   TextContent object, this method returns null.
104
	 * - otherwise, the behavior is undefined.
105
	 *
106
	 * @since 1.21
107
	 *
108
	 * @param Content|null $content
109
	 *
110
	 * @throws MWException If the content is not an instance of TextContent and
111
	 * wgContentHandlerTextFallback was set to 'fail'.
112
	 * @return string|null Textual form of the content, if available.
113
	 */
114
	public static function getContentText( Content $content = null ) {
		global $wgContentHandlerTextFallback;

		// A missing content object is treated as empty text.
		if ( $content === null ) {
			return '';
		}

		// Text-based content can hand out its flat text directly.
		if ( $content instanceof TextContent ) {
			return $content->getNativeData();
		}

		wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

		// Non-text content: the configured fallback policy decides what happens.
		// switch compares loosely, exactly like the == checks it stands in for.
		switch ( $wgContentHandlerTextFallback ) {
			case 'fail':
				throw new MWException(
					"Attempt to get text from Content with model " .
					$content->getModel()
				);
			case 'serialize':
				return $content->serialize();
			default:
				return null;
		}
	}
140
141
	/**
142
	 * Convenience function for creating a Content object from a given textual
143
	 * representation.
144
	 *
145
	 * $text will be deserialized into a Content object of the model specified
146
	 * by $modelId (or, if that is not given, $title->getContentModel()) using
147
	 * the given format.
148
	 *
149
	 * @since 1.21
150
	 *
151
	 * @param string $text The textual representation, will be
152
	 *    unserialized to create the Content object
153
	 * @param Title $title The title of the page this text belongs to.
154
	 *    Required if $modelId is not provided.
155
	 * @param string $modelId The model to deserialize to. If not provided,
156
	 *    $title->getContentModel() is used.
157
	 * @param string $format The format to use for deserialization. If not
158
	 *    given, the model's default format is used.
159
	 *
160
	 * @throws MWException If model ID or format is not supported or if the text can not be
161
	 * unserialized using the format.
162
	 * @return Content A Content object representing the text.
163
	 */
164
	public static function makeContent( $text, Title $title = null,
		$modelId = null, $format = null ) {
		// Without an explicit model, the title is the only source for one.
		if ( $modelId === null && $title === null ) {
			throw new MWException( "Must provide a Title object or a content model ID." );
		}

		if ( $modelId === null ) {
			$modelId = $title->getContentModel();
		}

		// Let the handler registered for the model deserialize the text.
		return self::getForModelID( $modelId )->unserializeContent( $text, $format );
	}
178
179
	/**
180
	 * Returns the name of the default content model to be used for the page
181
	 * with the given title.
182
	 *
183
	 * Note: There should rarely be need to call this method directly.
184
	 * To determine the actual content model for a given page, use
185
	 * Title::getContentModel().
186
	 *
187
	 * Which model is to be used by default for the page is determined based
188
	 * on several factors:
189
	 * - The global setting $wgNamespaceContentModels specifies a content model
190
	 *   per namespace.
191
	 * - The hook ContentHandlerDefaultModelFor may be used to override the page's default
192
	 *   model.
193
	 * - Pages in NS_MEDIAWIKI and NS_USER default to the JavaScript or CSS
194
	 *   model if they end in .js or .css, respectively.
195
	 * - Pages in NS_MEDIAWIKI default to the wikitext model otherwise.
196
	 * - The hook TitleIsCssOrJsPage may be used to force a page to use the CSS
197
	 *   or JavaScript model. This is a compatibility feature. The ContentHandlerDefaultModelFor
198
	 *   hook should be used instead if possible.
199
	 * - The hook TitleIsWikitextPage may be used to force a page to use the
200
	 *   wikitext model. This is a compatibility feature. The ContentHandlerDefaultModelFor
201
	 *   hook should be used instead if possible.
202
	 *
203
	 * If none of the above applies, the wikitext model is used.
204
	 *
205
	 * Note: this is used by, and may thus not use, Title::getContentModel()
206
	 *
207
	 * @since 1.21
208
	 *
209
	 * @param Title $title
210
	 *
211
	 * @return string Default model name for the page given by $title
212
	 */
213
	public static function getDefaultModelFor( Title $title ) {
		// NOTE: nothing in here may call $title->getContentModel(), directly or
		//       indirectly, because this method is used to initialize the
		//       mContentModel member.

		$namespace = $title->getNamespace();
		$model = MWNamespace::getNamespaceContentModel( $namespace );

		// If the hook run is aborted and a model was set, that model wins outright.
		$hookPassed = Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] );
		if ( !$hookPassed && $model !== null ) {
			return $model;
		}

		$extension = false;
		$matches = null;

		// MediaWiki-namespace page whose title ends in a code file extension?
		$isCodePage = NS_MEDIAWIKI == $namespace
			&& preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );
		if ( $isCodePage ) {
			$extension = $matches[1];
		}

		// Legacy hook that may force the page to count as a code page.
		Hooks::run( 'TitleIsCssOrJsPage', [ $title, &$isCodePage ], '1.21' );

		// User-namespace subpage whose title ends in a code file extension?
		$isCodeSubpage = NS_USER == $namespace
			&& !$isCodePage
			&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );
		if ( $isCodeSubpage ) {
			$extension = $matches[1];
		}

		// Wikitext if the namespace/hook model says so (or says nothing) and
		// the title did not identify the page as code.
		$isWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT )
			&& !$isCodePage
			&& !$isCodeSubpage;

		// Legacy hook that may override the wikitext decision.
		Hooks::run( 'TitleIsWikitextPage', [ $title, &$isWikitext ], '1.21' );

		if ( $isWikitext ) {
			return CONTENT_MODEL_WIKITEXT;
		}

		// Not wikitext: map the detected extension, if any, to a code model.
		if ( $extension === 'js' ) {
			return CONTENT_MODEL_JAVASCRIPT;
		}
		if ( $extension === 'css' ) {
			return CONTENT_MODEL_CSS;
		}
		if ( $extension === 'json' ) {
			return CONTENT_MODEL_JSON;
		}

		// No recognised extension: use the configured model, or plain text.
		return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
271
272
	/**
273
	 * Returns the appropriate ContentHandler singleton for the given title.
274
	 *
275
	 * @since 1.21
276
	 *
277
	 * @param Title $title
278
	 *
279
	 * @return ContentHandler
280
	 */
281
	public static function getForTitle( Title $title ) {
		// The title knows its content model; the model ID selects the handler.
		return self::getForModelID( $title->getContentModel() );
	}
286
287
	/**
288
	 * Returns the appropriate ContentHandler singleton for the given Content
289
	 * object.
290
	 *
291
	 * @since 1.21
292
	 *
293
	 * @param Content $content
294
	 *
295
	 * @return ContentHandler
296
	 */
297
	public static function getForContent( Content $content ) {
		// The content object reports its own model; the model ID selects the handler.
		return self::getForModelID( $content->getModel() );
	}
302
303
	/**
304
	 * @var array A Cache of ContentHandler instances by model id
305
	 */
306
	protected static $handlers;
307
308
	/**
309
	 * Returns the ContentHandler singleton for the given model ID. Use the
310
	 * CONTENT_MODEL_XXX constants to identify the desired content model.
311
	 *
312
	 * ContentHandler singletons are taken from the global $wgContentHandlers
313
	 * array. Keys in that array are model names, the values are either
314
	 * ContentHandler singleton objects, or strings specifying the appropriate
315
	 * subclass of ContentHandler.
316
	 *
317
	 * If a class name is encountered when looking up the singleton for a given
318
	 * model name, the class is instantiated and the class name is replaced by
319
	 * the resulting singleton in $wgContentHandlers.
320
	 *
321
	 * If no ContentHandler is defined for the desired $modelId, the
322
	 * ContentHandler may be provided by the ContentHandlerForModelID hook.
323
	 * If no ContentHandler can be determined, an MWException is raised.
324
	 *
325
	 * @since 1.21
326
	 *
327
	 * @param string $modelId The ID of the content model for which to get a
328
	 *    handler. Use CONTENT_MODEL_XXX constants.
329
	 *
330
	 * @throws MWException For internal errors and problems in the configuration.
331
	 * @throws MWUnknownContentModelException If no handler is known for the model ID.
332
	 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
333
	 */
334
	public static function getForModelID( $modelId ) {
		global $wgContentHandlers;

		// Reuse a handler that was already created for this model.
		if ( isset( ContentHandler::$handlers[$modelId] ) ) {
			return ContentHandler::$handlers[$modelId];
		}

		if ( empty( $wgContentHandlers[$modelId] ) ) {
			// Nothing configured for this model: give hook handlers a chance
			// to supply a handler instance.
			$handler = null;

			Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

			if ( $handler === null ) {
				throw new MWUnknownContentModelException( $modelId );
			}

			if ( !( $handler instanceof ContentHandler ) ) {
				throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
			}
		} else {
			$classOrCallback = $wgContentHandlers[$modelId];

			// A configured entry is either a factory callback or a class name.
			if ( is_callable( $classOrCallback ) ) {
				$handler = call_user_func( $classOrCallback, $modelId );
			} else {
				$handler = new $classOrCallback( $modelId );
			}

			if ( !( $handler instanceof ContentHandler ) ) {
				// Only a string entry can be interpolated safely; a Closure or
				// array callable would fail (or print "Array") while the
				// message is being built and mask the real problem.
				$origin = is_string( $classOrCallback )
					? $classOrCallback
					: "The callback registered for '$modelId'";

				throw new MWException( "$origin from \$wgContentHandlers is not " .
					"compatible with ContentHandler" );
			}
		}

		wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
			. ': ' . get_class( $handler ) );

		// Cache the instance so later lookups return the same object.
		ContentHandler::$handlers[$modelId] = $handler;

		return ContentHandler::$handlers[$modelId];
	}
375
376
	/**
377
	 * Returns the localized name for a given content model.
378
	 *
379
	 * Model names are localized using system messages. Message keys
380
	 * have the form content-model-$name, where $name is getContentModelName( $id ).
381
	 *
382
	 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
383
	 *    constant or returned by Revision::getContentModel().
384
	 * @param Language|null $lang The language to parse the message in (since 1.26)
385
	 *
386
	 * @throws MWException If the model ID isn't known.
387
	 * @return string The content model's localized name.
388
	 */
389
	public static function getLocalizedName( $name, Language $lang = null ) {
		// Messages: content-model-wikitext, content-model-text,
		// content-model-javascript, content-model-css
		$msg = wfMessage( "content-model-$name" );

		// Only switch languages when the caller asked for a specific one.
		if ( $lang ) {
			$msg->inLanguage( $lang );
		}

		// Fall back to the raw model ID when no such message is defined.
		if ( !$msg->exists() ) {
			return $name;
		}

		return $msg->plain();
	}
401
402
	/**
	 * Lists the content models configured in $wgContentHandlers.
	 *
	 * @return array The keys of $wgContentHandlers
	 */
	public static function getContentModels() {
		global $wgContentHandlers;

		$modelIds = array_keys( $wgContentHandlers );

		return $modelIds;
	}
407
408
	/**
	 * Collects the serialization formats supported by the handlers of all
	 * content models configured in $wgContentHandlers, without duplicates.
	 *
	 * @return array Formats as returned by each handler's getSupportedFormats()
	 */
	public static function getAllContentFormats() {
		global $wgContentHandlers;

		$collected = [];

		// Only the keys (model IDs) are needed to look up each handler.
		foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
			$supported = self::getForModelID( $modelId )->getSupportedFormats();
			$collected = array_merge( $collected, $supported );
		}

		return array_unique( $collected );
	}
422
423
	// ------------------------------------------------------------------------
424
425
	/**
426
	 * @var string
427
	 */
428
	protected $mModelID;
429
430
	/**
431
	 * @var string[]
432
	 */
433
	protected $mSupportedFormats;
434
435
	/**
436
	 * Constructor, initializing the ContentHandler instance with its model ID
437
	 * and a list of supported formats. Values for the parameters are typically
438
	 * provided as literals by subclass's constructors.
439
	 *
440
	 * @param string $modelId (use CONTENT_MODEL_XXX constants).
441
	 * @param string[] $formats List for supported serialization formats
442
	 *    (typically as MIME types)
443
	 */
444
	public function __construct( $modelId, $formats ) {
445
		$this->mModelID = $modelId;
446
		$this->mSupportedFormats = $formats;
447
	}
448
449
	/**
450
	 * Serializes a Content object of the type supported by this ContentHandler.
451
	 *
452
	 * @since 1.21
453
	 *
454
	 * @param Content $content The Content object to serialize
455
	 * @param string $format The desired serialization format
456
	 *
457
	 * @return string Serialized form of the content
458
	 */
459
	abstract public function serializeContent( Content $content, $format = null );
460
461
	/**
462
	 * Applies transformations on export (returns the blob unchanged per default).
463
	 * Subclasses may override this to perform transformations such as conversion
464
	 * of legacy formats or filtering of internal meta-data.
465
	 *
466
	 * @param string $blob The blob to be exported
467
	 * @param string|null $format The blob's serialization format
468
	 *
469
	 * @return string
470
	 */
471
	public function exportTransform( $blob, $format = null ) {
472
		return $blob;
473
	}
474
475
	/**
476
	 * Unserializes a Content object of the type supported by this ContentHandler.
477
	 *
478
	 * @since 1.21
479
	 *
480
	 * @param string $blob Serialized form of the content
481
	 * @param string $format The format used for serialization
482
	 *
483
	 * @return Content The Content object created by deserializing $blob
484
	 */
485
	abstract public function unserializeContent( $blob, $format = null );
486
487
	/**
488
	 * Apply import transformation (per default, returns $blob unchanged).
489
	 * This gives subclasses an opportunity to transform data blobs on import.
490
	 *
491
	 * @since 1.24
492
	 *
493
	 * @param string $blob
494
	 * @param string|null $format
495
	 *
496
	 * @return string
497
	 */
498
	public function importTransform( $blob, $format = null ) {
499
		return $blob;
500
	}
501
502
	/**
503
	 * Creates an empty Content object of the type supported by this
504
	 * ContentHandler.
505
	 *
506
	 * @since 1.21
507
	 *
508
	 * @return Content
509
	 */
510
	abstract public function makeEmptyContent();
511
512
	/**
513
	 * Creates a new Content object that acts as a redirect to the given page,
514
	 * or null if redirects are not supported by this content model.
515
	 *
516
	 * This default implementation always returns null. Subclasses supporting redirects
517
	 * must override this method.
518
	 *
519
	 * Note that subclasses that override this method to return a Content object
520
	 * should also override supportsRedirects() to return true.
521
	 *
522
	 * @since 1.21
523
	 *
524
	 * @param Title $destination The page to redirect to.
525
	 * @param string $text Text to include in the redirect, if possible.
526
	 *
527
	 * @return Content|null Always null in this default implementation.
528
	 */
529
	public function makeRedirectContent( Title $destination, $text = '' ) {
530
		return null;
531
	}
532
533
	/**
534
	 * Returns the model id that identifies the content model this
535
	 * ContentHandler can handle. Use with the CONTENT_MODEL_XXX constants.
536
	 *
537
	 * @since 1.21
538
	 *
539
	 * @return string The model ID
540
	 */
541
	public function getModelID() {
542
		return $this->mModelID;
543
	}
544
545
	/**
546
	 * @since 1.21
547
	 *
548
	 * @param string $model_id The model to check
549
	 *
550
	 * @throws MWException If the model ID is not the ID of the content model supported by this
551
	 * ContentHandler.
552
	 */
553
	protected function checkModelID( $model_id ) {
		// The check passes only for the exact model this handler was built for.
		if ( $model_id === $this->mModelID ) {
			return;
		}

		throw new MWException(
			"Bad content model: expected {$this->mModelID} but got $model_id."
		);
	}
560
561
	/**
562
	 * Returns a list of serialization formats supported by the
563
	 * serializeContent() and unserializeContent() methods of this
564
	 * ContentHandler.
565
	 *
566
	 * @since 1.21
567
	 *
568
	 * @return string[] List of serialization formats as MIME type like strings
569
	 */
570
	public function getSupportedFormats() {
571
		return $this->mSupportedFormats;
572
	}
573
574
	/**
575
	 * The format used for serialization/deserialization by default by this
576
	 * ContentHandler.
577
	 *
578
	 * This default implementation will return the first element of the array
579
	 * of formats that was passed to the constructor.
580
	 *
581
	 * @since 1.21
582
	 *
583
	 * @return string The name of the default serialization format as a MIME type
584
	 */
585
	public function getDefaultFormat() {
586
		return $this->mSupportedFormats[0];
587
	}
588
589
	/**
590
	 * Returns true if $format is a serialization format supported by this
591
	 * ContentHandler, and false otherwise.
592
	 *
593
	 * Note that if $format is null, this method always returns true, because
594
	 * null means "use the default format".
595
	 *
596
	 * @since 1.21
597
	 *
598
	 * @param string $format The serialization format to check
599
	 *
600
	 * @return bool
601
	 */
602
	public function isSupportedFormat( $format ) {
		// An empty format means "use the default", which is always acceptable;
		// anything else has to be one of the formats given to the constructor.
		return !$format || in_array( $format, $this->mSupportedFormats );
	}
609
610
	/**
611
	 * Convenient for checking whether a format provided as a parameter is actually supported.
612
	 *
613
	 * @param string $format The serialization format to check
614
	 *
615
	 * @throws MWException If the format is not supported by this content handler.
616
	 */
617
	protected function checkFormat( $format ) {
		if ( $this->isSupportedFormat( $format ) ) {
			return;
		}

		// Name both the rejected format and this handler's model in the error.
		$modelId = $this->getModelID();

		throw new MWException( "Format $format is not supported for content model " . $modelId );
	}
625
626
	/**
627
	 * Returns overrides for action handlers.
628
	 * Classes listed here will be used instead of the default one when
629
	 * (and only when) $wgActions[$action] === true. This allows subclasses
630
	 * to override the default action handlers.
631
	 *
632
	 * @since 1.21
633
	 *
634
	 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
635
	 *  either the full qualified class name of an Action class, a callable taking ( Page $page,
636
	 *  IContextSource $context = null ) as parameters and returning an Action object, or an actual
637
	 *  Action object. An empty array in this default implementation.
638
	 *
639
	 * @see Action::factory
640
	 */
641
	public function getActionOverrides() {
642
		return [];
643
	}
644
645
	/**
646
	 * Factory for creating an appropriate DifferenceEngine for this content model.
647
	 *
648
	 * @since 1.21
649
	 *
650
	 * @param IContextSource $context Context to use, anything else will be ignored.
651
	 * @param int $old Revision ID we want to show and diff with.
652
	 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
653
	 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
654
	 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
655
	 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
656
	 *
657
	 * @return DifferenceEngine
658
	 */
659
	public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
		$rcid = 0, // FIXME: Deprecated, no longer used
		$refreshCache = false, $unhide = false ) {

		// Hook handlers may supply their own engine through the last,
		// by-reference argument and abort the hook run to have it used.
		$differenceEngine = null;
		$useDefault = Hooks::run(
			'GetDifferenceEngine',
			[ $context, $old, $new, $refreshCache, $unhide, &$differenceEngine ]
		);

		if ( !$useDefault ) {
			return $differenceEngine;
		}

		// Otherwise instantiate the engine class chosen by this handler.
		$engineClass = $this->getDiffEngineClass();

		return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}
673
674
	/**
675
	 * Get the language in which the content of the given page is written.
676
	 *
677
	 * This default implementation just returns $wgContLang (except for pages
678
	 * in the MediaWiki namespace)
679
	 *
680
	 * Note that the pages language is not cacheable, since it may in some
681
	 * cases depend on user settings.
682
	 *
683
	 * Also note that the page language may or may not depend on the actual content of the page,
684
	 * that is, this method may load the content in order to determine the language.
685
	 *
686
	 * @since 1.21
687
	 *
688
	 * @param Title $title The page to determine the language for.
689
	 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
690
	 *
691
	 * @return Language The page's language
692
	 */
693
	public function getPageLanguage( Title $title, Content $content = null ) {
		global $wgContLang, $wgLang;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			// Parse mediawiki messages with correct target language;
			// the language code is the second element of the result.
			$parts = MessageCache::singleton()->figureMessage( $title->getText() );
			$pageLang = Language::factory( $parts[1] );
		} else {
			$pageLang = $wgContLang;
		}

		// Hook handlers may replace the language through the by-reference argument.
		Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

		return wfGetLangObj( $pageLang );
	}
707
708
	/**
709
	 * Get the language in which the content of this page is written when
710
	 * viewed by user. Defaults to $this->getPageLanguage(), but if the user
711
	 * specified a preferred variant, the variant will be used.
712
	 *
713
	 * This default implementation just returns $this->getPageLanguage( $title, $content ) unless
714
	 * the user specified a preferred variant.
715
	 *
716
	 * Note that the pages view language is not cacheable, since it depends on user settings.
717
	 *
718
	 * Also note that the page language may or may not depend on the actual content of the page,
719
	 * that is, this method may load the content in order to determine the language.
720
	 *
721
	 * @since 1.21
722
	 *
723
	 * @param Title $title The page to determine the language for.
724
	 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
725
	 *
726
	 * @return Language The page's language for viewing
727
	 */
728
	public function getPageViewLanguage( Title $title, Content $content = null ) {
		$pageLang = $this->getPageLanguage( $title, $content );

		// Pages in the MediaWiki namespace are never switched to a variant.
		if ( $title->getNamespace() === NS_MEDIAWIKI ) {
			return $pageLang;
		}

		// If the user chooses a variant, the content is actually
		// in a language whose code is the variant code.
		$variant = $pageLang->getPreferredVariant();

		return $pageLang->getCode() === $variant
			? $pageLang
			: Language::factory( $variant );
	}
742
743
	/**
744
	 * Determines whether the content type handled by this ContentHandler
745
	 * can be used on the given page.
746
	 *
747
	 * This default implementation always returns true.
748
	 * Subclasses may override this to restrict the use of this content model to specific locations,
749
	 * typically based on the namespace or some other aspect of the title, such as a special suffix
750
	 * (e.g. ".svg" for SVG content).
751
	 *
752
	 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
753
	 * content model can be used where.
754
	 *
755
	 * @param Title $title The page's title.
756
	 *
757
	 * @return bool True if content of this kind can be used on the given page, false otherwise.
758
	 */
759
	public function canBeUsedOn( Title $title ) {
760
		$ok = true;
761
762
		Hooks::run( 'ContentModelCanBeUsedOn', [ $this->getModelID(), $title, &$ok ] );
763
764
		return $ok;
765
	}
766
767
	/**
768
	 * Returns the name of the diff engine to use.
769
	 *
770
	 * @since 1.21
771
	 *
772
	 * @return string
773
	 */
774
	protected function getDiffEngineClass() {
775
		return DifferenceEngine::class;
776
	}
777
778
	/**
779
	 * Attempts to merge differences between three versions. Returns a new
780
	 * Content object for a clean merge and false for failure or a conflict.
781
	 *
782
	 * This default implementation always returns false.
783
	 *
784
	 * @since 1.21
785
	 *
786
	 * @param Content $oldContent The page's previous content.
787
	 * @param Content $myContent One of the page's conflicting contents.
788
	 * @param Content $yourContent One of the page's conflicting contents.
789
	 *
790
	 * @return Content|bool Always false.
791
	 */
792
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
793
		return false;
794
	}
795
796
	/**
797
	 * Return an applicable auto-summary if one exists for the given edit.
798
	 *
799
	 * @since 1.21
800
	 *
801
	 * @param Content $oldContent The previous text of the page.
802
	 * @param Content $newContent The submitted text of the page.
803
	 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
804
	 *
805
	 * @return string An appropriate auto-summary, or an empty string.
806
	 */
807
	public function getAutosummary( Content $oldContent = null, Content $newContent = null,
		$flags ) {
		// $newContent may be null per the signature, but every summary below
		// is derived from it. Without new content no auto-summary applies, so
		// return the empty summary instead of calling a method on null.
		if ( $newContent === null ) {
			return '';
		}

		// Decide what kind of auto-summary is needed.

		// Redirect auto-summaries

		/**
		 * @var $ot Title
		 * @var $rt Title
		 */

		$ot = $oldContent !== null ? $oldContent->getRedirectTarget() : null;
		$rt = $newContent->getRedirectTarget();

		if ( is_object( $rt ) ) {
			// Only summarise when the redirect is new or its target
			// (including the fragment) changed.
			if ( !is_object( $ot )
				|| !$rt->equals( $ot )
				|| $ot->getFragment() != $rt->getFragment()
			) {
				// Budget the excerpt so message text, target and excerpt
				// together stay within 250 bytes.
				$truncatedtext = $newContent->getTextForSummary(
					250
					- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
					- strlen( $rt->getFullText() ) );

				return wfMessage( 'autoredircomment', $rt->getFullText() )
					->rawParams( $truncatedtext )->inContentLanguage()->text();
			}
		}

		// New page auto-summaries
		if ( $flags & EDIT_NEW && $newContent->getSize() > 0 ) {
			// If they're making a new article, give its text, truncated, in
			// the summary.

			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		}

		// Blanking auto-summaries
		if ( $oldContent !== null && $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
			return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
		} elseif ( $oldContent !== null
			&& $oldContent->getSize() > 10 * $newContent->getSize()
			&& $newContent->getSize() < 500
		) {
			// Removing more than 90% of the article

			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		}

		// New blank article auto-summary
		if ( $flags & EDIT_NEW && $newContent->isEmpty() ) {
			return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
		}

		// If we reach this point, there's no applicable auto-summary for our
		// case, so our auto-summary is empty.
		return '';
	}
873
874
	/**
	 * Build an automatic deletion reason for a page.
	 *
	 * The reason is one of the content-language messages 'exbeforeblank',
	 * 'excontentauthor' or 'excontent', with its '$1' placeholder replaced by
	 * a truncated summary of the page content.
	 *
	 * @since 1.21
	 *
	 * @param Title $title The page's title
	 * @param bool &$hasHistory Set to true when the revision query yields more
	 *   than one row, false otherwise. It is not written on the early
	 *   "return false" paths.
	 *
	 * @return mixed String containing the deletion reason, an empty string when
	 *   the selected message is just '-', or boolean false if no revision
	 *   could be loaded
	 *
	 * @todo &$hasHistory is extremely ugly, it's here because
	 * WikiPage::getAutoDeleteReason() and Article::generateReason()
	 * have it / want it.
	 */
	public function getAutoDeleteReason( Title $title, &$hasHistory ) {
		$dbr = wfGetDB( DB_REPLICA );

		// Start from the latest revision of the page
		$revision = Revision::newFromTitle( $title );
		if ( $revision === null ) {
			return false;
		}

		$content = $revision->getContent();
		$usedPrevious = false;

		// A blank (or missing) latest content makes for a useless summary, so
		// fall back to the revision before it when there is one.
		if ( !$content || $content->isEmpty() ) {
			$previous = $revision->getPrevious();
			if ( $previous ) {
				$revision = $previous;
				$content = $previous->getContent();
				$usedPrevious = true;
			}
		}

		$this->checkModelID( $revision->getContentModel() );

		// Look at up to 20 revision rows whose user name is not hidden, to
		// find out whether the page had a single contributor.
		$conds = [
			'rev_page' => $title->getArticleID(),
			$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
		];
		$res = $dbr->select(
			'revision',
			'rev_user_text',
			$conds,
			__METHOD__,
			[ 'LIMIT' => 20 ]
		);

		if ( $res === false ) {
			// No usable result set, which is very weird
			return false;
		}

		$hasHistory = ( $res->numRows() > 1 );
		$firstRow = $dbr->fetchObject( $res );

		// Stays false when there is no visible contributor at all, or when a
		// second, different contributor shows up in the result.
		$onlyAuthor = false;
		if ( $firstRow ) {
			$onlyAuthor = $firstRow->rev_user_text;
			foreach ( $res as $row ) {
				if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
					$onlyAuthor = false;
					break;
				}
			}
		}

		// Pick the message, keeping a literal '$1' placeholder for the text
		if ( $usedPrevious ) {
			// The summary text comes from the revision before the blank one
			$message = wfMessage( 'exbeforeblank', '$1' );
		} elseif ( $onlyAuthor ) {
			$message = wfMessage( 'excontentauthor', '$1', $onlyAuthor );
		} else {
			$message = wfMessage( 'excontent', '$1' );
		}
		$reason = $message->inContentLanguage()->text();

		if ( $reason == '-' ) {
			// Allow these UI messages to be blanked out cleanly
			return '';
		}

		// Max content length = max comment length - length of the comment (excl. $1)
		$maxLength = 255 - ( strlen( $reason ) - 2 );
		$text = $content ? $content->getTextForSummary( $maxLength ) : '';

		// Now replace the '$1' placeholder
		return str_replace( '$1', $text, $reason );
	}
980
981
	/**
	 * Get the Content object that needs to be saved in order to undo all revisions
	 * between $undo and $undoafter. Revisions must belong to the same page,
	 * must exist and must not be deleted.
	 *
	 * @since 1.21
	 *
	 * @param Revision $current The current text
	 * @param Revision $undo The revision to undo
	 * @param Revision $undoafter Must be an earlier revision than $undo
	 *
	 * @return mixed The content of $undoafter for a straight revert, otherwise
	 *   whatever merge3() returns; false when any of the revisions has no
	 *   content or a content model check fails
	 */
	public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
		$currentContent = $current->getContent();
		if ( !$currentContent ) {
			return false; // no page
		}

		$undoContent = $undo->getContent();
		$undoAfterContent = $undoafter->getContent();
		if ( !( $undoContent && $undoAfterContent ) ) {
			return false; // no content to undo
		}

		try {
			$this->checkModelID( $currentContent->getModel() );
			$this->checkModelID( $undoContent->getModel() );

			// Undoing the most recent revision may revert a content model
			// change. Doing so for a revision in the middle of the history
			// would be confusing, so the model is checked in that case.
			if ( $current->getId() !== $undo->getId() ) {
				$this->checkModelID( $undoAfterContent->getModel() );
			}
		} catch ( MWException $e ) {
			// If the revisions have different content models
			// just return false
			return false;
		}

		// No use doing a merge if it's just a straight revert.
		return $currentContent->equals( $undoContent )
			? $undoAfterContent
			: $this->merge3( $undoContent, $undoAfterContent, $currentContent );
	}
1033
1034
	/**
	 * Get parser options suitable for rendering and caching the article
	 *
	 * @param IContextSource|User|string $context One of the following:
	 *        - IContextSource: Use the User and the Language of the provided
	 *                                            context
	 *        - User: Use the provided User object and $wgLang for the language,
	 *                                            so use an IContextSource object if possible.
	 *        - 'canonical': Canonical options (anonymous user with default
	 *                                            preferences and content language).
	 *
	 * @throws MWException If $context is none of the above
	 * @return ParserOptions
	 */
	public function makeParserOptions( $context ) {
		global $wgContLang, $wgEnableParserLimitReporting;

		if ( $context instanceof IContextSource ) {
			$options = ParserOptions::newFromContext( $context );
		} elseif ( $context instanceof User ) { // settings per user (even anons)
			$options = ParserOptions::newFromUser( $context );
		} elseif ( $context === 'canonical' ) { // canonical settings
			$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
		} else {
			// Describe the offending value without casting it blindly to a
			// string: objects without __toString() would otherwise trigger a
			// conversion error (and arrays a notice) before the exception
			// could be thrown.
			if ( is_object( $context ) ) {
				$described = get_class( $context );
			} elseif ( is_scalar( $context ) ) {
				$described = (string)$context;
			} else {
				$described = gettype( $context );
			}

			throw new MWException( "Bad context for parser options: $described" );
		}

		$options->enableLimitReport( $wgEnableParserLimitReporting ); // show inclusion/loop reports
		$options->setTidy( true ); // fix bad HTML

		return $options;
	}
1066
1067
	/**
	 * Tells whether content of this model can be cached using the
	 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
	 *
	 * This default implementation reports no support.
	 *
	 * @since 1.21
	 *
	 * @return bool Always false.
	 */
	public function isParserCacheSupported() {
		return false;
	}
1078
1079
	/**
	 * Tells whether this content model supports sections.
	 * This default implementation returns false.
	 *
	 * Content models that return true here should also implement
	 * Content::getSection, Content::replaceSection, etc. to handle sections.
	 *
	 * @return bool Always false.
	 */
	public function supportsSections() {
		return false;
	}
1091
1092
	/**
	 * Tells whether this content model supports categories.
	 * This default implementation returns true.
	 *
	 * @return bool Always true.
	 */
	public function supportsCategories() {
		return true;
	}
1101
1102
	/**
	 * Tells whether this content model supports redirects.
	 * This default implementation returns false.
	 *
	 * Content models that return true here should also implement
	 * ContentHandler::makeRedirectContent to return a Content object.
	 *
	 * @return bool Always false.
	 */
	public function supportsRedirects() {
		return false;
	}
1114
1115
	/**
	 * Tells whether this content model supports direct editing, such as via EditPage.
	 * This default implementation returns false.
	 *
	 * @return bool Default is false, and true for TextContent and its derivatives.
	 */
	public function supportsDirectEditing() {
		return false;
	}
1123
1124
	/**
	 * Tells whether this content model supports direct editing via ApiEditPage.
	 * This default implementation defers to supportsDirectEditing().
	 *
	 * @return bool Default is false, and true for TextContent and derivatives.
	 */
	public function supportsDirectApiEditing() {
		$directlyEditable = $this->supportsDirectEditing();

		return $directlyEditable;
	}
1132
1133
	/**
	 * Call a legacy hook that uses text instead of Content objects.
	 *
	 * Every Content object in $args is replaced by its serialized text before
	 * the hook is run. Afterwards each such slot is set back to a Content
	 * object: the original one if the text is unchanged, otherwise a new one
	 * unserialized from the modified text by the original object's handler.
	 *
	 * @param string $event Event name
	 * @param array $args Parameters passed to hook functions
	 * @param string|null $deprecatedVersion Passed on to Hooks::run() so a
	 *   deprecation notice can be emitted for the provided version
	 *
	 * @return bool True if no handler aborted the hook
	 */
	public static function runLegacyHooks( $event, $args = [],
		$deprecatedVersion = null
	) {
		if ( !Hooks::isRegistered( $event ) ) {
			return true; // nothing to do here
		}

		// Swap Content objects for their text form, remembering both so the
		// slots can be restored after the hook has run.
		$originalObjects = [];
		$originalTexts = [];

		foreach ( $args as $key => $arg ) {
			if ( !( $arg instanceof Content ) ) {
				continue;
			}

			/* @var Content $arg */
			$serialized = $arg->serialize();

			$originalObjects[$key] = $arg;
			$originalTexts[$key] = $serialized;
			$args[$key] = $serialized;
		}

		// call the hook functions
		$ok = Hooks::run( $event, $args, $deprecatedVersion );

		// Put Content objects back, rebuilding those whose text was changed
		foreach ( $originalTexts as $key => $textBefore ) {
			$textAfter = $args[$key];

			/* @var Content $original */
			$original = $originalObjects[$key];

			$args[$key] = ( $textAfter === $textBefore )
				? $original
				: $original->getContentHandler()->unserializeContent( $textAfter );
		}

		return $ok;
	}
1191
1192
	/**
	 * Get fields definition for search index
	 *
	 * Provides mappings for 'category', 'external_link', 'outgoing_link' and
	 * 'template'; the 'category' and 'template' fields are flagged with
	 * SearchIndexField::FLAG_CASEFOLD.
	 *
	 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
	 *       field mappings here. (see T142670 and T143409)
	 *
	 * @param SearchEngine $engine
	 * @return SearchIndexField[] List of fields this content handler can provide.
	 * @since 1.28
	 */
	public function getFieldsForSearchIndex( SearchEngine $engine ) {
		// Start from an explicit empty list instead of relying on PHP to
		// create the array implicitly on the first write.
		$fields = [];

		$fields['category'] = $engine->makeSearchFieldMapping(
			'category',
			SearchIndexField::INDEX_TYPE_TEXT
		);

		$fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD );

		$fields['external_link'] = $engine->makeSearchFieldMapping(
			'external_link',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);

		$fields['outgoing_link'] = $engine->makeSearchFieldMapping(
			'outgoing_link',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);

		$fields['template'] = $engine->makeSearchFieldMapping(
			'template',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);

		$fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD );

		return $fields;
	}
1229
1230
	/**
	 * Add a new field definition to the given array.
	 *
	 * The mapping is created by $engine->makeSearchFieldMapping() and stored
	 * under $name, both in the passed-in array (by reference) and in the
	 * returned array.
	 *
	 * @param SearchIndexField[] &$fields Field definitions, modified in place
	 * @param SearchEngine       $engine
	 * @param string             $name
	 * @param int                $type
	 * @return SearchIndexField[] new field defs
	 * @since 1.28
	 */
	protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
		$mapping = $engine->makeSearchFieldMapping( $name, $type );
		$fields[$name] = $mapping;

		return $fields;
	}
1243
1244
	/**
	 * Return fields to be indexed by search engine
	 * as representation of this document.
	 *
	 * When the page has content, the result holds 'category',
	 * 'external_link', 'outgoing_link' and 'template' (extracted from the
	 * parser output) plus 'text', 'source_text' and 'text_bytes' (taken from
	 * the content). The SearchDataForIndex hook is run in every case and may
	 * modify the data.
	 *
	 * Overriding class should call parent function or take care of calling
	 * the SearchDataForIndex hook.
	 *
	 * @param WikiPage     $page Page to index
	 * @param ParserOutput $output
	 * @param SearchEngine $engine Search engine for which we are indexing
	 * @return array Map of name=>value for fields
	 * @since 1.28
	 */
	public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
	                                       SearchEngine $engine ) {
		$fieldData = [];
		$content = $page->getContent();

		if ( $content ) {
			$extractor = new ParserOutputSearchDataExtractor();

			// Link-style data comes from the parser output
			$fieldData = [
				'category' => $extractor->getCategories( $output ),
				'external_link' => $extractor->getExternalLinks( $output ),
				'outgoing_link' => $extractor->getOutgoingLinks( $output ),
				'template' => $extractor->getTemplates( $output ),
			];

			// The same text is indexed as both 'text' and 'source_text'
			$indexText = $content->getTextForSearchIndex();
			$fieldData['text'] = $indexText;
			$fieldData['source_text'] = $indexText;
			$fieldData['text_bytes'] = $content->getSize();
		}

		Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

		return $fieldData;
	}
1278
1279
	/**
	 * Produce page output suitable for indexing.
	 *
	 * The output is looked up in $cache first, if a cache is given. When
	 * nothing usable is found it is generated from the page content with
	 * canonical parser options and, if a cache is given, saved there.
	 *
	 * Specific content handlers may override it if they need different content handling.
	 *
	 * NOTE: this calls $page->getRevision() and $page->getContent() without
	 * checking for null, so the page is expected to have a current revision
	 * with content.
	 *
	 * @param WikiPage         $page
	 * @param ParserCache|null $cache Cache to read from and write to, or null
	 *   to always generate the output
	 * @return ParserOutput
	 */
	public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
		$parserOptions = $page->makeParserOptions( 'canonical' );
		$revId = $page->getRevision()->getId();

		// Always define the variable, so the check below does not depend on
		// empty() silently tolerating an undefined one when there is no cache.
		$parserOutput = $cache ? $cache->get( $page, $parserOptions ) : null;

		if ( !$parserOutput ) {
			$parserOutput =
				$page->getContent()->getParserOutput( $page->getTitle(), $revId, $parserOptions );
			if ( $cache ) {
				$cache->save( $parserOutput, $page, $parserOptions );
			}
		}

		return $parserOutput;
	}
1303
1304
}
1305