Completed
Branch master (0c9f05)
by
unknown
29:21
created

ContentHandler::getFieldsForSearchIndex()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 27
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 16
nc 1
nop 1
dl 0
loc 27
rs 8.8571
c 1
b 0
f 0
1
<?php
2
3
use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5
/**
6
 * Base class for content handling.
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License along
19
 * with this program; if not, write to the Free Software Foundation, Inc.,
20
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 * http://www.gnu.org/copyleft/gpl.html
22
 *
23
 * @since 1.21
24
 *
25
 * @file
26
 * @ingroup Content
27
 *
28
 * @author Daniel Kinzler
29
 */
30
31
/**
32
 * Exception representing a failure to serialize or unserialize a content object.
33
 *
34
 * @ingroup Content
35
 */
36
class MWContentSerializationException extends MWException {
	// Marker subclass: declares no members of its own.
}
38
39
/**
40
 * Exception thrown when an unregistered content model is requested. This error
41
 * can be triggered by user input, so a separate exception class is provided so
42
 * callers can substitute a context-specific, internationalised error message.
43
 *
44
 * @ingroup Content
45
 * @since 1.27
46
 */
47
class MWUnknownContentModelException extends MWException {
	/** @var string The name of the unknown content model */
	private $modelId;

	/**
	 * @param string $modelId ID of the content model that has no registered handler
	 */
	public function __construct( $modelId ) {
		$message = "The content model '$modelId' is not registered on this wiki.\n"
			. 'See https://www.mediawiki.org/wiki/Content_handlers to find out which extensions '
			. 'handle this content model.';

		parent::__construct( $message );

		$this->modelId = $modelId;
	}

	/**
	 * @return string The model ID that was passed to the constructor
	 */
	public function getModelId() {
		return $this->modelId;
	}
}
64
65
/**
66
 * A content handler knows how to deal with a specific type of content on a wiki
67
 * page. Content is stored in the database in a serialized form (using a
68
 * serialization format a.k.a. MIME type) and is unserialized into its native
69
 * PHP representation (the content model), which is wrapped in an instance of
70
 * the appropriate subclass of Content.
71
 *
72
 * ContentHandler instances are stateless singletons that serve, among other
73
 * things, as a factory for Content objects. Generally, there is one subclass
74
 * of ContentHandler and one subclass of Content for every type of content model.
75
 *
76
 * Some content types have a flat model, that is, their native representation
77
 * is the same as their serialized form. Examples would be JavaScript and CSS
78
 * code. As of now, this also applies to wikitext (MediaWiki's default content
79
 * type), but wikitext content may be represented by a DOM or AST structure in
80
 * the future.
81
 *
82
 * @ingroup Content
83
 */
84
abstract class ContentHandler {
85
	/**
86
	 * Switch for enabling deprecation warnings. Used by ContentHandler::deprecated()
87
	 * and ContentHandler::runLegacyHooks().
88
	 *
89
	 * Once the ContentHandler code has settled in a bit, this should be set to true to
90
	 * make extensions etc. show warnings when using deprecated functions and hooks.
91
	 */
92
	protected static $enableDeprecationWarnings = false;
93
94
	/**
95
	 * Convenience function for getting flat text from a Content object. This
96
	 * should only be used in the context of backwards compatibility with code
97
	 * that is not yet able to handle Content objects!
98
	 *
99
	 * If $content is null, this method returns the empty string.
100
	 *
101
	 * If $content is an instance of TextContent, this method returns the flat
102
	 * text as returned by $content->getNativeData().
103
	 *
104
	 * If $content is not a TextContent object, the behavior of this method
105
	 * depends on the global $wgContentHandlerTextFallback:
106
	 * - If $wgContentHandlerTextFallback is 'fail' and $content is not a
107
	 *   TextContent object, an MWException is thrown.
108
	 * - If $wgContentHandlerTextFallback is 'serialize' and $content is not a
109
	 *   TextContent object, $content->serialize() is called to get a string
110
	 *   form of the content.
111
	 * - If $wgContentHandlerTextFallback is 'ignore' and $content is not a
112
	 *   TextContent object, this method returns null.
113
	 * - otherwise, the behavior is undefined.
114
	 *
115
	 * @since 1.21
116
	 *
117
	 * @param Content $content
118
	 *
119
	 * @throws MWException If the content is not an instance of TextContent and
120
	 * wgContentHandlerTextFallback was set to 'fail'.
121
	 * @return string|null Textual form of the content, if available.
122
	 */
123
	public static function getContentText( Content $content = null ) {
		global $wgContentHandlerTextFallback;

		// No content at all is represented by the empty string.
		if ( $content === null ) {
			return '';
		}

		// Text-based content exposes its flat text directly.
		if ( $content instanceof TextContent ) {
			return $content->getNativeData();
		}

		wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

		// Non-text content: the configured fallback decides what happens.
		// switch compares loosely, like the == checks it stands in for.
		switch ( $wgContentHandlerTextFallback ) {
			case 'fail':
				throw new MWException(
					'Attempt to get text from Content with model ' . $content->getModel()
				);
			case 'serialize':
				return $content->serialize();
			default:
				return null;
		}
	}
149
150
	/**
151
	 * Convenience function for creating a Content object from a given textual
152
	 * representation.
153
	 *
154
	 * $text will be deserialized into a Content object of the model specified
155
	 * by $modelId (or, if that is not given, $title->getContentModel()) using
156
	 * the given format.
157
	 *
158
	 * @since 1.21
159
	 *
160
	 * @param string $text The textual representation, will be
161
	 *    unserialized to create the Content object
162
	 * @param Title $title The title of the page this text belongs to.
163
	 *    Required if $modelId is not provided.
164
	 * @param string $modelId The model to deserialize to. If not provided,
165
	 *    $title->getContentModel() is used.
166
	 * @param string $format The format to use for deserialization. If not
167
	 *    given, the model's default format is used.
168
	 *
169
	 * @throws MWException If model ID or format is not supported or if the text can not be
170
	 * unserialized using the format.
171
	 * @return Content A Content object representing the text.
172
	 */
173
	public static function makeContent( $text, Title $title = null,
		$modelId = null, $format = null
	) {
		if ( $modelId === null ) {
			// Without an explicit model the title is the only source for it.
			if ( $title === null ) {
				throw new MWException( "Must provide a Title object or a content model ID." );
			}

			$modelId = $title->getContentModel();
		}

		return self::getForModelID( $modelId )->unserializeContent( $text, $format );
	}
187
188
	/**
189
	 * Returns the name of the default content model to be used for the page
190
	 * with the given title.
191
	 *
192
	 * Note: There should rarely be need to call this method directly.
193
	 * To determine the actual content model for a given page, use
194
	 * Title::getContentModel().
195
	 *
196
	 * Which model is to be used by default for the page is determined based
197
	 * on several factors:
198
	 * - The global setting $wgNamespaceContentModels specifies a content model
199
	 *   per namespace.
200
	 * - The hook ContentHandlerDefaultModelFor may be used to override the page's default
201
	 *   model.
202
	 * - Pages in NS_MEDIAWIKI and NS_USER default to the CSS or JavaScript
203
	 *   model if they end in .css or .js, respectively (and to the JSON model for .json).
204
	 * - Pages in NS_MEDIAWIKI default to the wikitext model otherwise.
205
	 * - The hook TitleIsCssOrJsPage may be used to force a page to use the CSS
206
	 *   or JavaScript model. This is a compatibility feature. The ContentHandlerDefaultModelFor
207
	 *   hook should be used instead if possible.
208
	 * - The hook TitleIsWikitextPage may be used to force a page to use the
209
	 *   wikitext model. This is a compatibility feature. The ContentHandlerDefaultModelFor
210
	 *   hook should be used instead if possible.
211
	 *
212
	 * If none of the above applies, the wikitext model is used.
213
	 *
214
	 * Note: this is used by, and may thus not use, Title::getContentModel()
215
	 *
216
	 * @since 1.21
217
	 *
218
	 * @param Title $title
219
	 *
220
	 * @return string Default model name for the page given by $title
221
	 */
222
	public static function getDefaultModelFor( Title $title ) {
		// NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
		//       because it is used to initialize the mContentModel member.

		$namespace = $title->getNamespace();

		$extension = false;
		$matches = null;
		$model = MWNamespace::getNamespaceContentModel( $namespace );

		// A hook that aborts processing and leaves a model set decides the result outright.
		if ( !Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] ) && $model !== null ) {
			return $model;
		}

		// Could this page contain code based on the title?
		$isCodePage = NS_MEDIAWIKI == $namespace
			&& preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );
		if ( $isCodePage ) {
			$extension = $matches[1];
		}

		// Hook can force JS/CSS
		Hooks::run( 'TitleIsCssOrJsPage', [ $title, &$isCodePage ], '1.25' );

		// Is this a user subpage containing code?
		$isCodeSubpage = NS_USER == $namespace
			&& !$isCodePage
			&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );
		if ( $isCodeSubpage ) {
			$extension = $matches[1];
		}

		// Wikitext only if the namespace/hook model allows it and the title is not a code page.
		$isWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT )
			&& !$isCodePage
			&& !$isCodeSubpage;

		// Hook can override $isWikitext
		Hooks::run( 'TitleIsWikitextPage', [ $title, &$isWikitext ], '1.25' );

		if ( $isWikitext ) {
			// We established that it must be wikitext
			return CONTENT_MODEL_WIKITEXT;
		}

		// $extension is either false or one of the captured suffixes.
		if ( $extension === 'js' ) {
			return CONTENT_MODEL_JAVASCRIPT;
		}
		if ( $extension === 'css' ) {
			return CONTENT_MODEL_CSS;
		}
		if ( $extension === 'json' ) {
			return CONTENT_MODEL_JSON;
		}

		return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
280
281
	/**
282
	 * Returns the appropriate ContentHandler singleton for the given title.
283
	 *
284
	 * @since 1.21
285
	 *
286
	 * @param Title $title
287
	 *
288
	 * @return ContentHandler
289
	 */
290
	public static function getForTitle( Title $title ) {
		// The title's content model selects the handler.
		return self::getForModelID( $title->getContentModel() );
	}
295
296
	/**
297
	 * Returns the appropriate ContentHandler singleton for the given Content
298
	 * object.
299
	 *
300
	 * @since 1.21
301
	 *
302
	 * @param Content $content
303
	 *
304
	 * @return ContentHandler
305
	 */
306
	public static function getForContent( Content $content ) {
		// The content object's own model selects the handler.
		return self::getForModelID( $content->getModel() );
	}
311
312
	/**
313
	 * @var array A Cache of ContentHandler instances by model id
314
	 */
315
	protected static $handlers;
316
317
	/**
318
	 * Returns the ContentHandler singleton for the given model ID. Use the
319
	 * CONTENT_MODEL_XXX constants to identify the desired content model.
320
	 *
321
	 * ContentHandler singletons are taken from the global $wgContentHandlers
322
	 * array. Keys in that array are model names, the values are either
323
	 * ContentHandler singleton objects, or strings specifying the appropriate
324
	 * subclass of ContentHandler.
325
	 *
326
	 * If a class name is encountered when looking up the singleton for a given
327
	 * model name, the class is instantiated and the resulting singleton is
	 * cached in ContentHandler::$handlers for subsequent lookups.
329
	 *
330
	 * If no ContentHandler is defined for the desired $modelId, the
331
	 * ContentHandler may be provided by the ContentHandlerForModelID hook.
332
	 * If no ContentHandler can be determined, an MWException is raised.
333
	 *
334
	 * @since 1.21
335
	 *
336
	 * @param string $modelId The ID of the content model for which to get a
337
	 *    handler. Use CONTENT_MODEL_XXX constants.
338
	 *
339
	 * @throws MWException For internal errors and problems in the configuration.
340
	 * @throws MWUnknownContentModelException If no handler is known for the model ID.
341
	 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
342
	 */
343
	public static function getForModelID( $modelId ) {
		global $wgContentHandlers;

		// Fast path: a handler for this model was already created and cached.
		if ( isset( ContentHandler::$handlers[$modelId] ) ) {
			return ContentHandler::$handlers[$modelId];
		}

		if ( empty( $wgContentHandlers[$modelId] ) ) {
			// Nothing configured for this model; give hook subscribers a chance
			// to supply a handler instance.
			$handler = null;

			Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

			if ( $handler === null ) {
				throw new MWUnknownContentModelException( $modelId );
			}

			if ( !( $handler instanceof ContentHandler ) ) {
				throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
			}
		} else {
			$classOrCallback = $wgContentHandlers[$modelId];

			// A callable acts as a factory; anything else is treated as a class name.
			if ( is_callable( $classOrCallback ) ) {
				$handler = call_user_func( $classOrCallback, $modelId );
			} else {
				$handler = new $classOrCallback( $modelId );
			}

			if ( !( $handler instanceof ContentHandler ) ) {
				// The configured value may be a Closure or an array callable, neither
				// of which can be interpolated into a string safely, so build a
				// printable description first. String values are used verbatim.
				if ( is_string( $classOrCallback ) ) {
					$source = $classOrCallback;
				} elseif ( is_array( $classOrCallback ) && isset( $classOrCallback[0], $classOrCallback[1] ) ) {
					$target = $classOrCallback[0];
					$source = ( is_object( $target ) ? get_class( $target ) : $target )
						. '::' . $classOrCallback[1];
				} elseif ( is_object( $classOrCallback ) ) {
					$source = get_class( $classOrCallback );
				} else {
					$source = gettype( $classOrCallback );
				}

				throw new MWException( "$source from \$wgContentHandlers is not " .
					"compatible with ContentHandler" );
			}
		}

		wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
			. ': ' . get_class( $handler ) );

		// Cache the instance so later lookups return the same singleton.
		ContentHandler::$handlers[$modelId] = $handler;

		return ContentHandler::$handlers[$modelId];
	}
384
385
	/**
386
	 * Returns the localized name for a given content model.
387
	 *
388
	 * Model names are localized using system messages. Message keys
389
	 * have the form content-model-$name, where $name is getContentModelName( $id ).
390
	 *
391
	 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
392
	 *    constant or returned by Revision::getContentModel().
393
	 * @param Language|null $lang The language to parse the message in (since 1.26)
394
	 *
395
	 * @throws MWException If the model ID isn't known.
396
	 * @return string The content model's localized name.
397
	 */
398
	public static function getLocalizedName( $name, Language $lang = null ) {
		// Messages: content-model-wikitext, content-model-text,
		// content-model-javascript, content-model-css
		$message = wfMessage( "content-model-$name" );

		if ( $lang ) {
			$message->inLanguage( $lang );
		}

		// Fall back to the raw model ID when no such message is defined.
		if ( !$message->exists() ) {
			return $name;
		}

		return $message->plain();
	}
410
411
	public static function getContentModels() {
		global $wgContentHandlers;

		// The configured model IDs are the keys of $wgContentHandlers.
		$modelIds = array_keys( $wgContentHandlers );

		return $modelIds;
	}
416
417
	public static function getAllContentFormats() {
		global $wgContentHandlers;

		$allFormats = [];

		// Collect the formats of every configured model; only the keys (model IDs) matter here.
		foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
			$supported = ContentHandler::getForModelID( $modelId )->getSupportedFormats();
			$allFormats = array_merge( $allFormats, $supported );
		}

		// Several models may share a format; report each one once.
		return array_unique( $allFormats );
	}
431
432
	// ------------------------------------------------------------------------
433
434
	/**
435
	 * @var string
436
	 */
437
	protected $mModelID;
438
439
	/**
440
	 * @var string[]
441
	 */
442
	protected $mSupportedFormats;
443
444
	/**
	 * @var string Lower-cased name derived from the handler's class name by the
	 *    constructor: a trailing "Handler" or "ContentHandler" is removed, then
	 *    underscores and namespace separators are stripped.
	 *
	 *    Declared explicitly so the constructor no longer creates a dynamic
	 *    property. It is public because that is the visibility the property
	 *    had while it was created dynamically.
	 */
	public $mModelName;

	/**
	 * Constructor, initializing the ContentHandler instance with its model ID
	 * and a list of supported formats. Values for the parameters are typically
	 * provided as literals by subclass's constructors.
	 *
	 * @param string $modelId (use CONTENT_MODEL_XXX constants).
	 * @param string[] $formats List for supported serialization formats
	 *    (typically as MIME types)
	 */
	public function __construct( $modelId, $formats ) {
		$this->mModelID = $modelId;
		$this->mSupportedFormats = $formats;

		// Derive a short model name from the concrete class name.
		$name = preg_replace( '/(Content)?Handler$/', '', get_class( $this ) );
		$name = preg_replace( '/[_\\\\]/', '', $name );
		$this->mModelName = strtolower( $name );
	}
461
462
	/**
463
	 * Serializes a Content object of the type supported by this ContentHandler.
464
	 *
465
	 * @since 1.21
466
	 *
467
	 * @param Content $content The Content object to serialize
468
	 * @param string $format The desired serialization format
469
	 *
470
	 * @return string Serialized form of the content
471
	 */
472
	abstract public function serializeContent( Content $content, $format = null );
473
474
	/**
475
	 * Applies transformations on export (returns the blob unchanged per default).
476
	 * Subclasses may override this to perform transformations such as conversion
477
	 * of legacy formats or filtering of internal meta-data.
478
	 *
479
	 * @param string $blob The blob to be exported
480
	 * @param string|null $format The blob's serialization format
481
	 *
482
	 * @return string
483
	 */
484
	public function exportTransform( $blob, $format = null ) {
		// Default: no transformation; $format is not consulted.
		return $blob;
	}
487
488
	/**
489
	 * Unserializes a Content object of the type supported by this ContentHandler.
490
	 *
491
	 * @since 1.21
492
	 *
493
	 * @param string $blob Serialized form of the content
494
	 * @param string $format The format used for serialization
495
	 *
496
	 * @return Content The Content object created by deserializing $blob
497
	 */
498
	abstract public function unserializeContent( $blob, $format = null );
499
500
	/**
501
	 * Apply import transformation (per default, returns $blob unchanged).
502
	 * This gives subclasses an opportunity to transform data blobs on import.
503
	 *
504
	 * @since 1.24
505
	 *
506
	 * @param string $blob
507
	 * @param string|null $format
508
	 *
509
	 * @return string
510
	 */
511
	public function importTransform( $blob, $format = null ) {
		// Default: no transformation; $format is not consulted.
		return $blob;
	}
514
515
	/**
516
	 * Creates an empty Content object of the type supported by this
517
	 * ContentHandler.
518
	 *
519
	 * @since 1.21
520
	 *
521
	 * @return Content
522
	 */
523
	abstract public function makeEmptyContent();
524
525
	/**
526
	 * Creates a new Content object that acts as a redirect to the given page,
527
	 * or null if redirects are not supported by this content model.
528
	 *
529
	 * This default implementation always returns null. Subclasses supporting redirects
530
	 * must override this method.
531
	 *
532
	 * Note that subclasses that override this method to return a Content object
533
	 * should also override supportsRedirects() to return true.
534
	 *
535
	 * @since 1.21
536
	 *
537
	 * @param Title $destination The page to redirect to.
538
	 * @param string $text Text to include in the redirect, if possible.
539
	 *
540
	 * @return Content Always null.
541
	 */
542
	public function makeRedirectContent( Title $destination, $text = '' ) {
		// This base implementation does not build redirects; both arguments are ignored.
		return null;
	}
545
546
	/**
547
	 * Returns the model id that identifies the content model this
548
	 * ContentHandler can handle. Use with the CONTENT_MODEL_XXX constants.
549
	 *
550
	 * @since 1.21
551
	 *
552
	 * @return string The model ID
553
	 */
554
	public function getModelID() {
		// Set once by the constructor.
		return $this->mModelID;
	}
557
558
	/**
559
	 * @since 1.21
560
	 *
561
	 * @param string $model_id The model to check
562
	 *
563
	 * @throws MWException If the model ID is not the ID of the content model supported by this
564
	 * ContentHandler.
565
	 */
566
	protected function checkModelID( $model_id ) {
		// Strict comparison: the ID must be identical to the one this handler was built with.
		if ( $model_id === $this->mModelID ) {
			return;
		}

		throw new MWException(
			"Bad content model: expected {$this->mModelID} but got $model_id."
		);
	}
573
574
	/**
575
	 * Returns a list of serialization formats supported by the
576
	 * serializeContent() and unserializeContent() methods of this
577
	 * ContentHandler.
578
	 *
579
	 * @since 1.21
580
	 *
581
	 * @return string[] List of serialization formats as MIME type like strings
582
	 */
583
	public function getSupportedFormats() {
		// The list handed to the constructor, returned as-is.
		return $this->mSupportedFormats;
	}
586
587
	/**
588
	 * The format used for serialization/deserialization by default by this
589
	 * ContentHandler.
590
	 *
591
	 * This default implementation will return the first element of the array
592
	 * of formats that was passed to the constructor.
593
	 *
594
	 * @since 1.21
595
	 *
596
	 * @return string The name of the default serialization format as a MIME type
597
	 */
598
	public function getDefaultFormat() {
		// By convention the element at index 0 of the supported formats is the default.
		$formats = $this->mSupportedFormats;

		return $formats[0];
	}
601
602
	/**
603
	 * Returns true if $format is a serialization format supported by this
604
	 * ContentHandler, and false otherwise.
605
	 *
606
	 * Note that if $format is null, this method always returns true, because
607
	 * null means "use the default format".
608
	 *
609
	 * @since 1.21
610
	 *
611
	 * @param string $format The serialization format to check
612
	 *
613
	 * @return bool
614
	 */
615
	public function isSupportedFormat( $format ) {
		// A falsy format means "use the default", which is always acceptable;
		// otherwise the format has to appear in the supported list.
		return !$format || in_array( $format, $this->mSupportedFormats );
	}
622
623
	/**
624
	 * Convenient for checking whether a format provided as a parameter is actually supported.
625
	 *
626
	 * @param string $format The serialization format to check
627
	 *
628
	 * @throws MWException If the format is not supported by this content handler.
629
	 */
630
	protected function checkFormat( $format ) {
		if ( $this->isSupportedFormat( $format ) ) {
			return;
		}

		$modelId = $this->getModelID();

		throw new MWException( "Format $format is not supported for content model " . $modelId );
	}
638
639
	/**
640
	 * Returns overrides for action handlers.
641
	 * Classes listed here will be used instead of the default one when
642
	 * (and only when) $wgActions[$action] === true. This allows subclasses
643
	 * to override the default action handlers.
644
	 *
645
	 * @since 1.21
646
	 *
647
	 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
648
	 *  either the full qualified class name of an Action class, a callable taking ( Page $page,
649
	 *  IContextSource $context = null ) as parameters and returning an Action object, or an actual
650
	 *  Action object. An empty array in this default implementation.
651
	 *
652
	 * @see Action::factory
653
	 */
654
	public function getActionOverrides() {
		// The base handler overrides no actions.
		$overrides = [];

		return $overrides;
	}
657
658
	/**
659
	 * Factory for creating an appropriate DifferenceEngine for this content model.
660
	 *
661
	 * @since 1.21
662
	 *
663
	 * @param IContextSource $context Context to use, anything else will be ignored.
664
	 * @param int $old Revision ID we want to show and diff with.
665
	 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
666
	 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
667
	 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
668
	 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
669
	 *
670
	 * @return DifferenceEngine
671
	 */
672
	public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
		$rcid = 0, // FIXME: Deprecated, no longer used
		$refreshCache = false, $unhide = false
	) {
		// Hook subscribers may provide their own engine through the by-reference slot.
		$differenceEngine = null;
		$proceed = Hooks::run( 'GetDifferenceEngine',
			[ $context, $old, $new, $refreshCache, $unhide, &$differenceEngine ]
		);

		if ( $proceed ) {
			// No hook aborted processing: build this model's default engine.
			$engineClass = $this->getDiffEngineClass();

			return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
		}

		return $differenceEngine;
	}
686
687
	/**
688
	 * Get the language in which the content of the given page is written.
689
	 *
690
	 * This default implementation just returns $wgContLang (except for pages
691
	 * in the MediaWiki namespace)
692
	 *
693
	 * Note that the pages language is not cacheable, since it may in some
694
	 * cases depend on user settings.
695
	 *
696
	 * Also note that the page language may or may not depend on the actual content of the page,
697
	 * that is, this method may load the content in order to determine the language.
698
	 *
699
	 * @since 1.21
700
	 *
701
	 * @param Title $title The page to determine the language for.
702
	 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
703
	 *
704
	 * @return Language The page's language
705
	 */
706
	public function getPageLanguage( Title $title, Content $content = null ) {
		global $wgContLang, $wgLang;

		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			// Parse mediawiki messages with correct target language;
			// figureMessage() returns the language code as its second element.
			$messageParts = MessageCache::singleton()->figureMessage( $title->getText() );
			$pageLang = wfGetLangObj( $messageParts[1] );
		} else {
			$pageLang = $wgContLang;
		}

		// Hook subscribers may replace the language through the by-reference argument.
		Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

		return wfGetLangObj( $pageLang );
	}
720
721
	/**
722
	 * Get the language in which the content of this page is written when
723
	 * viewed by user. Defaults to $this->getPageLanguage(), but if the user
724
	 * specified a preferred variant, the variant will be used.
725
	 *
726
	 * This default implementation just returns $this->getPageLanguage( $title, $content ) unless
727
	 * the user specified a preferred variant.
728
	 *
729
	 * Note that the pages view language is not cacheable, since it depends on user settings.
730
	 *
731
	 * Also note that the page language may or may not depend on the actual content of the page,
732
	 * that is, this method may load the content in order to determine the language.
733
	 *
734
	 * @since 1.21
735
	 *
736
	 * @param Title $title The page to determine the language for.
737
	 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
738
	 *
739
	 * @return Language The page's language for viewing
740
	 */
741
	public function getPageViewLanguage( Title $title, Content $content = null ) {
		$language = $this->getPageLanguage( $title, $content );

		// Pages in the MediaWiki namespace are never switched to a variant.
		if ( $title->getNamespace() === NS_MEDIAWIKI ) {
			return $language;
		}

		// If the user chooses a variant, the content is actually
		// in a language whose code is the variant code.
		$variant = $language->getPreferredVariant();
		if ( $language->getCode() === $variant ) {
			return $language;
		}

		return Language::factory( $variant );
	}
755
756
	/**
757
	 * Determines whether the content type handled by this ContentHandler
758
	 * can be used on the given page.
759
	 *
760
	 * This default implementation always returns true.
761
	 * Subclasses may override this to restrict the use of this content model to specific locations,
762
	 * typically based on the namespace or some other aspect of the title, such as a special suffix
763
	 * (e.g. ".svg" for SVG content).
764
	 *
765
	 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
766
	 * content model can be used where.
767
	 *
768
	 * @param Title $title The page's title.
769
	 *
770
	 * @return bool True if content of this kind can be used on the given page, false otherwise.
771
	 */
772
	public function canBeUsedOn( Title $title ) {
		// Allowed unless a hook subscriber flips the by-reference flag.
		$allowed = true;
		$modelId = $this->getModelID();

		Hooks::run( 'ContentModelCanBeUsedOn', [ $modelId, $title, &$allowed ] );

		return $allowed;
	}
779
780
	/**
781
	 * Returns the name of the diff engine to use.
782
	 *
783
	 * @since 1.21
784
	 *
785
	 * @return string
786
	 */
787
	protected function getDiffEngineClass() {
		// createDifferenceEngine() instantiates whatever class name is returned here.
		$engineClass = DifferenceEngine::class;

		return $engineClass;
	}
790
791
	/**
792
	 * Attempts to merge differences between three versions. Returns a new
793
	 * Content object for a clean merge and false for failure or a conflict.
794
	 *
795
	 * This default implementation always returns false.
796
	 *
797
	 * @since 1.21
798
	 *
799
	 * @param Content $oldContent The page's previous content.
800
	 * @param Content $myContent One of the page's conflicting contents.
801
	 * @param Content $yourContent One of the page's conflicting contents.
802
	 *
803
	 * @return Content|bool Always false.
804
	 */
805
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		// The base handler cannot merge; none of the arguments is inspected.
		return false;
	}
808
809
	/**
	 * Return an applicable auto-summary if one exists for the given edit.
	 *
	 * The cases are checked in this order: redirect created or retargeted,
	 * new non-empty page, page blanked, most of the page removed, new blank page.
	 *
	 * @since 1.21
	 *
	 * @param Content|null $oldContent The previous content of the page, if any.
	 * @param Content|null $newContent The submitted content of the page. When null,
	 *    no summary can be derived and the empty string is returned.
	 * @param int $flags Bit mask of flags submitted for the edit (EDIT_NEW is checked).
	 *
	 * @return string An appropriate auto-summary, or an empty string.
	 */
	public function getAutosummary( Content $oldContent = null, Content $newContent = null,
		$flags ) {
		// The signature allows a null $newContent, but every check below calls
		// methods on it. Without new content there is nothing to summarize.
		if ( $newContent === null ) {
			return '';
		}

		$isNewPage = (bool)( $flags & EDIT_NEW );

		// Redirect auto-summaries

		/**
		 * @var $ot Title
		 * @var $rt Title
		 */
		$ot = $oldContent !== null ? $oldContent->getRedirectTarget() : null;
		$rt = $newContent->getRedirectTarget();

		// Applies when the new content is a redirect and the old content either was
		// not one, or pointed to a different target or fragment.
		if ( is_object( $rt )
			&& ( !is_object( $ot )
				|| !$rt->equals( $ot )
				|| $ot->getFragment() != $rt->getFragment() )
		) {
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $rt->getFullText() ) );

			return wfMessage( 'autoredircomment', $rt->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		}

		// New page auto-summaries
		if ( $isNewPage && $newContent->getSize() > 0 ) {
			// If they're making a new article, give its text, truncated, in
			// the summary.
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		}

		// Blanking auto-summaries
		if ( $oldContent !== null ) {
			if ( $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
				return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
			}

			if ( $oldContent->getSize() > 10 * $newContent->getSize()
				&& $newContent->getSize() < 500
			) {
				// Removing more than 90% of the article
				$truncatedtext = $newContent->getTextForSummary(
					200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

				return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
					->inContentLanguage()->text();
			}
		}

		// New blank article auto-summary
		if ( $isNewPage && $newContent->isEmpty() ) {
			return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
		}

		// If we reach this point, there's no applicable auto-summary for our
		// case, so our auto-summary is empty.
		return '';
	}
886
887
	/**
888
	 * Auto-generates a deletion reason
889
	 *
890
	 * @since 1.21
891
	 *
892
	 * @param Title $title The page's title
893
	 * @param bool &$hasHistory Whether the page has a history
894
	 *
895
	 * @return mixed String containing deletion reason or empty string, or
896
	 *    boolean false if no revision occurred
897
	 *
898
	 * @todo &$hasHistory is extremely ugly, it's here because
899
	 * WikiPage::getAutoDeleteReason() and Article::generateReason()
900
	 * have it / want it.
901
	 */
902
	public function getAutoDeleteReason( Title $title, &$hasHistory ) {
		$dbr = wfGetDB( DB_SLAVE );

		// Get the last revision
		$rev = Revision::newFromTitle( $title );

		if ( is_null( $rev ) ) {
			return false;
		}

		// Get the article's contents
		$content = $rev->getContent();
		$blank = false;

		// If the page is blank, use the text from the previous revision,
		// which can only be blank if there's a move/import/protect dummy
		// revision involved
		if ( !$content || $content->isEmpty() ) {
			$prev = $rev->getPrevious();

			if ( $prev ) {
				$rev = $prev;
				$content = $rev->getContent();
				$blank = true;
			}
		}

		$this->checkModelID( $rev->getContentModel() );

		// Find out if there was only one contributor
		// Only scan the last 20 revisions
		$res = $dbr->select( 'revision', 'rev_user_text',
			[
				'rev_page' => $title->getArticleID(),
				$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
			],
			__METHOD__,
			[ 'LIMIT' => 20 ]
		);

		if ( $res === false ) {
			// This page has no revisions, which is very weird
			return false;
		}

		$hasHistory = ( $res->numRows() > 1 );
		$row = $dbr->fetchObject( $res );

		// false means "no single visible author"; $row is false if the only
		// contributor is hidden.
		$onlyAuthor = false;

		if ( $row ) {
			$onlyAuthor = $row->rev_user_text;
			// Try to find a second contributor. Compare strictly: with a loose
			// comparison, numeric-looking names such as "1e3" and "1000" would
			// be considered the same user.
			foreach ( $res as $row ) {
				if ( $row->rev_user_text !== $onlyAuthor ) { // Bug 22999
					$onlyAuthor = false;
					break;
				}
			}
		}

		// Generate the summary with a '$1' placeholder
		if ( $blank ) {
			// The current revision is blank, so the reason quotes the content
			// of the revision before it.
			$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
		} elseif ( $onlyAuthor !== false ) {
			// Explicit false check so that a user literally named "0" still
			// counts as the only author.
			$reason = wfMessage(
				'excontentauthor',
				'$1',
				$onlyAuthor
			)->inContentLanguage()->text();
		} else {
			$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
		}

		if ( $reason === '-' ) {
			// Allow these UI messages to be blanked out cleanly
			return '';
		}

		// Max content length = max comment length - length of the comment (excl. $1)
		$text = $content ? $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) ) : '';

		// Now replace the '$1' placeholder
		$reason = str_replace( '$1', $text, $reason );

		return $reason;
	}
993
994
	/**
995
	 * Get the Content object that needs to be saved in order to undo all revisions
996
	 * between $undo and $undoafter. Revisions must belong to the same page,
997
	 * must exist and must not be deleted.
998
	 *
999
	 * @since 1.21
1000
	 *
1001
	 * @param Revision $current The current text
1002
	 * @param Revision $undo The revision to undo
1003
	 * @param Revision $undoafter Must be an earlier revision than $undo
1004
	 *
1005
	 * @return Content|bool The content to save on success, false on failure
1006
	 */
1007
	public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
		$currentContent = $current->getContent();
		if ( !$currentContent ) {
			// no page
			return false;
		}

		$undoContent = $undo->getContent();
		$undoAfterContent = $undoafter->getContent();
		if ( !( $undoContent && $undoAfterContent ) ) {
			// no content to undo
			return false;
		}

		// All three versions must use the content model this handler serves.
		foreach ( [ $currentContent, $undoContent, $undoAfterContent ] as $content ) {
			$this->checkModelID( $content->getModel() );
		}

		// If the current content is exactly the revision being undone, this is
		// a straight revert and no merge is needed; otherwise attempt a
		// three-way merge and hand back whatever merge3() reports.
		return $currentContent->equals( $undoContent )
			? $undoAfterContent
			: $this->merge3( $undoContent, $undoAfterContent, $currentContent );
	}
1034
1035
	/**
1036
	 * Get parser options suitable for rendering and caching the article
1037
	 *
1038
	 * @param IContextSource|User|string $context One of the following:
1039
	 *        - IContextSource: Use the User and the Language of the provided
1040
	 *                                            context
1041
	 *        - User: Use the provided User object and $wgLang for the language,
1042
	 *                                            so use an IContextSource object if possible.
1043
	 *        - 'canonical': Canonical options (anonymous user with default
1044
	 *                                            preferences and content language).
1045
	 *
1046
	 * @throws MWException
1047
	 * @return ParserOptions
1048
	 */
1049
	public function makeParserOptions( $context ) {
		global $wgContLang, $wgEnableParserLimitReporting;

		if ( $context instanceof IContextSource ) {
			$options = ParserOptions::newFromContext( $context );
		} elseif ( $context instanceof User ) { // settings per user (even anons)
			$options = ParserOptions::newFromUser( $context );
		} elseif ( $context === 'canonical' ) { // canonical settings
			$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
		} else {
			// Describe the bad value without forcing a string conversion:
			// interpolating an arbitrary object or array would raise its own
			// conversion error/notice and mask the exception thrown here.
			if ( is_scalar( $context ) ) {
				$described = $context;
			} elseif ( is_object( $context ) ) {
				$described = get_class( $context );
			} else {
				$described = gettype( $context );
			}

			throw new MWException( "Bad context for parser options: $described" );
		}

		$options->enableLimitReport( $wgEnableParserLimitReporting ); // show inclusion/loop reports
		$options->setTidy( true ); // fix bad HTML

		return $options;
	}
1067
1068
	/**
1069
	 * Returns true for content models that support caching using the
1070
	 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
1071
	 *
1072
	 * @since 1.21
1073
	 *
1074
	 * @return bool Always false.
1075
	 */
1076
	public function isParserCacheSupported() {
		// Base default: no ParserCache support.
		return false;
	}
1079
1080
	/**
1081
	 * Returns true if this content model supports sections.
1082
	 * This default implementation returns false.
1083
	 *
1084
	 * Content models that return true here should also implement
1085
	 * Content::getSection, Content::replaceSection, etc. to handle sections.
1086
	 *
1087
	 * @return bool Always false.
1088
	 */
1089
	public function supportsSections() {
		// Base default: the content model has no notion of sections.
		return false;
	}
1092
1093
	/**
1094
	 * Returns true if this content model supports categories.
1095
	 * The default implementation returns true.
1096
	 *
1097
	 * @return bool Always true.
1098
	 */
1099
	public function supportsCategories() {
		// Base default: categories are supported.
		return true;
	}
1102
1103
	/**
1104
	 * Returns true if this content model supports redirects.
1105
	 * This default implementation returns false.
1106
	 *
1107
	 * Content models that return true here should also implement
1108
	 * ContentHandler::makeRedirectContent to return a Content object.
1109
	 *
1110
	 * @return bool Always false.
1111
	 */
1112
	public function supportsRedirects() {
		// Base default: redirects are not supported.
		return false;
	}
1115
1116
	/**
1117
	 * Return true if this content model supports direct editing, such as via EditPage.
1118
	 *
1119
	 * @return bool Default is false, and true for TextContent and its derivatives.
1120
	 */
1121
	public function supportsDirectEditing() {
		// Base default: direct editing is not supported.
		return false;
	}
1124
1125
	/**
1126
	 * Whether or not this content model supports direct editing via ApiEditPage
1127
	 *
1128
	 * @return bool Default is false, and true for TextContent and derivatives.
1129
	 */
1130
	public function supportsDirectApiEditing() {
		// By default, API editing follows whatever direct editing allows.
		$directlyEditable = $this->supportsDirectEditing();

		return $directlyEditable;
	}
1133
1134
	/**
1135
	 * Logs a deprecation warning, visible if $wgDevelopmentWarnings, but only if
1136
	 * self::$enableDeprecationWarnings is set to true.
1137
	 *
1138
	 * @param string $func The name of the deprecated function
1139
	 * @param string $version The version since the method is deprecated. Usually 1.21
1140
	 *   for ContentHandler related stuff.
1141
	 * @param string|bool $component : Component to which the function belongs.
1142
	 *   If false, it is assumed the function is in MediaWiki core.
1143
	 *
1144
	 * @see ContentHandler::$enableDeprecationWarnings
1145
	 * @see wfDeprecated
1146
	 */
1147
	public static function deprecated( $func, $version, $component = false ) {
		// Warnings are opt-in via the static switch; stay silent otherwise.
		if ( !self::$enableDeprecationWarnings ) {
			return;
		}

		wfDeprecated( $func, $version, $component, 3 );
	}
1152
1153
	/**
1154
	 * Call a legacy hook that uses text instead of Content objects.
1155
	 * Will log a warning when a matching hook function is registered.
1156
	 * If the textual representation of the content is changed by the
1157
	 * hook function, a new Content object is constructed from the new
1158
	 * text.
1159
	 *
1160
	 * @param string $event Event name
1161
	 * @param array $args Parameters passed to hook functions
1162
	 * @param string|null $deprecatedVersion Emit a deprecation notice
1163
	 *   when the hook is run for the provided version
1164
	 *
1165
	 * @return bool True if no handler aborted the hook
1166
	 */
1167
	public static function runLegacyHooks( $event, $args = [],
		$deprecatedVersion = null
	) {
		if ( !Hooks::isRegistered( $event ) ) {
			// nothing to do here
			return true;
		}

		// Swap every Content argument for its serialized text, remembering
		// both the original object and the text handed to the hook.
		$contentObjects = [];
		$contentTexts = [];

		foreach ( $args as $key => $arg ) {
			if ( !( $arg instanceof Content ) ) {
				continue;
			}

			/* @var Content $arg */
			$serialized = $arg->serialize();

			$contentObjects[$key] = $arg;
			$contentTexts[$key] = $serialized;
			$args[$key] = $serialized;
		}

		// call the hook functions
		$ok = Hooks::run( $event, $args, $deprecatedVersion );

		// Put Content objects back into $args: the original object when the
		// text is untouched, a freshly unserialized one when a hook changed it.
		// NOTE(review): $args is received by value, so these write-backs only
		// reach the caller through elements that were passed as references -
		// confirm that callers rely on that.
		foreach ( $contentTexts as $key => $originalText ) {
			/* @var Content $original */
			$currentText = $args[$key];
			$original = $contentObjects[$key];

			$args[$key] = ( $currentText !== $originalText )
				? $original->getContentHandler()->unserializeContent( $currentText )
				: $original;
		}

		return $ok;
	}
1211
1212
	/**
1213
	 * Get fields definition for search index
1214
	 *
1215
	 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
1216
	 *       field mappings here. (see T142670 and T143409)
1217
	 *
1218
	 * @param SearchEngine $engine
1219
	 * @return SearchIndexField[] List of fields this content handler can provide.
1220
	 * @since 1.28
1221
	 */
1222
	public function getFieldsForSearchIndex( SearchEngine $engine ) {
		// Start from an explicit empty array rather than relying on implicit
		// array creation on first assignment.
		$fields = [];

		// Categories: text field, case-folded.
		$fields['category'] = $engine->makeSearchFieldMapping(
			'category',
			SearchIndexField::INDEX_TYPE_TEXT
		);
		$fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD );

		// External and outgoing links: plain keyword fields.
		$fields['external_link'] = $engine->makeSearchFieldMapping(
			'external_link',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);

		$fields['outgoing_link'] = $engine->makeSearchFieldMapping(
			'outgoing_link',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);

		// Templates: keyword field, case-folded.
		$fields['template'] = $engine->makeSearchFieldMapping(
			'template',
			SearchIndexField::INDEX_TYPE_KEYWORD
		);
		$fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD );

		return $fields;
	}
1249
1250
	/**
1251
	 * Add new field definition to array.
1252
	 * @param SearchIndexField[] $fields
1253
	 * @param SearchEngine       $engine
1254
	 * @param string             $name
1255
	 * @param int                $type
1256
	 * @return SearchIndexField[] new field defs
1257
	 * @since 1.28
1258
	 */
1259
	protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
		// $fields is taken by reference, so the caller's array is updated in
		// place in addition to being returned.
		$mapping = $engine->makeSearchFieldMapping( $name, $type );
		$fields[$name] = $mapping;

		return $fields;
	}
1263
1264
	/**
1265
	 * Return fields to be indexed by search engine
1266
	 * as representation of this document.
1267
	 * Overriding class should call parent function or take care of calling
1268
	 * the SearchDataForIndex hook.
1269
	 * @param WikiPage     $page Page to index
1270
	 * @param ParserOutput $output
1271
	 * @param SearchEngine $engine Search engine for which we are indexing
1272
	 * @return array Map of name=>value for fields
1273
	 * @since 1.28
1274
	 */
1275
	public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
	                                       SearchEngine $engine ) {
		$content = $page->getContent();
		$fieldData = [];

		// Without content there is nothing to extract; the hook below still
		// runs and may fill in fields on its own.
		if ( $content ) {
			$extractor = new ParserOutputSearchDataExtractor();

			// Link-type data comes from the parser output...
			$fieldData = [
				'category' => $extractor->getCategories( $output ),
				'external_link' => $extractor->getExternalLinks( $output ),
				'outgoing_link' => $extractor->getOutgoingLinks( $output ),
				'template' => $extractor->getTemplates( $output ),
			];

			// ...while text and size come from the content object. The same
			// text is stored under both 'text' and 'source_text'.
			$indexText = $content->getTextForSearchIndex();
			$fieldData['text'] = $indexText;
			$fieldData['source_text'] = $indexText;
			$fieldData['text_bytes'] = $content->getSize();
		}

		// $fieldData is passed by reference so hook handlers can modify it.
		Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

		return $fieldData;
	}
1298
1299
	/**
1300
	 * Produce page output suitable for indexing.
1301
	 *
1302
	 * Specific content handlers may override it if they need different content handling.
1303
	 *
1304
	 * @param WikiPage    $page
1305
	 * @param ParserCache $cache
1306
	 * @return ParserOutput
1307
	 */
1308
	public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
		$parserOptions = $page->makeParserOptions( 'canonical' );
		$revId = $page->getRevision()->getId();

		// Prefer a cached rendering when a parser cache was supplied.
		$parserOutput = $cache ? $cache->get( $page, $parserOptions ) : null;
		if ( !empty( $parserOutput ) ) {
			return $parserOutput;
		}

		// No cache, or a cache miss: render the page content now and, if a
		// cache is available, store the result for later use.
		$parserOutput =
			$page->getContent()->getParserOutput( $page->getTitle(), $revId, $parserOptions );
		if ( $cache ) {
			$cache->save( $parserOutput, $page, $parserOptions );
		}

		return $parserOutput;
	}
1323
1324
}
1325