Completed
Branch master (5998bb)
by
unknown
29:17
created

BalanceActiveFormattingElements::reconstruct()   D

Complexity

Conditions 9
Paths 9

Size

Total Lines 42
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 1 Features 0
Metric Value
cc 9
eloc 23
c 3
b 1
f 0
nc 9
nop 1
dl 0
loc 42
rs 4.909
1
<?php
2
/**
3
 * An implementation of the tree building portion of the HTML5 parsing
4
 * spec.
5
 *
6
 * This program is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License along
17
 * with this program; if not, write to the Free Software Foundation, Inc.,
18
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
 * http://www.gnu.org/copyleft/gpl.html
20
 *
21
 * @file
22
 * @ingroup Parser
23
 * @since 1.27
24
 * @author C. Scott Ananian, 2016
25
 */
26
namespace MediaWiki\Tidy;
27
28
use Wikimedia\Assert\Assert;
29
use Wikimedia\Assert\ParameterAssertionException;
30
use \ExplodeIterator;
31
use \IteratorAggregate;
32
use \ReverseArrayIterator;
33
use \Sanitizer;
34
35
# A note for future librarization[1] -- this file is a good candidate
36
# for splitting into an independent library, except that it is currently
37
# highly optimized for MediaWiki use.  It only implements the portions
38
# of the HTML5 tree builder used by tags supported by MediaWiki, and
39
# does not contain a true tokenizer pass, instead relying on
40
# comment stripping, attribute normalization, and escaping done by
41
# the MediaWiki Sanitizer.  It also deliberately avoids building
42
# a true DOM in memory, instead serializing elements to an output string
43
# as soon as possible (usually as soon as the tag is closed) to reduce
44
# its memory footprint.
45
46
# We've been gradually lifting some of these restrictions to handle
47
# non-sanitized output generated by extensions, but we shortcut the tokenizer
48
# for speed (primarily by splitting on `<`) and so rely on syntactic
49
# well-formedness.
50
51
# On the other hand, I've been pretty careful to note with comments in the
52
# code the places where this implementation omits features of the spec or
53
# depends on the MediaWiki Sanitizer.  Perhaps in the future we'll want to
54
# implement the missing pieces and make this a standalone PHP HTML5 parser.
55
# In order to do so, some sort of MediaWiki-specific API will need
56
# to be added to (a) allow the Balancer to bypass the tokenizer,
57
# and (b) support on-the-fly flattening instead of DOM node creation.
58
59
# [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60
61
/**
62
 * Utility constants and sets for the HTML5 tree building algorithm.
63
 * Sets are associative arrays indexed first by namespace and then by
64
 * lower-cased tag name.
65
 *
66
 * @ingroup Parser
67
 * @since 1.27
68
 */
69
class BalanceSets {
	// Namespace URIs used as the first-level keys of every set below.
	const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
	const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
	const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';

	/** HTML tags listed as unsupported. */
	public static $unsupportedSet = [
		self::HTML_NAMESPACE => [
			'html' => true, 'head' => true, 'body' => true,
			'frameset' => true, 'frame' => true, 'plaintext' => true,
			'isindex' => true, 'xmp' => true, 'iframe' => true,
			'noembed' => true, 'noscript' => true, 'script' => true,
			'title' => true,
		],
	];

	/**
	 * Elements which BalanceElement::__toString() serializes in the
	 * self-closing form (no children, no end tag).
	 */
	public static $emptyElementSet = [
		self::HTML_NAMESPACE => [
			'area' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
			'br' => true, 'col' => true, 'command' => true, 'embed' => true,
			'frame' => true, 'hr' => true, 'img' => true, 'input' => true,
			'keygen' => true, 'link' => true, 'meta' => true, 'param' => true,
			'source' => true, 'track' => true, 'wbr' => true,
		],
	];

	/**
	 * Elements for which BalanceElement::__toString() doubles a linefeed
	 * that immediately follows the start tag.
	 */
	public static $extraLinefeedSet = [
		self::HTML_NAMESPACE => [
			'pre' => true,
			'textarea' => true,
			'listing' => true,
		],
	];

	/** The six HTML heading elements. */
	public static $headingSet = [
		self::HTML_NAMESPACE => [
			'h1' => true, 'h2' => true, 'h3' => true, 'h4' => true,
			'h5' => true, 'h6' => true,
		],
	];

	/**
	 * Presumably the "special" element category of the HTML5 tree
	 * builder (TODO confirm against the spec); spans all three namespaces.
	 */
	public static $specialSet = [
		self::HTML_NAMESPACE => [
			'address' => true, 'applet' => true, 'area' => true, 'article' => true,
			'aside' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
			'blockquote' => true, 'body' => true, 'br' => true, 'button' => true,
			'caption' => true, 'center' => true, 'col' => true, 'colgroup' => true,
			'dd' => true, 'details' => true, 'dir' => true, 'div' => true,
			'dl' => true, 'dt' => true, 'embed' => true, 'fieldset' => true,
			'figcaption' => true, 'figure' => true, 'footer' => true, 'form' => true,
			'frame' => true, 'frameset' => true, 'h1' => true, 'h2' => true,
			'h3' => true, 'h4' => true, 'h5' => true, 'h6' => true,
			'head' => true, 'header' => true, 'hgroup' => true, 'hr' => true,
			'html' => true, 'iframe' => true, 'img' => true, 'input' => true,
			'isindex' => true, 'li' => true, 'link' => true, 'listing' => true,
			'main' => true, 'marquee' => true, 'menu' => true, 'menuitem' => true,
			'meta' => true, 'nav' => true, 'noembed' => true, 'noframes' => true,
			'noscript' => true, 'object' => true, 'ol' => true, 'p' => true,
			'param' => true, 'plaintext' => true, 'pre' => true, 'script' => true,
			'section' => true, 'select' => true, 'source' => true, 'style' => true,
			'summary' => true, 'table' => true, 'tbody' => true, 'td' => true,
			'template' => true, 'textarea' => true, 'tfoot' => true, 'th' => true,
			'thead' => true, 'title' => true, 'tr' => true, 'track' => true,
			'ul' => true, 'wbr' => true, 'xmp' => true,
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true,
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true, 'annotation-xml' => true,
		],
	];

	/** The address, div and p elements. */
	public static $addressDivPSet = [
		self::HTML_NAMESPACE => [
			'address' => true,
			'div' => true,
			'p' => true,
		],
	];

	/** The table element plus its section and row elements. */
	public static $tableSectionRowSet = [
		self::HTML_NAMESPACE => [
			'table' => true, 'thead' => true, 'tbody' => true,
			'tfoot' => true, 'tr' => true,
		],
	];

	/** Tags eligible for implied end tags (basic list). */
	public static $impliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'dd' => true, 'dt' => true, 'li' => true,
			'optgroup' => true, 'option' => true, 'p' => true,
			'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
		],
	];

	/** Tags eligible for implied end tags (extended, "thorough" list). */
	public static $thoroughImpliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'caption' => true, 'colgroup' => true, 'dd' => true,
			'dt' => true, 'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true, 'rb' => true,
			'rp' => true, 'rt' => true, 'rtc' => true,
			'tbody' => true, 'td' => true, 'tfoot' => true,
			'th' => true, 'thead' => true, 'tr' => true,
		],
	];

	/** Table cell elements. */
	public static $tableCellSet = [
		self::HTML_NAMESPACE => [
			'td' => true,
			'th' => true,
		],
	];

	/** Elements delimiting a table context. */
	public static $tableContextSet = [
		self::HTML_NAMESPACE => [
			'table' => true,
			'template' => true,
			'html' => true,
		],
	];

	/** Elements delimiting a table body context. */
	public static $tableBodyContextSet = [
		self::HTML_NAMESPACE => [
			'tbody' => true, 'tfoot' => true, 'thead' => true,
			'template' => true, 'html' => true,
		],
	];

	/** Elements delimiting a table row context. */
	public static $tableRowContextSet = [
		self::HTML_NAMESPACE => [
			'tr' => true,
			'template' => true,
			'html' => true,
		],
	];

	// See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
	public static $formAssociatedSet = [
		self::HTML_NAMESPACE => [
			'button' => true, 'fieldset' => true, 'input' => true,
			'keygen' => true, 'object' => true, 'output' => true,
			'select' => true, 'textarea' => true, 'img' => true,
		],
	];

	/**
	 * Base scope set; inListItemScopeSet() and inButtonScopeSet() are
	 * derived from it by adding further HTML tags.
	 */
	public static $inScopeSet = [
		self::HTML_NAMESPACE => [
			'applet' => true, 'caption' => true, 'html' => true,
			'marquee' => true, 'object' => true, 'table' => true,
			'td' => true, 'template' => true, 'th' => true,
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true,
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true, 'annotation-xml' => true,
		],
	];

	/** Cache for inListItemScopeSet(); null until first requested. */
	private static $inListItemScopeSet = null;

	/**
	 * Return $inScopeSet extended with the HTML 'ol' and 'ul' tags.
	 * The result is built on first use and cached afterwards.
	 * @return array
	 */
	public static function inListItemScopeSet() {
		if ( self::$inListItemScopeSet !== null ) {
			return self::$inListItemScopeSet;
		}
		$derived = self::$inScopeSet;
		$derived[self::HTML_NAMESPACE]['ol'] = true;
		$derived[self::HTML_NAMESPACE]['ul'] = true;
		self::$inListItemScopeSet = $derived;
		return self::$inListItemScopeSet;
	}

	/** Cache for inButtonScopeSet(); null until first requested. */
	private static $inButtonScopeSet = null;

	/**
	 * Return $inScopeSet extended with the HTML 'button' tag.
	 * The result is built on first use and cached afterwards.
	 * @return array
	 */
	public static function inButtonScopeSet() {
		if ( self::$inButtonScopeSet !== null ) {
			return self::$inButtonScopeSet;
		}
		$derived = self::$inScopeSet;
		$derived[self::HTML_NAMESPACE]['button'] = true;
		self::$inButtonScopeSet = $derived;
		return self::$inButtonScopeSet;
	}

	/** Elements delimiting table scope. */
	public static $inTableScopeSet = [
		self::HTML_NAMESPACE => [
			'html' => true,
			'table' => true,
			'template' => true,
		],
	];

	/**
	 * The option and optgroup elements.  Going by the name, this is an
	 * inverted set for select scope -- confirm against its callers.
	 */
	public static $inInvertedSelectScopeSet = [
		self::HTML_NAMESPACE => [
			'option' => true,
			'optgroup' => true,
		],
	];

	/** Consulted by BalanceElement::isMathmlTextIntegrationPoint(). */
	public static $mathmlTextIntegrationPointSet = [
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true,
		],
	];

	/** Consulted by BalanceElement::isHtmlIntegrationPoint(). */
	public static $htmlIntegrationPointSet = [
		self::SVG_NAMESPACE => [
			'foreignobject' => true, 'desc' => true, 'title' => true,
		],
	];

	// For tidy compatibility.
	public static $tidyPWrapSet = [
		self::HTML_NAMESPACE => [
			'body' => true,
			'blockquote' => true,
			// We parse with <body> as the fragment context, but the top-level
			// element on the stack is actually <html>.  We could use the
			// "adjusted current node" everywhere to work around this, but it's
			// easier just to add <html> to the p-wrap set.
			'html' => true,
		],
	];

	/** Tags in the tidy-compatibility inline set. */
	public static $tidyInlineSet = [
		self::HTML_NAMESPACE => [
			'a' => true, 'abbr' => true, 'acronym' => true,
			'applet' => true, 'b' => true, 'basefont' => true,
			'bdo' => true, 'big' => true, 'br' => true,
			'button' => true, 'cite' => true, 'code' => true,
			'dfn' => true, 'em' => true, 'font' => true,
			'i' => true, 'iframe' => true, 'img' => true,
			'input' => true, 'kbd' => true, 'label' => true,
			'legend' => true, 'map' => true, 'object' => true,
			'param' => true, 'q' => true, 'rb' => true,
			'rbc' => true, 'rp' => true, 'rt' => true,
			'rtc' => true, 'ruby' => true, 's' => true,
			'samp' => true, 'select' => true, 'small' => true,
			'span' => true, 'strike' => true, 'strong' => true,
			'sub' => true, 'sup' => true, 'textarea' => true,
			'tt' => true, 'u' => true, 'var' => true,
		],
	];
}
298
299
/**
300
 * A BalanceElement is a simplified version of a DOM Node.  The main
301
 * difference is that we only keep BalanceElements around for nodes
302
 * currently on the BalanceStack of open elements.  As soon as an
303
 * element is closed, with some minor exceptions relating to the
304
 * tree builder "adoption agency algorithm", the element and all its
305
 * children are serialized to a string using the flatten() method.
306
 * This keeps our memory usage low.
307
 *
308
 * @ingroup Parser
309
 * @since 1.27
310
 */
311
class BalanceElement {
	/**
	 * The namespace of the element.
	 * @var string $namespaceURI
	 */
	public $namespaceURI;
	/**
	 * The lower-cased name of the element.
	 * @var string $localName
	 */
	public $localName;
	/**
	 * Attributes for the element, in array form
	 * @var array $attribs
	 */
	public $attribs;

	/**
	 * Initialised to the empty string by the constructor.  Nothing in
	 * this class reads it; it is declared explicitly so that the
	 * constructor no longer creates a dynamic property.
	 * @var string $contents
	 */
	public $contents;

	/**
	 * Parent of this element: a BalanceElement while attached, null while
	 * detached, or the string "flat" if this element has already been
	 * flattened into its parent.
	 * @var BalanceElement|string|null $parent
	 */
	public $parent;

	/**
	 * An array of children of this element.  Typically only the last
	 * child will be an actual BalanceElement object; the rest will
	 * be strings, representing either text nodes or flattened
	 * BalanceElement objects.
	 * @var array $children
	 */
	public $children;

	/**
	 * A unique string identifier for Noah's Ark purposes, lazy initialized
	 * @var string|null $noahKey
	 */
	private $noahKey;

	/**
	 * The next active formatting element in the list, or null if this is the
	 * end of the AFE list or if the element is not in the AFE list.
	 */
	public $nextAFE;

	/**
	 * The previous active formatting element in the list, or null if this is
	 * the start of the list or if the element is not in the AFE list.
	 */
	public $prevAFE;

	/**
	 * The next element in the Noah's Ark species bucket.
	 */
	public $nextNoah;

	/**
	 * Make a new BalanceElement corresponding to the HTML DOM Element
	 * with the given localname, namespace, and attributes.
	 *
	 * @param string $namespaceURI The namespace of the element.
	 * @param string $localName The lowercased name of the tag.
	 * @param array $attribs Attributes of the element
	 */
	public function __construct( $namespaceURI, $localName, array $attribs ) {
		$this->localName = $localName;
		$this->namespaceURI = $namespaceURI;
		$this->attribs = $attribs;
		$this->contents = '';
		$this->parent = null;
		$this->children = [];
	}

	/**
	 * Remove the given child from this element.
	 *
	 * Private, but invoked on other BalanceElement instances (the old
	 * parent of a node being moved) from insertBefore() and appendChild().
	 *
	 * @param BalanceElement $elt
	 */
	private function removeChild( BalanceElement $elt ) {
		// The message deliberately does not interpolate $this: that would
		// serialize the whole subtree on every call, even when the
		// assertion holds.
		Assert::precondition(
			$this->parent !== 'flat', "Can't removeChild after flattening."
		);
		Assert::parameter(
			$elt->parent === $this, '$elt', 'must have $this as a parent'
		);
		$idx = array_search( $elt, $this->children, true );
		Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
		$elt->parent = null;
		array_splice( $this->children, $idx, 1 );
	}

	/**
	 * Find $a in the list of children and insert $b before it.
	 *
	 * If $b is an element which currently has a parent, it is first
	 * detached from that parent.
	 *
	 * @param BalanceElement $a
	 * @param BalanceElement|string $b
	 */
	public function insertBefore( BalanceElement $a, $b ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't insertBefore after flattening."
		);
		$idx = array_search( $a, $this->children, true );
		Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
		if ( is_string( $b ) ) {
			array_splice( $this->children, $idx, 0, [ $b ] );
			return;
		}
		Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
		if ( $b === $a ) {
			// Inserting a node before itself leaves the tree unchanged.
			return;
		}
		if ( $b->parent !== null ) {
			$b->parent->removeChild( $b );
			// If $b was an earlier sibling of $a, removing it shifted $a
			// down by one; look the position up again so that $b really
			// ends up immediately before $a.
			$idx = array_search( $a, $this->children, true );
		}
		array_splice( $this->children, $idx, 0, [ $b ] );
		$b->parent = $this;
	}

	/**
	 * Append $elt to the end of the list of children.
	 *
	 * If $elt is an element which currently has a parent, it is first
	 * detached from that parent.
	 *
	 * @param BalanceElement|string $elt
	 */
	public function appendChild( $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't appendChild after flattening."
		);
		if ( is_string( $elt ) ) {
			array_push( $this->children, $elt );
			return;
		}
		// Remove $elt from parent, if it had one.
		if ( $elt->parent !== null ) {
			$elt->parent->removeChild( $elt );
		}
		array_push( $this->children, $elt );
		$elt->parent = $this;
	}

	/**
	 * Transfer all of the children of $elt to $this.
	 * @param BalanceElement $elt
	 */
	public function adoptChildren( BalanceElement $elt ) {
		Assert::precondition(
			$elt->parent !== 'flat', "Can't adoptChildren after flattening."
		);
		foreach ( $elt->children as $child ) {
			if ( !is_string( $child ) ) {
				// This is an optimization which avoids an O(n^2) set of
				// array_splice operations.
				$child->parent = null;
			}
			$this->appendChild( $child );
		}
		$elt->children = [];
	}

	/**
	 * Flatten this node and all of its children into a string, as specified
	 * by the HTML serialization specification, and replace this node
	 * in its parent by that string.
	 *
	 * @param bool $tidyCompat When true, apply the tidy-compatibility
	 *   rules: 'mw:p-wrap' elements are renamed to 'p' (or dropped when
	 *   they contain only whitespace), and blank attribute-less 'tr'/'li'
	 *   elements get the 'mw-empty-elt' class.
	 * @return string The serialized form which replaced this node.
	 * @see __toString()
	 */
	public function flatten( $tidyCompat = false ) {
		Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
		Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
		$idx = array_search( $this, $this->parent->children, true );
		Assert::parameter(
			$idx !== false, '$this', 'must be a child of its parent'
		);
		if ( $tidyCompat ) {
			// Flatten any remaining element children first, noting whether
			// anything other than whitespace is present.
			$blank = true;
			foreach ( $this->children as $elt ) {
				if ( !is_string( $elt ) ) {
					$elt = $elt->flatten( $tidyCompat );
				}
				if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
					$blank = false;
				}
			}
			if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
				$this->localName = 'p';
			} elseif ( $blank ) {
				// Add 'mw-empty-elt' class so elements can be hidden via CSS
				// for compatibility with legacy tidy.
				if ( !count( $this->attribs ) &&
					( $this->localName === 'tr' || $this->localName === 'li' )
				) {
					$this->attribs = [ 'class' => "mw-empty-elt" ];
				}
				// Only a blank p-wrap is dropped; other blank elements
				// are still serialized.
				$blank = false;
			}
			$flat = $blank ? '' : "{$this}";
		} else {
			$flat = "{$this}";
		}
		$this->parent->children[$idx] = $flat;
		$this->parent = 'flat'; # for assertion checking
		return $flat;
	}

	/**
	 * Serialize this node and all of its children to a string, as specified
	 * by the HTML serialization specification.
	 *
	 * @return string The serialization of the BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
	 */
	public function __toString() {
		$encAttribs = '';
		foreach ( $this->attribs as $name => $value ) {
			$encValue = Sanitizer::encodeAttribute( $value );
			$encAttribs .= " $name=\"$encValue\"";
		}
		if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
			$out = "<{$this->localName}{$encAttribs}>";
			// Offset of the first byte after the start tag.
			$len = strlen( $out );
			// flatten children
			foreach ( $this->children as $elt ) {
				$out .= "{$elt}";
			}
			$out .= "</{$this->localName}>";
			if (
				$this->isA( BalanceSets::$extraLinefeedSet ) &&
				$out[$len] === "\n"
			) {
				// Double the linefeed after pre/listing/textarea
				// according to the HTML5 fragment serialization algorithm.
				$out = substr( $out, 0, $len + 1 ) .
					substr( $out, $len );
			}
		} else {
			$out = "<{$this->localName}{$encAttribs} />";
			Assert::invariant(
				count( $this->children ) === 0,
				"Empty elements shouldn't have children."
			);
		}
		return $out;
	}

	# Utility functions on BalanceElements.

	/**
	 * Determine if $this represents a specific HTML tag, is a member of
	 * a tag set, or is equal to another BalanceElement.
	 *
	 * @param BalanceElement|array|string $set The target BalanceElement,
	 *   set (from the BalanceSets class), or string (HTML tag name).
	 * @return bool
	 */
	public function isA( $set ) {
		if ( $set instanceof BalanceElement ) {
			return $this === $set;
		} elseif ( is_array( $set ) ) {
			return isset( $set[$this->namespaceURI] ) &&
				isset( $set[$this->namespaceURI][$this->localName] );
		} else {
			# assume this is an HTML element name.
			return $this->isHtml() && $this->localName === $set;
		}
	}

	/**
	 * Determine if this element is an HTML element with the specified name
	 * @param string $tagName
	 * @return bool
	 */
	public function isHtmlNamed( $tagName ) {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE
			&& $this->localName === $tagName;
	}

	/**
	 * Determine if $this represents an element in the HTML namespace.
	 *
	 * @return bool
	 */
	public function isHtml() {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
	}

	/**
	 * Determine if $this represents a MathML text integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
	 */
	public function isMathmlTextIntegrationPoint() {
		return $this->isA( BalanceSets::$mathmlTextIntegrationPointSet );
	}

	/**
	 * Determine if $this represents an HTML integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
	 */
	public function isHtmlIntegrationPoint() {
		if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
			return true;
		}
		// A MathML annotation-xml element also qualifies when its
		// 'encoding' attribute names an HTML media type (compared
		// case-insensitively).
		if (
			$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$this->localName === 'annotation-xml' &&
			isset( $this->attribs['encoding'] ) &&
			( strcasecmp( $this->attribs['encoding'], 'text/html' ) === 0 ||
			strcasecmp( $this->attribs['encoding'], 'application/xhtml+xml' ) === 0 )
		) {
			return true;
		}
		return false;
	}

	/**
	 * Get a string key for the Noah's Ark algorithm.
	 *
	 * The key is the serialization of the namespace, the local name and
	 * the attributes sorted by name, so attribute order does not affect
	 * it.  It is computed once and cached.
	 *
	 * @return string
	 */
	public function getNoahKey() {
		if ( $this->noahKey === null ) {
			// Sort a copy; $this->attribs keeps its original order.
			$attribs = $this->attribs;
			ksort( $attribs );
			$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
		}
		return $this->noahKey;
	}
}
634
635
/**
636
 * The "stack of open elements" as defined in the HTML5 tree builder
637
 * spec.  This contains methods to ensure that content (start tags, text)
638
 * are inserted at the correct place in the output string, and to
639
 * flatten BalanceElements as they are closed to avoid holding onto
640
 * a complete DOM tree for the document in memory.
641
 *
642
 * The stack defines a PHP iterator to traverse it in "reverse order",
643
 * that is, the most-recently-added element is visited first in a
644
 * foreach loop.
645
 *
646
 * @ingroup Parser
647
 * @since 1.27
648
 * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
649
 */
650
class BalanceStack implements IteratorAggregate {
651
	/**
652
	 * Backing storage for the stack.
653
	 * @var array $elements
654
	 */
655
	private $elements = [];
656
	/**
657
	 * Foster parent mode determines how nodes are inserted into the
658
	 * stack.
659
	 * @var bool $fosterParentMode
660
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
661
	 */
662
	public $fosterParentMode = false;
663
	/**
664
	 * Tidy compatibility mode, determines behavior of body/blockquote
665
	 */
666
	public $tidyCompat = false;
667
	/**
668
	 * Reference to the current element
669
	 */
670
	public $currentNode;
671
672
	/**
673
	 * Create a new BalanceStack with a single BalanceElement on it,
674
	 * representing the root &lt;html&gt; node.
675
	 */
676
	public function __construct() {
		# always a root <html> element on the stack
		$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] );
		$this->elements[] = $root;
		// The root is the only entry, so it is also the current node.
		$this->currentNode = $root;
	}
684
685
	/**
686
	 * Return a string representing the output of the tree builder:
687
	 * all the children of the root &lt;html&gt; node.
688
	 * @return string
689
	 */
690
	public function getOutput() {
		// Only the children of the root node are serialized, so the
		// outer '<html>....</html>' is never part of the result.
		$html = '';
		foreach ( $this->elements[0]->children as $child ) {
			if ( is_string( $child ) ) {
				// Text children are plain strings already.
				$html .= $child;
			} else {
				$html .= $child->flatten( $this->tidyCompat );
			}
		}
		return $html;
	}
699
700
	/**
701
	 * Insert a comment at the appropriate place for inserting a node.
702
	 * @param string $value Content of the comment.
703
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-comment
704
	 */
705
	public function insertComment( $value ) {
		// Just another type of text node, except for tidy p-wrapping:
		// the comment flag passed to insertText() is set to true.
		$comment = '<!--' . $value . '-->';
		return $this->insertText( $comment, true );
	}
709
710
	/**
711
	 * Insert text at the appropriate place for inserting a node.
712
	 * @param string $value
713
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
714
	 */
715
	public function insertText( $value, $isComment = false ) {
		// Three cases, checked in order:
		//  1. foster parenting is on and the current node is a table
		//     section/row: the text is foster parented;
		//  2. tidy compatibility is on, the text is not a comment, and the
		//     current node is in the p-wrap set: open a p-wrapper, then
		//     insert the text again;
		//  3. otherwise the text is appended to the current node.
		if (
			$this->fosterParentMode &&
			$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
		) {
			$this->fosterParent( $value );
		} elseif (
			$this->tidyCompat && !$isComment &&
			$this->currentNode->isA( BalanceSets::$tidyPWrapSet )
		) {
			// Spelled with the declared method name (was `insertHTMLELement`,
			// which only resolved because PHP method names are
			// case-insensitive).
			$this->insertHTMLElement( 'mw:p-wrap', [] );
			return $this->insertText( $value );
		} else {
			$this->currentNode->appendChild( $value );
		}
	}
731
732
	/**
733
	 * Insert a BalanceElement at the appropriate place, pushing it
734
	 * on to the open elements stack.
735
	 * @param string $namespaceURI The element namespace
736
	 * @param string $tag The tag name
737
	 * @param array $attribs Normalized attributes, as an associative array.
738
	 * @return BalanceElement
739
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
740
	 */
741
	public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
		// Build the element first, then let insertElement() place it
		// and push it on to the stack.
		$element = new BalanceElement( $namespaceURI, $tag, $attribs );
		return $this->insertElement( $element );
	}
746
747
	/**
748
	 * Insert an HTML element at the appropriate place, pushing it on to
749
	 * the open elements stack.
750
	 * @param string $tag The tag name
751
	 * @param array $attribs Normalized attributes, as an associative array.
752
	 * @return BalanceElement
753
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
754
	 */
755
	public function insertHTMLElement( $tag, $attribs ) {
		// An HTML element is inserted as a foreign element in the HTML namespace.
		$htmlNamespace = BalanceSets::HTML_NAMESPACE;
		return $this->insertForeignElement( $htmlNamespace, $tag, $attribs );
	}
760
761
	/**
762
	 * Insert an element at the appropriate place and push it on to the
763
	 * open elements stack.
764
	 * @param BalanceElement $elt
765
	 * @return BalanceElement
766
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
767
	 */
768
	public function insertElement( BalanceElement $elt ) {
		// Tidy compatibility: an open p-wrapper is popped before anything
		// outside the tidy inline set is inserted.
		$closesPWrap = $this->currentNode->isHtmlNamed( 'mw:p-wrap' )
			&& !$elt->isA( BalanceSets::$tidyInlineSet );
		if ( $closesPWrap ) {
			$this->pop();
		}

		$needsFostering = $this->fosterParentMode
			&& $this->currentNode->isA( BalanceSets::$tableSectionRowSet );
		if ( $needsFostering ) {
			// fosterParent() may hand back a different element (an
			// existing p-wrapper), so keep its return value.
			$elt = $this->fosterParent( $elt );
		} else {
			$this->currentNode->appendChild( $elt );
		}

		Assert::invariant( $elt->parent !== null, "$elt must be in tree" );
		Assert::invariant( $elt->parent !== 'flat', "$elt must not have been previous flattened" );

		// The inserted element becomes the new top of the stack.
		$this->elements[] = $elt;
		$this->currentNode = $elt;
		return $elt;
	}
790
791
	/**
792
	 * Determine if the stack has $tag in scope.
793
	 * @param BalanceElement|array|string $tag
794
	 * @return bool
795
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
796
	 */
797
	public function inScope( $tag ) {
		// The generic scope check, bounded by the default scope set.
		$boundary = BalanceSets::$inScopeSet;
		return $this->inSpecificScope( $tag, $boundary );
	}
800
801
	/**
802
	 * Determine if the stack has $tag in button scope.
803
	 * @param BalanceElement|array|string $tag
804
	 * @return bool
805
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
806
	 */
807
	public function inButtonScope( $tag ) {
		// The generic scope check, bounded by the button scope set.
		$boundary = BalanceSets::inButtonScopeSet();
		return $this->inSpecificScope( $tag, $boundary );
	}
810
811
	/**
812
	 * Determine if the stack has $tag in list item scope.
813
	 * @param BalanceElement|array|string $tag
814
	 * @return bool
815
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
816
	 */
817
	public function inListItemScope( $tag ) {
		// The generic scope check, bounded by the list item scope set.
		$boundary = BalanceSets::inListItemScopeSet();
		return $this->inSpecificScope( $tag, $boundary );
	}
820
821
	/**
822
	 * Determine if the stack has $tag in table scope.
823
	 * @param BalanceElement|array|string $tag
824
	 * @return bool
825
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
826
	 */
827
	public function inTableScope( $tag ) {
		// The generic scope check, bounded by the table scope set.
		$boundary = BalanceSets::$inTableScopeSet;
		return $this->inSpecificScope( $tag, $boundary );
	}
830
831
	/**
832
	 * Determine if the stack has $tag in select scope.
833
	 * @param BalanceElement|array|string $tag
834
	 * @return bool
835
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope
836
	 */
837
	public function inSelectScope( $tag ) {
		// Can't use inSpecificScope to implement this, since it involves
		// *inverting* a set of tags.  Implement manually.
		$found = false;
		foreach ( $this as $elt ) {
			if ( $elt->isA( $tag ) ) {
				$found = true;
				break;
			}
			// Any element outside the inverted set ends the search.
			if ( !$elt->isA( BalanceSets::$inInvertedSelectScopeSet ) ) {
				break;
			}
		}
		return $found;
	}
850
851
	/**
852
	 * Determine if the stack has $tag in a specific scope, $set.
853
	 * @param BalanceElement|array|string $tag
854
	 * @param BalanceElement|array|string $set
855
	 * @return bool
856
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
857
	 */
858
	public function inSpecificScope( $tag, $set ) {
		// Walk from the current node toward the root; the first element
		// matching either $tag or the boundary $set settles the answer.
		$found = false;
		foreach ( $this as $elt ) {
			if ( $elt->isA( $tag ) ) {
				$found = true;
				break;
			}
			if ( $elt->isA( $set ) ) {
				// Hit a scope boundary before finding $tag.
				break;
			}
		}
		return $found;
	}
869
870
	/**
871
	 * Generate implied end tags.
872
	 * @param string $butnot
873
	 * @param bool $thorough True if we should generate end tags thoroughly.
874
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
875
	 */
876
	public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
		if ( $thorough ) {
			$endTagSet = BalanceSets::$thoroughImpliedEndTagsSet;
		} else {
			$endTagSet = BalanceSets::$impliedEndTagsSet;
		}

		// Keep popping while the current node is in the chosen set,
		// stopping early at an HTML element named $butnot.
		while ( $this->currentNode ) {
			$isExcluded = $butnot !== null && $this->currentNode->isHtmlNamed( $butnot );
			if ( $isExcluded || !$this->currentNode->isA( $endTagSet ) ) {
				break;
			}
			$this->pop();
		}
	}
890
891
	/**
892
	 * Return the adjusted current node.
893
	 */
894
	public function adjustedCurrentNode( $fragmentContext ) {
		// The fragment context stands in for the current node only when
		// one was supplied and exactly one element is on the stack.
		if ( $fragmentContext && count( $this->elements ) === 1 ) {
			return $fragmentContext;
		}
		return $this->currentNode;
	}
898
899
	/**
900
	 * Return an iterator over this stack which visits the current node
901
	 * first, and the root node last.
902
	 * @return Iterator
903
	 */
904
	public function getIterator() {
		// Iterate the backing array through ReverseArrayIterator.
		$reversed = new ReverseArrayIterator( $this->elements );
		return $reversed;
	}
907
908
	/**
909
	 * Return the BalanceElement at the given position $idx, where
910
	 * position 0 represents the root element.
911
	 * @param int $idx
912
	 * @return BalanceElement
913
	 */
914
	public function node( $idx ) {
		// Direct lookup in the backing array; no bounds checking is done.
		$element = $this->elements[ $idx ];
		return $element;
	}
917
918
	/**
919
	 * Replace the element at position $idx in the BalanceStack with $elt.
920
	 * @param int $idx
921
	 * @param BalanceElement $elt
922
	 */
923
	public function replaceAt( $idx, BalanceElement $elt ) {
		// Neither the outgoing nor the incoming element may have been
		// flattened already.
		$outgoing = $this->elements[$idx];
		Assert::precondition(
			$outgoing->parent !== 'flat',
			'Replaced element should not have already been flattened.'
		);
		Assert::precondition(
			$elt->parent !== 'flat',
			'New element should not have already been flattened.'
		);

		$this->elements[$idx] = $elt;

		// Replacing the top of the stack also replaces the current node.
		$topIndex = count( $this->elements ) - 1;
		if ( $idx === $topIndex ) {
			$this->currentNode = $elt;
		}
	}
937
938
	/**
939
	 * Return the position of the given BalanceElement, set, or
940
	 * HTML tag name string in the BalanceStack.
941
	 * @param BalanceElement|array|string $tag
942
	 * @return int
943
	 */
944
	public function indexOf( $tag ) {
		// Search from the top of the stack down to the root, so the most
		// recently added match wins.  -1 signals "not found".
		$i = count( $this->elements );
		while ( --$i >= 0 ) {
			if ( $this->elements[$i]->isA( $tag ) ) {
				return $i;
			}
		}
		return -1;
	}
952
953
	/**
954
	 * Return the number of elements currently in the BalanceStack.
955
	 * @return int
956
	 */
957
	public function length() {
		$size = count( $this->elements );
		return $size;
	}
960
961
	/**
962
	 * Remove the current node from the BalanceStack, flattening it
963
	 * in the process.
964
	 */
965
	public function pop() {
		$popped = array_pop( $this->elements );

		// The current node becomes the new top of the stack, or null
		// once the stack is empty.
		$remaining = count( $this->elements );
		$this->currentNode = $remaining ? $this->elements[ $remaining - 1 ] : null;

		// p-wrappers are deliberately not flattened when popped.
		if ( !$popped->isHtmlNamed( 'mw:p-wrap' ) ) {
			$popped->flatten( $this->tidyCompat );
		}
	}
976
977
	/**
978
	 * Remove all nodes up to and including position $idx from the
979
	 * BalanceStack, flattening them in the process.
980
	 * @param int $idx
981
	 */
982
	public function popTo( $idx ) {
		// Pop once for every stack position from the top down to $idx
		// inclusive.  The loop initializer is the only place $length
		// needs to be set; the separate assignment that used to precede
		// the loop was dead code.
		for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
			$this->pop();
		}
	}
988
989
	/**
990
	 * Pop elements off the stack up to and including the first
991
	 * element with the specified HTML tagname (or matching the given
992
	 * set).
993
	 * @param BalanceElement|array|string $tag
994
	 */
995
	public function popTag( $tag ) {
		while ( $this->currentNode ) {
			// Test before popping: the matching element itself is popped
			// too, and then the loop stops.
			$isMatch = $this->currentNode->isA( $tag );
			$this->pop();
			if ( $isMatch ) {
				break;
			}
		}
	}
1004
1005
	/**
1006
	 * Pop elements off the stack *not including* the first element
1007
	 * in the specified set.
1008
	 * @param BalanceElement|array|string $set
1009
	 */
1010
	public function clearToContext( $set ) {
		// Note that we never pop the last remaining entry (the <html>
		// elt): at most count - 1 pops are allowed.
		$popsLeft = count( $this->elements ) - 1;
		while ( $popsLeft > 0 && !$this->currentNode->isA( $set ) ) {
			$this->pop();
			$popsLeft--;
		}
	}
1019
1020
	/**
1021
	 * Remove the given $elt from the BalanceStack, optionally
1022
	 * flattening it in the process.
1023
	 * @param BalanceElement $elt The element to remove.
1024
	 * @param bool $flatten Whether to flatten the removed element.
1025
	 */
1026
	public function removeElement( BalanceElement $elt, $flatten = true ) {
		// Neither $elt nor its parent may have been flattened already.
		Assert::parameter(
			$elt->parent !== 'flat',
			'$elt',
			'$elt should not already have been flattened.'
		);
		Assert::parameter(
			$elt->parent->parent !== 'flat',
			'$elt',
			'The parent of $elt should not already have been flattened.'
		);

		// Locate $elt by identity and cut it out of the stack.
		$position = array_search( $elt, $this->elements, true );
		Assert::parameter( $position !== false, '$elt', 'must be in stack' );
		array_splice( $this->elements, $position, 1 );

		// If the removed element was on top, the element below it is
		// now the current node.
		$removedTop = ( $position === count( $this->elements ) );
		if ( $removedTop ) {
			$this->currentNode = $this->elements[$position - 1];
		}

		if ( $flatten ) {
			// serialize $elt into its parent
			// otherwise, it will eventually serialize when the parent
			// is serialized, we just hold onto the memory for its
			// tree of objects a little longer.
			$elt->flatten( $this->tidyCompat );
		}

		Assert::postcondition(
			array_search( $elt, $this->elements, true ) === false,
			'$elt should no longer be in open elements stack'
		);
	}
1055
1056
	/**
1057
	 * Find $a in the BalanceStack and insert $b after it.
1058
	 * @param BalanceElement $a
1059
	 * @param BalanceElement $b
1060
	 */
1061
	public function insertAfter( BalanceElement $a, BalanceElement $b ) {
		$idx = $this->indexOf( $a );
		// indexOf() reports "not found" as -1, never false.  Comparing
		// against false could not fail, so a missing $a used to slip
		// through and $b was spliced in at position 0.
		Assert::parameter( $idx >= 0, '$a', 'must be in stack' );

		if ( $idx === count( $this->elements ) - 1 ) {
			// $a is on top of the stack: $b becomes the new current node.
			array_push( $this->elements, $b );
			$this->currentNode = $b;
		} else {
			// Insert $b directly after $a; the current node is unchanged.
			array_splice( $this->elements, $idx + 1, 0, [ $b ] );
		}
	}
1071
1072
	# Fostering and adoption.
1073
1074
	/**
1075
	 * Foster parent the given $elt in the stack of open elements.
1076
	 * @param BalanceElement|string $elt
1077
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
1078
	 */
1079
	private function fosterParent( $elt ) {
		// Returns the node that ended up in the tree: normally $elt, but
		// in tidy-compat mode an existing p-wrapper may be returned in
		// place of a new one.
		$lastTable = $this->indexOf( 'table' );
		$lastTemplate = $this->indexOf( 'template' );
		// When set, $elt is inserted before this child of $parent
		// rather than appended.
		$before = null;

		// Every branch assigns $parent, so no separate initialization
		// is needed.
		if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
			$parent = $this->elements[$lastTemplate];
		} elseif ( $lastTable >= 0 ) {
			$parent = $this->elements[$lastTable]->parent;
			# Assume all tables have parents, since we're not running scripts!
			Assert::invariant(
				$parent !== null, "All tables should have parents"
			);
			$before = $this->elements[$lastTable];
		} else {
			$parent = $this->elements[0]; // the `html` element.
		}

		if ( $this->tidyCompat ) {
			if ( is_string( $elt ) ) {
				// We're fostering text: do we need a p-wrapper?
				if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
					$this->insertHTMLElement( 'mw:p-wrap', [] );
					$this->insertText( $elt );
					return $elt;
				}
			} else {
				// We're fostering an element; do we need to merge p-wrappers?
				if ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
					// Find the child that would directly precede the
					// insertion point.
					$idx = $before ?
						array_search( $before, $parent->children, true ) :
						count( $parent->children );
					$after = $idx > 0 ? $parent->children[$idx - 1] : '';
					if (
						$after instanceof BalanceElement &&
						$after->isHtmlNamed( 'mw:p-wrap' )
					) {
						return $after; // Re-use existing p-wrapper.
					}
				}
			}
		}

		if ( $before ) {
			$parent->insertBefore( $before, $elt );
		} else {
			$parent->appendChild( $elt );
		}
		return $elt;
	}
1130
1131
	/**
1132
	 * Run the "adoption agency algorithm" (AAA) for the given subject
1133
	 * tag name.
1134
	 * @param string $tag The subject tag name.
1135
	 * @param BalanceActiveFormattingElements $afe The current
1136
	 *   active formatting elements list.
1137
	 * @return true if the adoption agency algorithm "did something", false
1138
	 *   if more processing is required by the caller.
1139
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1140
	 */
1141
	public function adoptionAgency( $tag, $afe ) {
		// Returns true when the end tag has been fully handled here and
		// false when the caller must fall back to its default
		// ("any other end tag") handling.

		// If the current node is an HTML element whose tag name is subject,
		// and the current node is not in the list of active formatting
		// elements, then pop the current node off the stack of open
		// elements and abort these steps.
		if (
			$this->currentNode->isHtmlNamed( $tag ) &&
			!$afe->isInList( $this->currentNode )
		) {
			$this->pop();
			return true; // no more handling required
		}

		// Outer loop: let the outer loop counter be zero; if it is greater
		// than or equal to eight, abort these steps.  The counter is
		// incremented once per iteration.
		for ( $outer = 0; $outer < 8; $outer++ ) {
			// Let the formatting element be the last element in the list
			// of active formatting elements that: is between the end of
			// the list and the last scope marker in the list, if any, or
			// the start of the list otherwise, and has the same tag name
			// as the token.
			$fmtelt = $afe->findElementByTag( $tag );

			// If there is no such node, then abort these steps and instead
			// act as described in the "any other end tag" entry below.
			if ( !$fmtelt ) {
				return false; // false means handle by the default case
			}

			// Otherwise, if there is such a node, but that node is not in
			// the stack of open elements, then this is a parse error;
			// remove the element from the list, and abort these steps.
			$index = $this->indexOf( $fmtelt );
			if ( $index < 0 ) {
				$afe->remove( $fmtelt );
				return true;   // true means no more handling required
			}

			// Otherwise, if there is such a node, and that node is also in
			// the stack of open elements, but the element is not in scope,
			// then this is a parse error; ignore the token, and abort
			// these steps.
			if ( !$this->inScope( $fmtelt ) ) {
				return true;
			}

			// Let the furthest block be the topmost node in the stack of
			// open elements that is lower in the stack than the formatting
			// element, and is an element in the special category. There
			// might not be one.
			$furthestblock = null;
			$furthestblockindex = -1;
			$stacklen = $this->length();
			for ( $i = $index + 1; $i < $stacklen; $i++ ) {
				if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
					$furthestblock = $this->node( $i );
					$furthestblockindex = $i;
					break;
				}
			}

			// If there is no furthest block, then the UA must skip the
			// subsequent steps and instead just pop all the nodes from the
			// bottom of the stack of open elements, from the current node
			// up to and including the formatting element, and remove the
			// formatting element from the list of active formatting
			// elements.
			if ( !$furthestblock ) {
				$this->popTag( $fmtelt );
				$afe->remove( $fmtelt );
				return true;
			}

			// Let the common ancestor be the element immediately above
			// the formatting element in the stack of open elements.
			$ancestor = $this->node( $index - 1 );

			// Let a bookmark note the position of the formatting
			// element in the list of active formatting elements
			// relative to the elements on either side of it in the
			// list.
			$BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
			$afe->insertAfter( $fmtelt, $BOOKMARK );

			// Let node and last node be the furthest block.  ($node itself
			// is assigned at the top of the inner loop, before its first
			// use, so only its stack index is initialized here.)
			$lastnode = $furthestblock;
			$nodeindex = $furthestblockindex;

			// Let inner loop counter be zero.
			$inner = 0;

			while ( true ) {
				// Increment inner loop counter by one.
				$inner++;

				// Let node be the element immediately above node in
				// the stack of open elements, or if node is no longer
				// in the stack of open elements (e.g. because it got
				// removed by this algorithm), the element that was
				// immediately above node in the stack of open elements
				// before node was removed.
				$node = $this->node( --$nodeindex );

				// If node is the formatting element, then go
				// to the next step in the overall algorithm.
				if ( $node === $fmtelt ) {
					break;
				}

				// If the inner loop counter is greater than three and node
				// is in the list of active formatting elements, then remove
				// node from the list of active formatting elements.
				$isAFE = $afe->isInList( $node );
				if ( $inner > 3 && $isAFE ) {
					$afe->remove( $node );
					$isAFE = false;
				}

				// If node is not in the list of active formatting
				// elements, then remove node from the stack of open
				// elements and then go back to the step labeled inner
				// loop.
				if ( !$isAFE ) {
					// Don't flatten here, since we're about to relocate
					// parts of this $node.
					$this->removeElement( $node, false );
					continue;
				}

				// Create an element for the token for which the
				// element node was created with common ancestor as
				// the intended parent, replace the entry for node
				// in the list of active formatting elements with an
				// entry for the new element, replace the entry for
				// node in the stack of open elements with an entry for
				// the new element, and let node be the new element.
				$newelt = new BalanceElement(
					$node->namespaceURI, $node->localName, $node->attribs );
				$afe->replace( $node, $newelt );
				$this->replaceAt( $nodeindex, $newelt );
				$node = $newelt;

				// If last node is the furthest block, then move the
				// aforementioned bookmark to be immediately after the
				// new node in the list of active formatting elements.
				if ( $lastnode === $furthestblock ) {
					$afe->remove( $BOOKMARK );
					$afe->insertAfter( $newelt, $BOOKMARK );
				}

				// Insert last node into node, first removing it from
				// its previous parent node if any.
				$node->appendChild( $lastnode );

				// Let last node be node.
				$lastnode = $node;
			}

			// If the common ancestor node is a table, tbody, tfoot,
			// thead, or tr element, then, foster parent whatever last
			// node ended up being in the previous step, first removing
			// it from its previous parent node if any.
			if (
				$this->fosterParentMode &&
				$ancestor->isA( BalanceSets::$tableSectionRowSet )
			) {
				$this->fosterParent( $lastnode );
			} else {
				// Otherwise, append whatever last node ended up being in
				// the previous step to the common ancestor node, first
				// removing it from its previous parent node if any.
				$ancestor->appendChild( $lastnode );
			}

			// Create an element for the token for which the
			// formatting element was created, with furthest block
			// as the intended parent.
			$newelt2 = new BalanceElement(
				$fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );

			// Take all of the child nodes of the furthest block and
			// append them to the element created in the last step.
			$newelt2->adoptChildren( $furthestblock );

			// Append that new element to the furthest block.
			$furthestblock->appendChild( $newelt2 );

			// Remove the formatting element from the list of active
			// formatting elements, and insert the new element into the
			// list of active formatting elements at the position of
			// the aforementioned bookmark.
			$afe->remove( $fmtelt );
			$afe->replace( $BOOKMARK, $newelt2 );

			// Remove the formatting element from the stack of open
			// elements, and insert the new element into the stack of
			// open elements immediately below the position of the
			// furthest block in that stack.
			$this->removeElement( $fmtelt );
			$this->insertAfter( $furthestblock, $newelt2 );
		}

		return true;
	}
1352
1353
	/**
1354
	 * Return the contents of the open elements stack as a string for
1355
	 * debugging.
1356
	 * @return string
1357
	 */
1358
	public function __toString() {
		// Collect the local name of every element, root first.
		$names = [];
		foreach ( $this->elements as $elt ) {
			$names[] = $elt->localName;
		}
		// Separator first, then the array: the reversed legacy argument
		// order is deprecated as of PHP 7.4 and rejected by PHP 8.
		return implode( ' ', $names );
	}
1365
}
1366
1367
/**
1368
 * A pseudo-element used as a marker in the list of active formatting elements
1369
 *
1370
 * @ingroup Parser
1371
 * @since 1.27
1372
 */
1373
class BalanceMarker {
	/** Link to the following entry of the active formatting elements list; null until set. */
	public $nextAFE = null;

	/** Link to the preceding entry of the active formatting elements list; null until set. */
	public $prevAFE = null;
}
1377
1378
/**
1379
 * The list of active formatting elements, which is used to handle
1380
 * mis-nested formatting element tags in the HTML5 tree builder
1381
 * specification.
1382
 *
1383
 * @ingroup Parser
1384
 * @since 1.27
1385
 * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
1386
 */
1387
class BalanceActiveFormattingElements {
	/** The last (most recent) element in the list */
	private $tail;

	/** The first (least recent) element in the list */
	private $head;

	/**
	 * An array of arrays representing the population of elements in each bucket
	 * according to the Noah's Ark clause. The outer array is stack-like, with each
	 * integer-indexed element representing a segment of the list, bounded by
	 * markers. The first element represents the segment of the list before the
	 * first marker.
	 *
	 * The inner arrays are indexed by "Noah key", which is a string which uniquely
	 * identifies each bucket according to the rules in the spec. The value in
	 * the inner array is the first (least recently inserted) element in the bucket,
	 * and subsequent members of the bucket can be found by iterating through the
	 * singly-linked list via $node->nextNoah.
	 *
	 * This is optimised for the most common case of inserting into a bucket
	 * with zero members, and deleting a bucket containing one member. In the
	 * worst case, iteration through the list is still O(1) in the document
	 * size, since each bucket can have at most 3 members.
	 */
	private $noahTableStack = [ [] ];

	/**
	 * Unlink every node from its neighbours and drop the head, tail and
	 * Noah table references held by this object.
	 */
	public function __destruct() {
		for ( $node = $this->head; $node; $node = $next ) {
			// Remember the successor before the link to it is cleared.
			$next = $node->nextAFE;
			$node->prevAFE = $node->nextAFE = $node->nextNoah = null;
		}
		$this->head = $this->tail = null;
		// Keep the property an array, as it is declared.
		$this->noahTableStack = [];
	}

	/**
	 * Link $node in as the new tail of the doubly-linked AFE list.
	 * The Noah tables are not touched; callers maintain them.
	 * @param BalanceElement|BalanceMarker $node
	 */
	private function appendToList( $node ) {
		if ( $this->tail ) {
			$this->tail->nextAFE = $node;
			$node->prevAFE = $this->tail;
		} else {
			// Empty list: the node is also the head.
			$this->head = $node;
		}
		$this->tail = $node;
	}

	/**
	 * Append a marker to the list and start a new, empty Noah table
	 * for the list segment which follows it.
	 */
	public function insertMarker() {
		$this->appendToList( new BalanceMarker );
		$this->noahTableStack[] = [];
	}

	/**
	 * Follow the steps required when the spec requires us to "push onto the
	 * list of active formatting elements".
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException if $elt is already in the list
	 */
	public function push( BalanceElement $elt ) {
		// Must not be in the list already
		if ( $elt->prevAFE !== null || $this->head === $elt ) {
			throw new ParameterAssertionException( '$elt',
				'Cannot insert a node into the AFE list twice' );
		}

		// "Noah's Ark clause" -- if there are already three elements with
		// the same Noah key after the last marker, then remove the
		// earliest (least recently inserted) one.
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			// Walk the bucket to find its last member and its size.
			$count = 1;
			$head = $tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
				$count++;
			}
			if ( $count >= 3 ) {
				// remove() also unlinks $head from this bucket.
				$this->remove( $head );
			}
			$tail->nextNoah = $elt;
		}
		// Add to the main AFE list
		$this->appendToList( $elt );
	}

	/**
	 * Follow the steps required when the spec asks us to "clear the list of
	 * active formatting elements up to the last marker".
	 */
	public function clearToMarker() {
		// Iterate back through the list starting from the tail
		$tail = $this->tail;
		while ( $tail && !( $tail instanceof BalanceMarker ) ) {
			// Unlink the element
			$prev = $tail->prevAFE;
			$tail->prevAFE = null;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail->nextNoah = null;
			$tail = $prev;
		}
		// If we finished on a marker, unlink it and pop it off the Noah table stack
		if ( $tail ) {
			$prev = $tail->prevAFE;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail = $prev;
			array_pop( $this->noahTableStack );
		} else {
			// No marker: wipe the top-level Noah table (which is the only one)
			$this->noahTableStack[0] = [];
		}
		// If we removed all the elements, clear the head pointer
		if ( !$tail ) {
			$this->head = null;
		}
		$this->tail = $tail;
	}

	/**
	 * Find and return the last element with the specified tag between the
	 * end of the list and the last marker on the list.
	 * Used when parsing &lt;a&gt; "in body mode".
	 * @param string $tag Local name to look for
	 * @return BalanceElement|null The matching element, or null if none
	 */
	public function findElementByTag( $tag ) {
		$elt = $this->tail;
		while ( $elt && !( $elt instanceof BalanceMarker ) ) {
			if ( $elt->localName === $tag ) {
				return $elt;
			}
			$elt = $elt->prevAFE;
		}
		return null;
	}

	/**
	 * Determine whether an element is in the list of formatting elements.
	 * An element is considered to be in the list if it is the head or
	 * has a predecessor.
	 * @param BalanceElement $elt
	 * @return boolean
	 */
	public function isInList( BalanceElement $elt ) {
		return $this->head === $elt || $elt->prevAFE;
	}

	/**
	 * Find the element $elt in the list and remove it.
	 * Used when parsing &lt;a&gt; in body mode.
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException if $elt is not in the list
	 */
	public function remove( BalanceElement $elt ) {
		if ( $this->head !== $elt && !$elt->prevAFE ) {
			throw new ParameterAssertionException( '$elt',
				"Attempted to remove an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $elt ) {
			$this->head = $elt->nextAFE;
		}
		if ( $this->tail === $elt ) {
			$this->tail = $elt->prevAFE;
		}
		// Update previous element
		if ( $elt->prevAFE ) {
			$elt->prevAFE->nextAFE = $elt->nextAFE;
		}
		// Update next element
		if ( $elt->nextAFE ) {
			$elt->nextAFE->prevAFE = $elt->prevAFE;
		}
		// Clear pointers so that isInList() etc. will work
		$elt->prevAFE = $elt->nextAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $elt );
	}

	/**
	 * Append $elt to the end of its bucket in the Noah table for the
	 * current (most recent) list segment.  No size limit is enforced here.
	 * @param BalanceElement $elt
	 */
	private function addToNoahList( BalanceElement $elt ) {
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			$tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
			}
			$tail->nextNoah = $elt;
		}
	}

	/**
	 * Unlink $elt from its bucket in the Noah table for the current
	 * (most recent) list segment.  Does nothing if that table has no
	 * bucket for the element's key or the bucket does not contain it.
	 * @param BalanceElement $elt
	 */
	private function removeFromNoahList( BalanceElement $elt ) {
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		$key = $elt->getNoahKey();
		if ( !isset( $table[$key] ) ) {
			// No bucket for this key in the current segment: nothing to
			// unlink.  Returning here avoids reading an undefined array
			// key and then dereferencing null.
			return;
		}
		$noahElt = $table[$key];
		if ( $noahElt === $elt ) {
			// $elt is the first member of the bucket.
			if ( $noahElt->nextNoah ) {
				$table[$key] = $noahElt->nextNoah;
				$noahElt->nextNoah = null;
			} else {
				unset( $table[$key] );
			}
		} else {
			// Search the rest of the bucket, remembering the predecessor.
			$prevNoahElt = $noahElt;
			for (
				$noahElt = $prevNoahElt->nextNoah;
				$noahElt;
				$prevNoahElt = $noahElt, $noahElt = $noahElt->nextNoah
			) {
				if ( $noahElt === $elt ) {
					// Found it, unlink
					$prevNoahElt->nextNoah = $elt->nextNoah;
					$elt->nextNoah = null;
					break;
				}
			}
		}
	}

	/**
	 * Find element $a in the list and replace it with element $b
	 * @param BalanceElement $a Element currently in the list
	 * @param BalanceElement $b Element which takes its place
	 * @throws ParameterAssertionException if $a is not in the list
	 */
	public function replace( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to replace an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $a ) {
			$this->head = $b;
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		// Update previous element
		if ( $a->prevAFE ) {
			$a->prevAFE->nextAFE = $b;
		}
		// Update next element
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->prevAFE = $a->prevAFE;
		$b->nextAFE = $a->nextAFE;
		$a->nextAFE = $a->prevAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $a );
		$this->addToNoahList( $b );
	}

	/**
	 * Find $a in the list and insert $b after it.
	 * @param BalanceElement $a Element currently in the list
	 * @param BalanceElement $b Element to insert
	 * @throws ParameterAssertionException if $a is not in the list
	 */
	public function insertAfter( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to insert after an element which is not in the AFE list" );
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->nextAFE = $a->nextAFE;
		$b->prevAFE = $a;
		$a->nextAFE = $b;
		$this->addToNoahList( $b );
	}

	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	/**
	 * Reconstruct the active formatting elements.
	 * @param BalanceStack $stack The open elements stack
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
	 */
	// @codingStandardsIgnoreEnd
	public function reconstruct( $stack ) {
		$entry = $this->tail;
		// If there are no entries in the list of active formatting elements,
		// then there is nothing to reconstruct
		if ( !$entry ) {
			return;
		}
		// If the last is a marker, do nothing.
		if ( $entry instanceof BalanceMarker ) {
			return;
		}
		// Or if it is an open element, do nothing.
		if ( $stack->indexOf( $entry ) >= 0 ) {
			return;
		}

		// Loop backward through the list until we find a marker or an
		// open element
		$foundit = false;
		while ( $entry->prevAFE ) {
			$entry = $entry->prevAFE;
			if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
				$foundit = true;
				break;
			}
		}

		// Now loop forward, starting from the element after the current one (or
		// the first element if we didn't find a marker or open element),
		// recreating formatting elements and pushing them back onto the list
		// of open elements.
		if ( $foundit ) {
			$entry = $entry->nextAFE;
		}
		do {
			$newElement = $stack->insertHTMLElement(
				$entry->localName,
				$entry->attribs );
			// The new element takes over the old entry's position, so its
			// nextAFE is the next entry to recreate.
			$this->replace( $entry, $newElement );
			$entry = $newElement->nextAFE;
		} while ( $entry );
	}

	/**
	 * Get a string representation of the AFE list, for debugging
	 * @return string One line per list entry, plus warnings about any
	 *   inconsistent links that were noticed
	 */
	public function __toString() {
		$prev = null;
		$s = '';
		for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
			if ( $node instanceof BalanceMarker ) {
				$s .= "MARKER\n";
				continue;
			}
			// Short hash so that distinct elements with the same name can
			// be told apart in the output.
			$s .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			if ( $node->nextNoah ) {
				$s .= " (noah sibling: {$node->nextNoah->localName}#" .
					substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
					')';
			}
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$s .= " (reverse link is wrong!)";
			}
			$s .= "\n";
		}
		if ( $prev !== $this->tail ) {
			$s .= "(tail pointer is wrong!)\n";
		}
		return $s;
	}
}
1733
1734
/**
1735
 * An implementation of the tree building portion of the HTML5 parsing
1736
 * spec.
1737
 *
1738
 * This is used to balance and tidy output so that the result can
1739
 * always be cleanly serialized/deserialized by an HTML5 parser.  It
1740
 * does *not* guarantee "conforming" output -- the HTML5 spec contains
1741
 * a number of constraints which are not enforced by the HTML5 parsing
1742
 * process.  But the result will be free of gross errors: misnested or
1743
 * unclosed tags, for example, and will be unchanged by spec-compliant
1744
 * parsing followed by serialization.
1745
 *
1746
 * The tree building stage is structured as a state machine.
1747
 * When comparing the implementation to
1748
 * https://www.w3.org/TR/html5/syntax.html#tree-construction
1749
 * note that each state is implemented as a function with a
1750
 * name ending in `Mode` (because the HTML spec refers to them
1751
 * as insertion modes).  The current insertion mode is held by
1752
 * the $parseMode property.
1753
 *
1754
 * The following simplifications have been made:
1755
 * - We handle body content only (ie, we start `in body`.)
1756
 * - The document is never in "quirks mode".
1757
 * - All occurrences of < and > have been entity escaped, so we
1758
 *   can parse tags by simply splitting on those two characters.
1759
 *   (This also simplifies the handling of < inside <textarea>.)
1760
 *   The character < must not appear inside comments.
1761
 *   Similarly, all attributes have been "cleaned" and are double-quoted
1762
 *   and escaped.
1763
 * - All null characters are assumed to have been removed.
1764
 * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1765
 *   <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
1766
 *   <noembed>, <noscript>, <script>, <title>.  As a result,
1767
 *   further simplifications can be made:
1768
 *   - `frameset-ok` is not tracked.
1769
 *   - `head element pointer` is not tracked (but presumed non-null)
1770
 *   - Tokenizer has only a single mode. (<textarea> wants RCDATA and
1771
 *     <style>/<noframes> want RAWTEXT modes which we only loosely emulate.)
1772
 *
1773
 *   We generally mark places where we omit cases from the spec due to
1774
 *   disallowed elements with a comment: `# OMITTED: <element-name>`.
1775
 *
1776
 *   The HTML spec keeps a flag during the parsing process to track
1777
 *   whether or not a "parse error" has been encountered.  We don't
1778
 *   bother to track that flag, we just implement the error-handling
1779
 *   process as specified.
1780
 *
1781
 * @ingroup Parser
1782
 * @since 1.27
1783
 * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1784
 */
1785
class Balancer {
1786
	/** Name of the method which implements the current insertion mode */
	private $parseMode;
	/** Iterator over the pieces of the input text, split on '<' */
	private $bitsIterator;
	/** Value of the 'allowedHtmlElements' configuration option */
	private $allowedHtmlElements;
	/** The list of active formatting elements used by balance() */
	private $afe;
	/** The stack of open elements used by balance() */
	private $stack;
	/** Value of the 'strict' configuration option */
	private $strict;
	/** Value of the 'tidyCompat' configuration option */
	private $tidyCompat;
	/** Value of the 'allowComments' configuration option */
	private $allowComments;
	/**
	 * Callback passed to balance(); advance() invokes it on the attribute
	 * string of each tag.  Declared here so that balance() does not create
	 * it as a dynamic property.
	 */
	private $processingCallback;
	/** Arguments passed to balance() for $processingCallback */
	private $processingArgs;

	private $textIntegrationMode;
	private $pendingTableText;
	private $originalInsertionMode;
	private $fragmentContext;
	private $formElementPointer;
	private $ignoreLinefeed;
	private $inRCDATA;
	private $inRAWTEXT;
1803
1804
	/**
1805
	 * Valid HTML5 comments.
1806
	 * Regex borrowed from Tim Starling's "remex-html" project.
1807
	 */
1808
	const VALID_COMMENT_REGEX = "~ !--
1809
		(                             # 1. Comment match detector
1810
			> | -> | # Invalid short close
1811
			(                         # 2. Comment contents
1812
				(?:
1813
					(?! --> )
1814
					(?! --!> )
1815
					(?! --! \z )
1816
					(?! -- \z )
1817
					(?! - \z )
1818
					.
1819
				)*+
1820
			)
1821
			(                         # 3. Comment close
1822
				--> |   # Normal close
1823
				--!> |  # Comment end bang
1824
				(                     # 4. Indicate matches requiring EOF
1825
					--! |   # EOF in comment end bang state
1826
					-- |    # EOF in comment end state
1827
					-  |    # EOF in comment end dash state
1828
					        # EOF in comment state
1829
				)
1830
			)
1831
		)
1832
		([^<]*) \z                    # 5. Non-tag text after the comment
1833
		~xs";
1834
1835
	/**
1836
	 * Create a new Balancer.
1837
	 * @param array $config Balancer configuration.  Includes:
1838
	 *     'strict' : boolean, defaults to false.
1839
	 *         When true, enforces syntactic constraints on input:
1840
	 *         all non-tag '<' must be escaped, all attributes must be
1841
	 *         separated by a single space and double-quoted.  This is
1842
	 *         consistent with the output of the Sanitizer.
1843
	 *     'allowedHtmlElements' : array, defaults to null.
1844
	 *         When present, the keys of this associative array give
1845
	 *         the acceptable HTML tag names.  When not present, no
1846
	 *         tag sanitization is done.
1847
	 *     'tidyCompat' : boolean, defaults to false.
1848
	 *         When true, the serialization algorithm is tweaked to
1849
	 *         provide historical compatibility with the old "tidy"
1850
	 *         program: <p>-wrapping is done to the children of
1851
	 *         <body> and <blockquote> elements, and empty elements
1852
	 *         are removed.
1853
	 *     'allowComments': boolean, defaults to true.
1854
	 *         When true, allows HTML comments in the input.
1855
	 *         The Sanitizer generally strips all comments, so if you
1856
	 *         are running on sanitized output you can set this to
1857
	 *         false to get a bit more performance.
1858
	 */
1859
	public function __construct( array $config = [] ) {
		// Fill in the default for every option the caller did not supply.
		$config += [
			'strict' => false,
			'allowedHtmlElements' => null,
			'tidyCompat' => false,
			'allowComments' => true,
		];
		$this->allowedHtmlElements = $config['allowedHtmlElements'];
		$this->strict = $config['strict'];
		$this->tidyCompat = $config['tidyCompat'];
		$this->allowComments = $config['allowComments'];
		if ( $this->allowedHtmlElements !== null ) {
			# Sanity check!  Refuse a set of allowed elements which
			# includes anything listed in $unsupportedSet.  Only the keys
			# are compared; the values of both arrays are ignored.
			$bad = array_intersect_key(
				$this->allowedHtmlElements,
				BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
			);
			if ( count( $bad ) > 0 ) {
				// implode() takes the separator first; the legacy
				// (array, separator) order was removed in PHP 8.0.
				$badstr = implode( ',', array_keys( $bad ) );
				throw new ParameterAssertionException(
					'$config',
					'Balance attempted with sanitization including ' .
					"unsupported elements: {$badstr}"
				);
			}
		}
	}
1891
1892
	/**
1893
	 * Return a balanced HTML string for the HTML fragment given by $text,
1894
	 * subject to the caveats listed in the class description.  The result
1895
	 * will typically be idempotent -- that is, rebalancing the output
1896
	 * would result in no change.
1897
	 *
1898
	 * @param string $text The markup to be balanced
1899
	 * @param callable $processingCallback Callback to do any variable or
1900
	 *   parameter replacements in HTML attributes values
1901
	 * @param array|bool $processingArgs Arguments for the processing callback
1902
	 * @return string The balanced markup
1903
	 */
1904
	public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
		$this->parseMode = 'inBodyMode';
		$this->bitsIterator = new ExplodeIterator( '<', $text );
		$this->afe = new BalanceActiveFormattingElements();
		$this->stack = new BalanceStack();
		$this->stack->tidyCompat = $this->tidyCompat;
		$this->processingCallback = $processingCallback;
		$this->processingArgs = $processingArgs;

		$this->textIntegrationMode =
			$this->ignoreLinefeed =
			$this->inRCDATA =
			$this->inRAWTEXT = false;

		try {
			# The stack is constructed with an <html> element already on it.
			# Set this up as a fragment parsed with <body> as the context.
			$this->fragmentContext =
				new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
			$this->resetInsertionMode();
			$this->formElementPointer = null;
			// Use the nearest <form> among the context element and its
			// ancestors, if there is one, as the form element pointer.
			for ( $e = $this->fragmentContext; $e != null; $e = $e->parent ) {
				if ( $e->isHtmlNamed( 'form' ) ) {
					$this->formElementPointer = $e;
					break;
				}
			}

			// First element is text not tag
			$x = $this->bitsIterator->current();
			$this->bitsIterator->next();
			$this->insertToken( 'text', str_replace( '>', '&gt;', $x ) );
			// Now process each tag.
			while ( $this->bitsIterator->valid() ) {
				$this->advance();
			}
			$this->insertToken( 'eof', null );
			$result = $this->stack->getOutput();
		} finally {
			// Free memory before returning.  This is done in a finally
			// block so that the parse state is also released when an
			// exception (for example a failed assertion in strict mode)
			// propagates out of the tree builder.
			$this->bitsIterator = null;
			$this->afe = null;
			$this->stack = null;
			$this->fragmentContext = null;
			$this->formElementPointer = null;
		}
		return $result;
	}
1949
1950
	/**
1951
	 * Pass a token to the tree builder.  The $token will be one of the
	 * strings "tag", "endtag", "text", "comment", or "eof".
1953
	 */
1954
	private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
		// validate tags against $unsupportedSet
		$isTagToken = ( $token === 'tag' || $token === 'endtag' );
		if ( $isTagToken ) {
			if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
				# As described in "simplifications" above, these tags are
				# not supported in the balancer.
				Assert::invariant( !$this->strict, "Unsupported $token <$value> found." );
				return false;
			}
		} elseif ( $token === 'text' && $value === '' ) {
			# Don't actually inject the empty string as a text token.
			return true;
		}

		// Support pre/listing/textarea by suppressing initial linefeed.
		// The flag only ever applies to the very next token.
		if ( $this->ignoreLinefeed ) {
			$this->ignoreLinefeed = false;
			if ( $token === 'text' && $value[0] === "\n" ) {
				if ( $value === "\n" ) {
					# Nothing would be left, don't inject the empty string.
					return true;
				}
				$value = substr( $value, 1 );
			}
		}

		// Decide whether the token is handled by the current insertion
		// mode or by the rules for foreign content.
		$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );

		if (
			$this->stack->length() === 0 ||
			$adjusted->isHtml() ||
			$token === 'eof'
		) {
			$asHtml = true;
		} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
			// Text, and any start tag other than mglyph/malignmark, is
			// handled as HTML here; everything else stays foreign.
			$asHtml = $token === 'text' ||
				( $token === 'tag' && $value !== 'mglyph' && $value !== 'malignmark' );
		} elseif (
			$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$adjusted->localName === 'annotation-xml' &&
			$token === 'tag' && $value === 'svg'
		) {
			$asHtml = true;
		} else {
			$asHtml = $adjusted->isHtmlIntegrationPoint() &&
				( $token === 'tag' || $token === 'text' );
		}

		if ( !$asHtml ) {
			return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
		}
		// The insertion mode is stored as the name of the handler method.
		$handler = $this->parseMode;
		return $this->$handler( $token, $value, $attribs, $selfclose );
	}
2021
2022
	/**
	 * Process a token using the rules for parsing tokens in foreign
	 * (non-HTML) content.
	 *
	 * @param string $token The token type
	 * @param string $value Text contents, or the tag name
	 * @param array|null $attribs Attributes of a start tag
	 * @param bool $selfclose Whether a start tag was self-closing
	 * @return bool|null True when the token was consumed here; otherwise
	 *   whatever the reprocessing handler returned.  Token types with no
	 *   branch below fall off the end of the function.
	 */
	private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
		// NOTE(review): 'comment' tokens have no branch here, so a comment
		// inside foreign content is silently dropped -- confirm that this
		// is intended.
		if ( $token === 'text' ) {
			$this->stack->insertText( $value );
			return true;
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'font':
				// Per the HTML spec, <font> only breaks out of foreign
				// content when it carries a color, face or size
				// attribute; a plain <font> is an ordinary foreign start
				// tag.  (This test was previously inverted.)
				if ( !(
					isset( $attribs['color'] )
					|| isset( $attribs['face'] )
					|| isset( $attribs['size'] )
				) ) {
					break;
				}
				/* otherwise, fall through */
			case 'b':
			case 'big':
			case 'blockquote':
			case 'body':
			case 'br':
			case 'center':
			case 'code':
			case 'dd':
			case 'div':
			case 'dl':
			case 'dt':
			case 'em':
			case 'embed':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
			case 'head':
			case 'hr':
			case 'i':
			case 'img':
			case 'li':
			case 'listing':
			case 'menu':
			case 'meta':
			case 'nobr':
			case 'ol':
			case 'p':
			case 'pre':
			case 'ruby':
			case 's':
			case 'small':
			case 'span':
			case 'strong':
			case 'strike':
			case 'sub':
			case 'sup':
			case 'table':
			case 'tt':
			case 'u':
			case 'ul':
			case 'var':
				if ( $this->fragmentContext ) {
					// Fragment case: treat as any other start tag.
					// balance() sets a fragment context before any token
					// is processed.
					break;
				}
				// Pop foreign elements until the current node is one
				// where HTML content may appear, then reprocess the token.
				while ( true ) {
					$this->stack->pop();
					$node = $this->stack->currentNode;
					if (
						$node->isMathmlTextIntegrationPoint() ||
						$node->isHtmlIntegrationPoint() ||
						$node->isHtml()
					) {
						break;
					}
				}
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			// "Any other start tag"
			$adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
				$this->fragmentContext : $this->stack->currentNode;
			// The new element is created in the namespace of the
			// adjusted current node.
			$this->stack->insertForeignElement(
				$adjusted->namespaceURI, $value, $attribs
			);
			if ( $selfclose ) {
				$this->stack->pop();
			}
			return true;
		} elseif ( $token === 'endtag' ) {
			$first = true;
			foreach ( $this->stack as $i => $node ) {
				if ( $node->isHtml() && !$first ) {
					// process the end tag as HTML
					$func = $this->parseMode;
					return $this->$func( $token, $value, $attribs, $selfclose );
				} elseif ( $i === 0 ) {
					// Reached the stack entry with index 0 without a
					// match: ignore the end tag.
					return true;
				} elseif ( $node->localName === $value ) {
					$this->stack->popTag( $node );
					return true;
				}
				$first = false;
			}
		}
	}
2123
2124
	/**
2125
	 * Grab the next "token" from $bitsIterator.  This is either a open/close
2126
	 * tag or text or a comment, depending on whether the Sanitizer approves.
2127
	 */
2128
	private function advance() {
		$x = $this->bitsIterator->current();
		$this->bitsIterator->next();
		$regs = [];
		# Handle comments.  These won't be generated by mediawiki (they
		# are stripped in the Sanitizer) but may be generated by extensions.
		if (
			$this->allowComments &&
			!( $this->inRCDATA || $this->inRAWTEXT ) &&
			preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
			/* verify EOF condition where necessary */
			( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
		) {
			$contents = $regs[2][0];
			$rest = $regs[5][0];
			$this->insertToken( 'comment', $contents );
			$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
			return;
		}
		# $slash: Does the current element start with a '/'?
		# $t: Current element name
		# $attribStr: String between element name and >
		# $brace: Ending '>' or '/>'
		# $rest: Everything until the next element from the $bitsIterator
		if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
			list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
			$t = strtolower( $t );
			if ( $this->strict ) {
				/* Verify that attributes are all properly double-quoted */
				Assert::invariant(
					preg_match(
						'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
					),
					"Bad attribute string found"
				);
			}
		} else {
			Assert::invariant(
				!$this->strict, "< found which does not start a valid tag"
			);
			$slash = $t = $attribStr = $brace = $rest = null;
		}
		// $goodtag stays truthy only while the piece may still be treated
		// as a tag; each check below can only turn it off.
		$goodtag = $t;
		if ( $this->inRCDATA ) {
			if ( $slash && $t === $this->inRCDATA ) {
				$this->inRCDATA = false;
			} else {
				// No tags allowed; this emulates the "rcdata" tokenizer mode.
				$goodtag = false;
			}
		}
		if ( $this->inRAWTEXT ) {
			if ( $slash && $t === $this->inRAWTEXT ) {
				$this->inRAWTEXT = false;
			} else {
				// No tags allowed, no entity-escaping done.
				$goodtag = false;
			}
		}
		$sanitize = $this->allowedHtmlElements !== null;
		if ( $sanitize ) {
			// Combine with the existing value instead of overwriting it,
			// so that a tag already rejected by the RCDATA/RAWTEXT checks
			// above is not re-enabled merely because its name is allowed.
			$goodtag = $goodtag && isset( $this->allowedHtmlElements[$t] );
		}
		if ( $goodtag ) {
			if ( is_callable( $this->processingCallback ) ) {
				// call_user_func_array() is used so that $attribStr can be
				// handed to the callback by reference.
				call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
			}
			if ( $sanitize ) {
				$goodtag = Sanitizer::validateTag( $attribStr, $t );
			}
		}
		if ( $goodtag ) {
			if ( $sanitize ) {
				$attribs = Sanitizer::decodeTagAttributes( $attribStr );
				$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
			} else {
				$attribs = Sanitizer::decodeTagAttributes( $attribStr );
			}
			// The tree builder may still reject the tag.
			$goodtag = $this->insertToken(
				$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
			);
		}
		if ( $goodtag ) {
			// Escape the trailing text once; a second pass would find
			// nothing left to replace.
			$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		} elseif ( $this->inRAWTEXT ) {
			$this->insertToken( 'text', "<$x" );
		} else {
			# bad tag; serialize entire thing as text.
			$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
		}
	}
2220
2221
	/**
	 * Make $mode the current parse mode.
	 *
	 * The name is checked with Assert::parameter() before any state is
	 * touched, so a rejected name leaves $this->parseMode unchanged.
	 *
	 * @param string $mode Name of the mode handler; must end in "Mode".
	 * @return string The value $this->parseMode held before the switch.
	 */
	private function switchMode( $mode ) {
		$hasModeSuffix = ( substr( $mode, -4 ) === 'Mode' );
		Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );

		$previous = $this->parseMode;
		$this->parseMode = $mode;

		return $previous;
	}
2229
2230
	/**
	 * Change the parse mode, then hand the same token to insertToken().
	 *
	 * @param string $mode Mode name, validated by switchMode().
	 * @param string $token Token type, forwarded unchanged.
	 * @param mixed $value Token value, forwarded unchanged.
	 * @param mixed $attribs Token attributes, forwarded unchanged.
	 * @param mixed $selfclose Self-closing flag, forwarded unchanged.
	 * @return mixed Whatever insertToken() returns.
	 */
	private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
		// The previous mode returned by switchMode() is deliberately unused.
		$this->switchMode( $mode );
		$result = $this->insertToken( $token, $value, $attribs, $selfclose );
		return $result;
	}
2234
2235
	/**
	 * Choose the parse mode from the elements on $this->stack.
	 *
	 * Each stack entry is examined in iteration order. The entry whose key
	 * is 0 is treated as the last one: $this->fragmentContext (when set)
	 * stands in for it, and if it does not select a mode itself the mode
	 * falls back to 'inBodyMode'. The method returns as soon as a mode has
	 * been chosen.
	 *
	 * NOTE(review): presumably the stack iterates from the current node
	 * towards the bottom of the stack, with key 0 being the bottom-most
	 * entry -- confirm against BalanceStack's iterator.
	 * NOTE(review): static analysis reports that $this->stack is typed
	 * BalanceStack|null and so is not guaranteed to be traversable here.
	 */
	private function resetInsertionMode() {
		foreach ( $this->stack as $pos => $elt ) {
			// Once key 0 is reached this iteration always returns, so the
			// "last" flag only needs to be tracked per iteration.
			$isLast = ( $pos === 0 );
			if ( $isLast && $this->fragmentContext ) {
				$elt = $this->fragmentContext;
			}

			if ( $elt->isHtml() ) {
				$resolved = true;
				$newMode = null;
				switch ( $elt->localName ) {
				case 'select':
					// Default for <select>; upgraded below if a <table>
					// ancestor is found before any <template>.
					$newMode = 'inSelectMode';
					$depth = $this->stack->length();
					// NOTE(review): the index handed to node() is computed as
					// ( length - k - 1 ); confirm this matches the index
					// convention of the foreach key used to seed $k.
					for ( $k = $pos + 1; $k < $depth - 1; $k++ ) {
						$ancestor = $this->stack->node( $depth - $k - 1 );
						if ( $ancestor->isHtmlNamed( 'template' ) ) {
							break;
						}
						if ( $ancestor->isHtmlNamed( 'table' ) ) {
							$newMode = 'inSelectInTableMode';
							break;
						}
					}
					break;
				case 'tr':
					$newMode = 'inRowMode';
					break;
				case 'tbody':
				case 'tfoot':
				case 'thead':
					$newMode = 'inTableBodyMode';
					break;
				case 'caption':
					$newMode = 'inCaptionMode';
					break;
				case 'colgroup':
					$newMode = 'inColumnGroupMode';
					break;
				case 'table':
					$newMode = 'inTableMode';
					break;
				case 'template':
					// Most recently pushed template insertion mode.
					// NOTE(review): static analysis reports that the
					// templateInsertionModes property is not declared.
					$newMode = array_slice( $this->templateInsertionModes, -1 )[0];
					break;
				case 'body':
					$newMode = 'inBodyMode';
					break;
				# OMITTED: <frameset>
				# OMITTED: <html>
				# OMITTED: <head>
				default:
					# OMITTED: <head>
					// Table cells only count when this is not the last entry.
					$resolved = !$isLast && $elt->isA( BalanceSets::$tableCellSet );
					$newMode = 'inCellMode';
				}
				if ( $resolved ) {
					$this->switchMode( $newMode );
					return;
				}
			}

			if ( $isLast ) {
				$this->switchMode( 'inBodyMode' );
				return;
			}
		}
	}
2304
2305
	/**
	 * Finish parsing by emptying the stack of open elements.
	 *
	 * Of the spec's "stop parsing" steps only step 2 ("pop all the nodes
	 * off the stack of open elements") applies here, and even then the
	 * top-most <html> element is kept on the stack.
	 */
	private function stopParsing() {
		# Drop the active formatting element list before popping: otherwise
		# its element objects stay live during serialization, potentially
		# using O(N^2) memory. Popping the stack never triggers a
		# reconstruction of the active formatting elements, so the list is
		# not needed past this point.
		$this->afe = null;

		# Pop everything except the top-most <html> element.
		$this->stack->popTo( 1 );
	}
2317
2318
	/**
	 * Open a raw-text element and switch to 'inTextMode' for its content.
	 *
	 * The tag name is stored in $this->inRAWTEXT, and the mode that was
	 * active before the switch is saved in $this->originalInsertionMode.
	 *
	 * @param string $value Tag name of the element to insert.
	 * @param mixed $attribs Attributes passed on to insertHTMLElement().
	 * @return bool Always true.
	 */
	private function parseRawText( $value, $attribs = null ) {
		$this->stack->insertHTMLElement( $value, $attribs );
		$this->inRAWTEXT = $value;

		$modeBefore = $this->switchMode( 'inTextMode' );
		$this->originalInsertionMode = $modeBefore;

		return true;
	}
2324
2325
	/**
	 * Token handler for the 'inTextMode' parse mode.
	 *
	 * - 'text': appended to the stack via insertText().
	 * - 'eof': pops the current element, restores the saved original
	 *   insertion mode and reprocesses the token in that mode.
	 * - 'endtag': pops the current element and restores the saved mode.
	 * - any other token: nothing is done.
	 *
	 * @param string $token Token type.
	 * @param mixed $value Token value.
	 * @param mixed $attribs Token attributes (only forwarded on 'eof').
	 * @param mixed $selfclose Self-closing flag (only forwarded on 'eof').
	 * @return mixed Result of the reprocessing for 'eof'; true otherwise.
	 */
	private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'eof' ) {
			$this->stack->pop();
			return $this->switchModeAndReprocess(
				$this->originalInsertionMode, $token, $value, $attribs, $selfclose
			);
		}

		if ( $token === 'text' ) {
			$this->stack->insertText( $value );
		} elseif ( $token === 'endtag' ) {
			$this->stack->pop();
			$this->switchMode( $this->originalInsertionMode );
		}

		return true;
	}
2341
2342
	/**
	 * Token handler implementing the subset of "in head" processing that
	 * this balancer supports.
	 *
	 * Handled directly (returns true): comments; leading whitespace of
	 * text; the start tags meta/base/basefont/bgsound/link (inserted and
	 * immediately popped), noframes/style (raw text) and template; the
	 * </template> end tag; every other end tag except </br> (ignored).
	 *
	 * Anything else -- text with non-whitespace content, other start
	 * tags, </br>, and other token types -- goes through a synthetic
	 * </head> and is then re-submitted to insertToken().
	 *
	 * @param string $token Token type.
	 * @param mixed $value Token value (text, tag name or comment).
	 * @param mixed $attribs Token attributes.
	 * @param mixed $selfclose Self-closing flag.
	 * @return mixed True when handled here, else insertToken()'s result.
	 */
	private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'comment' ) {
			$this->stack->insertComment( $value );
			return true;
		}

		if ( $token === 'text' ) {
			// Peel off leading whitespace and insert it right away.
			if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
				$leading = $matches[0];
				$this->stack->insertText( $leading );
				$value = substr( $value, strlen( $leading ) );
			}
			if ( strlen( $value ) === 0 ) {
				return true; // All text handled.
			}
			// Non-whitespace remainder is dealt with at the bottom.
		}

		if ( $token === 'tag' ) {
			// Loose in_array() on purpose: it compares the same way the
			// switch statement this replaces did.
			# OMITTED: <html>
			# OMITTED: for <meta>, a full HTML parser might change the encoding.
			if ( in_array( $value, [ 'meta', 'base', 'basefont', 'bgsound', 'link' ] ) ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			}
			# OMITTED: <title>
			# OMITTED: <noscript>
			if ( in_array( $value, [ 'noframes', 'style' ] ) ) {
				return $this->parseRawText( $value, $attribs );
			}
			# OMITTED: <script>
			if ( in_array( $value, [ 'template' ] ) ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->afe->insertMarker();
				# OMITTED: frameset_ok
				$this->switchMode( 'inTemplateMode' );
				// Record the mode that was just entered.
				$this->templateInsertionModes[] = $this->parseMode;
				return true;
			}
			# OMITTED: <head>
			// Any other start tag is dealt with at the bottom.
		}

		if ( $token === 'endtag' ) {
			switch ( $value ) {
			# OMITTED: <head>
			# OMITTED: <body>
			# OMITTED: <html>
			case 'br':
				// Dealt with at the bottom of the function.
				break;
			case 'template':
				if ( $this->stack->indexOf( $value ) < 0 ) {
					// No open <template>: ignore the token.
					return true;
				}
				$this->stack->generateImpliedEndTags( null, true /* thorough */ );
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				array_pop( $this->templateInsertionModes );
				$this->resetInsertionMode();
				return true;
			default:
				// Every other end tag is ignored.
				return true;
			}
		}

		// Not handled above: act as if </head> had been seen ...
		$this->inHeadMode( 'endtag', 'head' );
		// ... and then process the current token again.
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
2411
2412
	private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
2413
		if ( $token === 'text' ) {
2414
			$this->afe->reconstruct( $this->stack );
2415
			$this->stack->insertText( $value );
2416
			return true;
2417
		} elseif ( $token === 'eof' ) {
2418
			if ( !empty( $this->templateInsertionModes ) ) {
2419
				return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
2420
			}
2421
			$this->stopParsing();
2422
			return true;
2423
		} elseif ( $token === 'tag' ) {
2424
			switch ( $value ) {
2425
			# OMITTED: <html>
2426
			case 'base':
2427
			case 'basefont':
2428
			case 'bgsound':
2429
			case 'link':
2430
			case 'meta':
2431
			case 'noframes':
2432
			# OMITTED: <script>
2433
			case 'style':
2434
			case 'template':
2435
			# OMITTED: <title>
2436
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2437
			# OMITTED: <body>
2438
			# OMITTED: <frameset>
2439
2440
			case 'address':
2441
			case 'article':
2442
			case 'aside':
2443
			case 'blockquote':
2444
			case 'center':
2445
			case 'details':
2446
			case 'dialog':
2447
			case 'dir':
2448
			case 'div':
2449
			case 'dl':
2450
			case 'fieldset':
2451
			case 'figcaption':
2452
			case 'figure':
2453
			case 'footer':
2454
			case 'header':
2455
			case 'hgroup':
2456
			case 'main':
2457
			case 'menu':
2458
			case 'nav':
2459
			case 'ol':
2460
			case 'p':
2461
			case 'section':
2462
			case 'summary':
2463
			case 'ul':
2464
				if ( $this->stack->inButtonScope( 'p' ) ) {
2465
					$this->inBodyMode( 'endtag', 'p' );
2466
				}
2467
				$this->stack->insertHTMLElement( $value, $attribs );
2468
				return true;
2469
2470
			case 'h1':
2471
			case 'h2':
2472
			case 'h3':
2473
			case 'h4':
2474
			case 'h5':
2475 View Code Duplication
			case 'h6':
2476
				if ( $this->stack->inButtonScope( 'p' ) ) {
2477
					$this->inBodyMode( 'endtag', 'p' );
2478
				}
2479
				if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
2480
					$this->stack->pop();
2481
				}
2482
				$this->stack->insertHTMLElement( $value, $attribs );
2483
				return true;
2484
2485
			case 'pre':
2486 View Code Duplication
			case 'listing':
2487
				if ( $this->stack->inButtonScope( 'p' ) ) {
2488
					$this->inBodyMode( 'endtag', 'p' );
2489
				}
2490
				$this->stack->insertHTMLElement( $value, $attribs );
2491
				$this->ignoreLinefeed = true;
2492
				# OMITTED: frameset_ok
2493
				return true;
2494
2495
			case 'form':
2496
				if (
2497
					$this->formElementPointer &&
2498
					$this->stack->indexOf( 'template' ) < 0
2499
				) {
2500
					return true; // in a form, not in a template.
2501
				}
2502
				if ( $this->stack->inButtonScope( "p" ) ) {
2503
					$this->inBodyMode( 'endtag', 'p' );
2504
				}
2505
				$elt = $this->stack->insertHTMLElement( $value, $attribs );
2506
				if ( $this->stack->indexOf( 'template' ) < 0 ) {
2507
					$this->formElementPointer = $elt;
2508
				}
2509
				return true;
2510
2511
			case 'li':
2512
				# OMITTED: frameset_ok
2513
				foreach ( $this->stack as $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2514
					if ( $node->isHtmlNamed( 'li' ) ) {
2515
						$this->inBodyMode( 'endtag', 'li' );
2516
						break;
2517
					}
2518
					if (
2519
						$node->isA( BalanceSets::$specialSet ) &&
2520
						!$node->isA( BalanceSets::$addressDivPSet )
2521
					) {
2522
						break;
2523
					}
2524
				}
2525
				if ( $this->stack->inButtonScope( 'p' ) ) {
2526
					$this->inBodyMode( 'endtag', 'p' );
2527
				}
2528
				$this->stack->insertHTMLElement( $value, $attribs );
2529
				return true;
2530
2531
			case 'dd':
2532
			case 'dt':
2533
				# OMITTED: frameset_ok
2534
				foreach ( $this->stack as $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2535
					if ( $node->isHtmlNamed( 'dd' ) ) {
2536
						$this->inBodyMode( 'endtag', 'dd' );
2537
						break;
2538
					}
2539
					if ( $node->isHtmlNamed( 'dt' ) ) {
2540
						$this->inBodyMode( 'endtag', 'dt' );
2541
						break;
2542
					}
2543
					if (
2544
						$node->isA( BalanceSets::$specialSet ) &&
2545
						!$node->isA( BalanceSets::$addressDivPSet )
2546
					) {
2547
						break;
2548
					}
2549
				}
2550
				if ( $this->stack->inButtonScope( 'p' ) ) {
2551
					$this->inBodyMode( 'endtag', 'p' );
2552
				}
2553
				$this->stack->insertHTMLElement( $value, $attribs );
2554
				return true;
2555
2556
			# OMITTED: <plaintext>
2557
2558 View Code Duplication
			case 'button':
2559
				if ( $this->stack->inScope( 'button' ) ) {
2560
					$this->inBodyMode( 'endtag', 'button' );
2561
					return $this->insertToken( $token, $value, $attribs, $selfclose );
2562
				}
2563
				$this->afe->reconstruct( $this->stack );
2564
				$this->stack->insertHTMLElement( $value, $attribs );
2565
				return true;
2566
2567
			case 'a':
2568
				$activeElement = $this->afe->findElementByTag( 'a' );
2569
				if ( $activeElement ) {
2570
					$this->inBodyMode( 'endtag', 'a' );
2571
					if ( $this->afe->isInList( $activeElement ) ) {
2572
						$this->afe->remove( $activeElement );
2573
						// Don't flatten here, since when we fall
2574
						// through below we might foster parent
2575
						// the new <a> tag inside this one.
2576
						$this->stack->removeElement( $activeElement, false );
2577
					}
2578
				}
2579
				/* Falls through */
2580
			case 'b':
2581
			case 'big':
2582
			case 'code':
2583
			case 'em':
2584
			case 'font':
2585
			case 'i':
2586
			case 's':
2587
			case 'small':
2588
			case 'strike':
2589
			case 'strong':
2590
			case 'tt':
2591 View Code Duplication
			case 'u':
2592
				$this->afe->reconstruct( $this->stack );
2593
				$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2594
				return true;
2595
2596
			case 'nobr':
2597
				$this->afe->reconstruct( $this->stack );
2598
				if ( $this->stack->inScope( 'nobr' ) ) {
2599
					$this->inBodyMode( 'endtag', 'nobr' );
2600
					$this->afe->reconstruct( $this->stack );
2601
				}
2602
				$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2603
				return true;
2604
2605
			case 'applet':
2606
			case 'marquee':
2607
			case 'object':
2608
				$this->afe->reconstruct( $this->stack );
2609
				$this->stack->insertHTMLElement( $value, $attribs );
2610
				$this->afe->insertMarker();
2611
				# OMITTED: frameset_ok
2612
				return true;
2613
2614 View Code Duplication
			case 'table':
2615
				# The document is never in "quirks mode"; see simplifications
2616
				# above.
2617
				if ( $this->stack->inButtonScope( 'p' ) ) {
2618
					$this->inBodyMode( 'endtag', 'p' );
2619
				}
2620
				$this->stack->insertHTMLElement( $value, $attribs );
2621
				# OMITTED: frameset_ok
2622
				$this->switchMode( 'inTableMode' );
2623
				return true;
2624
2625
			case 'area':
2626
			case 'br':
2627
			case 'embed':
2628
			case 'img':
2629
			case 'keygen':
2630 View Code Duplication
			case 'wbr':
2631
				$this->afe->reconstruct( $this->stack );
2632
				$this->stack->insertHTMLElement( $value, $attribs );
2633
				$this->stack->pop();
2634
				# OMITTED: frameset_ok
2635
				return true;
2636
2637 View Code Duplication
			case 'input':
2638
				$this->afe->reconstruct( $this->stack );
2639
				$this->stack->insertHTMLElement( $value, $attribs );
2640
				$this->stack->pop();
2641
				# OMITTED: frameset_ok
2642
				# (hence we don't need to examine the tag's "type" attribute)
2643
				return true;
2644
2645
			case 'menuitem':
2646
			case 'param':
2647
			case 'source':
2648
			case 'track':
2649
				$this->stack->insertHTMLElement( $value, $attribs );
2650
				$this->stack->pop();
2651
				return true;
2652
2653 View Code Duplication
			case 'hr':
2654
				if ( $this->stack->inButtonScope( 'p' ) ) {
2655
					$this->inBodyMode( 'endtag', 'p' );
2656
				}
2657
				$this->stack->insertHTMLElement( $value, $attribs );
2658
				$this->stack->pop();
2659
				return true;
2660
2661
			case 'image':
2662
				# warts!
2663
				return $this->inBodyMode( $token, 'img', $attribs, $selfclose );
2664
2665
			# OMITTED: <isindex>
2666
2667
			case 'textarea':
2668
				$this->stack->insertHTMLElement( $value, $attribs );
2669
				$this->ignoreLinefeed = true;
2670
				$this->inRCDATA = $value; // emulate rcdata tokenizer mode
2671
				# OMITTED: frameset_ok
2672
				return true;
2673
2674
			# OMITTED: <xmp>
2675
			# OMITTED: <iframe>
2676
			# OMITTED: <noembed>
2677
			# OMITTED: <noscript>
2678
2679
			case 'select':
0 ignored issues
show
Coding Style introduced by
There must be a comment when fall-through is intentional in a non-empty case body
Loading history...
2680
				$this->afe->reconstruct( $this->stack );
2681
				$this->stack->insertHTMLElement( $value, $attribs );
2682
				switch ( $this->parseMode ) {
2683
				case 'inTableMode':
2684
				case 'inCaptionMode':
2685
				case 'inTableBodyMode':
2686
				case 'inRowMode':
2687
				case 'inCellMode':
2688
					$this->switchMode( 'inSelectInTableMode' );
2689
					return true;
2690
				default:
2691
					$this->switchMode( 'inSelectMode' );
2692
					return true;
2693
				}
2694
2695
			case 'optgroup':
2696 View Code Duplication
			case 'option':
2697
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
2698
					$this->inBodyMode( 'endtag', 'option' );
2699
				}
2700
				$this->afe->reconstruct( $this->stack );
2701
				$this->stack->insertHTMLElement( $value, $attribs );
2702
				return true;
2703
2704
			case 'rb':
2705
			case 'rtc':
2706
				if ( $this->stack->inScope( 'ruby' ) ) {
2707
					$this->stack->generateImpliedEndTags();
2708
				}
2709
				$this->stack->insertHTMLElement( $value, $attribs );
2710
				return true;
2711
2712
			case 'rp':
2713
			case 'rt':
2714
				if ( $this->stack->inScope( 'ruby' ) ) {
2715
					$this->stack->generateImpliedEndTags( 'rtc' );
2716
				}
2717
				$this->stack->insertHTMLElement( $value, $attribs );
2718
				return true;
2719
2720 View Code Duplication
			case 'math':
2721
				$this->afe->reconstruct( $this->stack );
2722
				# We skip the spec's "adjust MathML attributes" and
2723
				# "adjust foreign attributes" steps, since the browser will
2724
				# do this later when it parses the output and it doesn't affect
2725
				# balancing.
2726
				$this->stack->insertForeignElement(
2727
					BalanceSets::MATHML_NAMESPACE, $value, $attribs
2728
				);
2729
				if ( $selfclose ) {
2730
					# emit explicit </math> tag.
2731
					$this->stack->pop();
2732
				}
2733
				return true;
2734
2735 View Code Duplication
			case 'svg':
2736
				$this->afe->reconstruct( $this->stack );
2737
				# We skip the spec's "adjust SVG attributes" and
2738
				# "adjust foreign attributes" steps, since the browser will
2739
				# do this later when it parses the output and it doesn't affect
2740
				# balancing.
2741
				$this->stack->insertForeignElement(
2742
					BalanceSets::SVG_NAMESPACE, $value, $attribs
2743
				);
2744
				if ( $selfclose ) {
2745
					# emit explicit </svg> tag.
2746
					$this->stack->pop();
2747
				}
2748
				return true;
2749
2750
			case 'caption':
2751
			case 'col':
2752
			case 'colgroup':
2753
			# OMITTED: <frame>
2754
			case 'head':
2755
			case 'tbody':
2756
			case 'td':
2757
			case 'tfoot':
2758
			case 'th':
2759
			case 'thead':
2760
			case 'tr':
2761
				// Ignore table tags if we're not inTableMode
2762
				return true;
2763
			}
2764
2765
			// Handle any other start tag here
2766
			$this->afe->reconstruct( $this->stack );
2767
			$this->stack->insertHTMLElement( $value, $attribs );
2768
			return true;
2769
		} elseif ( $token === 'endtag' ) {
2770
			switch ( $value ) {
2771
			# </body>,</html> are unsupported.
2772
2773
			case 'template':
2774
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2775
2776
			case 'address':
2777
			case 'article':
2778
			case 'aside':
2779
			case 'blockquote':
2780
			case 'button':
2781
			case 'center':
2782
			case 'details':
2783
			case 'dialog':
2784
			case 'dir':
2785
			case 'div':
2786
			case 'dl':
2787
			case 'fieldset':
2788
			case 'figcaption':
2789
			case 'figure':
2790
			case 'footer':
2791
			case 'header':
2792
			case 'hgroup':
2793
			case 'listing':
2794
			case 'main':
2795
			case 'menu':
2796
			case 'nav':
2797
			case 'ol':
2798
			case 'pre':
2799
			case 'section':
2800
			case 'summary':
2801
			case 'ul':
2802
				// Ignore if there is not a matching open tag
2803
				if ( !$this->stack->inScope( $value ) ) {
2804
					return true;
2805
				}
2806
				$this->stack->generateImpliedEndTags();
2807
				$this->stack->popTag( $value );
2808
				return true;
2809
2810
			case 'form':
2811
				if ( $this->stack->indexOf( 'template' ) < 0 ) {
2812
					$openform = $this->formElementPointer;
2813
					$this->formElementPointer = null;
2814
					if ( !$openform || !$this->stack->inScope( $openform ) ) {
2815
						return true;
2816
					}
2817
					$this->stack->generateImpliedEndTags();
2818
					// Don't flatten yet if we're removing a <form> element
2819
					// out-of-order. (eg. `<form><div></form>`)
2820
					$flatten = ( $this->stack->currentNode === $openform );
2821
					$this->stack->removeElement( $openform, $flatten );
2822
				} else {
2823
					if ( !$this->stack->inScope( 'form' ) ) {
2824
						return true;
2825
					}
2826
					$this->stack->generateImpliedEndTags();
2827
					$this->stack->popTag( 'form' );
2828
				}
2829
				return true;
2830
2831 View Code Duplication
			case 'p':
2832
				if ( !$this->stack->inButtonScope( 'p' ) ) {
2833
					$this->inBodyMode( 'tag', 'p', [] );
2834
					return $this->insertToken( $token, $value, $attribs, $selfclose );
2835
				}
2836
				$this->stack->generateImpliedEndTags( $value );
2837
				$this->stack->popTag( $value );
2838
				return true;
2839
2840
			case 'li':
2841
				if ( !$this->stack->inListItemScope( $value ) ) {
2842
					return true; # ignore
2843
				}
2844
				$this->stack->generateImpliedEndTags( $value );
2845
				$this->stack->popTag( $value );
2846
				return true;
2847
2848
			case 'dd':
2849
			case 'dt':
2850
				if ( !$this->stack->inScope( $value ) ) {
2851
					return true; # ignore
2852
				}
2853
				$this->stack->generateImpliedEndTags( $value );
2854
				$this->stack->popTag( $value );
2855
				return true;
2856
2857
			case 'h1':
2858
			case 'h2':
2859
			case 'h3':
2860
			case 'h4':
2861
			case 'h5':
2862
			case 'h6':
2863
				if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2864
					return true; # ignore
2865
				}
2866
				$this->stack->generateImpliedEndTags();
2867
				$this->stack->popTag( BalanceSets::$headingSet );
2868
				return true;
2869
2870
			case 'sarcasm':
2871
				# Take a deep breath, then:
2872
				break;
2873
2874
			case 'a':
2875
			case 'b':
2876
			case 'big':
2877
			case 'code':
2878
			case 'em':
2879
			case 'font':
2880
			case 'i':
2881
			case 'nobr':
2882
			case 's':
2883
			case 'small':
2884
			case 'strike':
2885
			case 'strong':
2886
			case 'tt':
2887
			case 'u':
2888
				if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
0 ignored issues
show
Bug introduced by
It seems like $this->afe can be null; however, adoptionAgency() does not accept null, maybe add an additional type check?

Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend to add an additional type check to your code:

/** @return stdClass|null */
function mayReturnNull() { }

function doesNotAcceptNull(stdClass $x) { }

// With potential error.
function withoutCheck() {
    $x = mayReturnNull();
    doesNotAcceptNull($x); // Potential error here.
}

// Safe - Alternative 1
function withCheck1() {
    $x = mayReturnNull();
    if ( ! $x instanceof stdClass) {
        throw new \LogicException('$x must be defined.');
    }
    doesNotAcceptNull($x);
}

// Safe - Alternative 2
function withCheck2() {
    $x = mayReturnNull();
    if ($x instanceof stdClass) {
        doesNotAcceptNull($x);
    }
}
Loading history...
2889
					return true; # If we did something, we're done.
2890
				}
2891
				break; # Go to the "any other end tag" case.
2892
2893
			case 'applet':
2894
			case 'marquee':
2895 View Code Duplication
			case 'object':
2896
				if ( !$this->stack->inScope( $value ) ) {
2897
					return true; # ignore
2898
				}
2899
				$this->stack->generateImpliedEndTags();
2900
				$this->stack->popTag( $value );
2901
				$this->afe->clearToMarker();
2902
				return true;
2903
2904
			case 'br':
2905
				# Turn </br> into <br>
2906
				return $this->inBodyMode( 'tag', $value, [] );
2907
			}
2908
2909
			// Any other end tag goes here
2910
			foreach ( $this->stack as $i => $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2911
				if ( $node->isHtmlNamed( $value ) ) {
2912
					$this->stack->generateImpliedEndTags( $value );
2913
					$this->stack->popTo( $i ); # including $i
2914
					break;
2915
				} elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2916
					return true; // ignore this close token.
2917
				}
2918
			}
2919
			return true;
2920
		} elseif ( $token === 'comment' ) {
2921
			$this->stack->insertComment( $value );
2922
			return true;
2923
		} else {
2924
			Assert::invariant( false, "Bad token type: $token" );
2925
		}
2926
	}
2927
2928
	/**
	 * Process one token using the HTML5 "in table" insertion mode rules.
	 *
	 * Tokens without table-specific handling are handed to inBodyMode()
	 * with foster parenting switched on for the duration of that call.
	 *
	 * @param string $token Token type; 'text', 'eof', 'tag', 'endtag' and
	 *   'comment' are recognized here
	 * @param string $value Token payload: the tag name for 'tag'/'endtag'
	 *   tokens, the comment body for 'comment' tokens
	 * @param array|null $attribs Attributes of a 'tag' token
	 * @param bool $selfclose Passed through unchanged to other handlers
	 * @return bool True, or whatever the handler the token was delegated
	 *   to returned
	 */
	private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			if ( $this->textIntegrationMode ) {
				return $this->inBodyMode( $token, $value, $attribs, $selfclose );
			} elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
				// Start buffering table text, remembering which mode to
				// return to; inTableTextMode() flushes the buffer when the
				// next non-text token arrives.
				$this->pendingTableText = '';
				$this->originalInsertionMode = $this->parseMode;
				return $this->switchModeAndReprocess( 'inTableTextMode', $token, $value, $attribs, $selfclose );
			}
			// fall through to default case.
		} elseif ( $token === 'eof' ) {
			$this->stopParsing();
			return true;
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'caption':
				// The "in table" rules clear the stack back to a table
				// context before inserting a caption, exactly as is done
				// for the colgroup and table-section cases below.
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->afe->insertMarker();
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inCaptionMode' );
				return true;
			case 'colgroup':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inColumnGroupMode' );
				return true;
			case 'col':
				// Act as if a <colgroup> start tag had been seen, then hand
				// the <col> token back to insertToken().
				$this->inTableMode( 'tag', 'colgroup', [] );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			case 'tbody':
			case 'tfoot':
			case 'thead':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inTableBodyMode' );
				return true;
			case 'td':
			case 'th':
			case 'tr':
				// Act as if a <tbody> start tag had been seen, then hand the
				// token back to insertToken().
				$this->inTableMode( 'tag', 'tbody', [] );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			case 'table':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true; // Ignore this tag.
				}
				// A nested <table> start tag first closes the open table.
				$this->inTableMode( 'endtag', $value );
				return $this->insertToken( $token, $value, $attribs, $selfclose );

			case 'style':
			# OMITTED: <script>
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );

			case 'input':
				// Only <input type="hidden"> is handled here.
				if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
					break; // Handle this as "everything else"
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;

			case 'form':
				if (
					$this->formElementPointer ||
					$this->stack->indexOf( 'template' ) >= 0
				) {
					return true; // ignore this token
				}
				// Insert the form, record it as the form element pointer,
				// and immediately take it off the stack again.
				$this->formElementPointer =
					$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->popTag( $this->formElementPointer );
				return true;
			}
			// Fall through for "anything else" clause.
		} elseif ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'table':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true; // Ignore.
				}
				$this->stack->popTag( $value );
				$this->resetInsertionMode();
				return true;
			# OMITTED: <body>
			case 'caption':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
			case 'tbody':
			case 'td':
			case 'tfoot':
			case 'th':
			case 'thead':
			case 'tr':
				return true; // Ignore the token.
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
			// Fall through for "anything else" clause.
		} elseif ( $token === 'comment' ) {
			$this->stack->insertComment( $value );
			return true;
		}
		// This is the "anything else" case: process the token with the
		// "in body" rules while foster parenting is enabled on the stack.
		$this->stack->fosterParentMode = true;
		$this->inBodyMode( $token, $value, $attribs, $selfclose );
		$this->stack->fosterParentMode = false;
		return true;
	}
3036
3037
	/**
	 * Process one token using the HTML5 "in table text" insertion mode rules.
	 *
	 * Text tokens are accumulated in $this->pendingTableText. The first
	 * non-text token flushes the accumulated text, switches back to the
	 * mode saved in $this->originalInsertionMode and reprocesses the token.
	 *
	 * @param string $token Token type
	 * @param string $value Token payload (the text for 'text' tokens)
	 * @param array|null $attribs Passed through when the token is reprocessed
	 * @param bool $selfclose Passed through when the token is reprocessed
	 * @return bool True for text tokens, otherwise the result of
	 *   switchModeAndReprocess()
	 */
	private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token !== 'text' ) {
			// Take the buffered text and reset the buffer before flushing.
			$buffered = $this->pendingTableText;
			$this->pendingTableText = '';

			$whitespaceOnly = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
			if ( $whitespaceOnly ) {
				$this->stack->insertText( $buffered );
			} else {
				// Same treatment as the "anything else" case of inTableMode.
				$this->stack->fosterParentMode = true;
				$this->inBodyMode( 'text', $buffered );
				$this->stack->fosterParentMode = false;
			}

			return $this->switchModeAndReprocess(
				$this->originalInsertionMode, $token, $value, $attribs, $selfclose
			);
		}

		// Still inside a run of text: keep buffering.
		$this->pendingTableText .= $value;
		return true;
	}
3058
3059
	// helper for inCaptionMode
3060
	/**
	 * Close the open caption, if there is one in table scope, and switch
	 * to inTableMode.
	 *
	 * @return bool False if no caption is in table scope (nothing is
	 *   changed in that case), true otherwise
	 */
	private function endCaption() {
		$captionInScope = $this->stack->inTableScope( 'caption' );
		if ( $captionInScope ) {
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( 'caption' );
			$this->afe->clearToMarker();
			$this->switchMode( 'inTableMode' );
			return true;
		}
		return false;
	}
3070
3071
	/**
	 * Process one token using the HTML5 "in caption" insertion mode rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type; only 'tag' and 'endtag' get
	 *   caption-specific handling
	 * @param string $value Token payload (tag name for 'tag'/'endtag')
	 * @param array|null $attribs Passed through to other handlers
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of inBodyMode() for tokens with no
	 *   caption-specific handling
	 */
	private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
		// Set when the token must close the caption and then be fed back
		// in through insertToken().
		$closeThenReprocess = false;

		if ( $token === 'tag' ) {
			$closeThenReprocess = in_array( $value, [
				'caption', 'col', 'colgroup', 'tbody', 'td',
				'tfoot', 'th', 'thead', 'tr'
			] );
		} elseif ( $token === 'endtag' ) {
			if ( $value == 'caption' ) {
				$this->endCaption();
				return true;
			}
			if ( $value == 'table' ) {
				$closeThenReprocess = true;
			} elseif ( in_array( $value, [
				# OMITTED: <html>
				'body', 'col', 'colgroup', 'tbody', 'td',
				'tfoot', 'th', 'thead', 'tr'
			] ) ) {
				// These end tags are ignored.
				return true;
			}
		}

		if ( !$closeThenReprocess ) {
			// The Anything Else case
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}

		if ( $this->endCaption() ) {
			$this->insertToken( $token, $value, $attribs, $selfclose );
		}
		return true;
	}
3117
3118
	/**
	 * Process one token using the HTML5 "in column group" insertion mode
	 * rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type: 'text', 'tag', 'endtag', 'eof' or
	 *   'comment'
	 * @param string $value Token payload (text, tag name or comment body)
	 * @param array|null $attribs Attributes of a 'tag' token
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of the handler the token was
	 *   delegated to
	 */
	private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'eof' ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}
		if ( $token === 'comment' ) {
			$this->stack->insertComment( $value );
			return true;
		}

		if ( $token === 'text' ) {
			// Leading whitespace is inserted directly; only what follows
			// it is subject to the "anything else" rule below.
			$leading = strspn( $value, "\x09\x0A\x0C\x0D\x20" );
			if ( $leading > 0 ) {
				$this->stack->insertText( substr( $value, 0, $leading ) );
				$value = substr( $value, $leading );
			}
			if ( strlen( $value ) === 0 ) {
				return true; // All text handled.
			}
		} elseif ( $token === 'tag' ) {
			# OMITTED: <html>
			if ( $value == 'col' ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			}
			if ( $value == 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
		} elseif ( $token === 'endtag' ) {
			if ( $value == 'colgroup' ) {
				if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
					$this->stack->pop();
					$this->switchMode( 'inTableMode' );
				}
				// Otherwise the token is simply ignored.
				return true;
			}
			if ( $value == 'col' ) {
				return true; // Ignore the token.
			}
			if ( $value == 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
		}

		// Anything else: close the colgroup (when it is the current node)
		// and hand the token back to insertToken().
		$colgroupIsCurrent = $this->stack->currentNode->isHtmlNamed( 'colgroup' );
		if ( !$colgroupIsCurrent ) {
			return true; // Ignore the token.
		}
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
3168
3169
	// Helper function for inTableBodyMode
3170
	/**
	 * Close the current table section and switch to inTableMode.
	 *
	 * @return bool False if none of tbody, thead or tfoot is in table scope
	 *   (nothing is changed in that case), true otherwise
	 */
	private function endSection() {
		$sectionInScope = false;
		foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionName ) {
			if ( $this->stack->inTableScope( $sectionName ) ) {
				$sectionInScope = true;
				break;
			}
		}
		if ( $sectionInScope ) {
			$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
			$this->stack->pop();
			$this->switchMode( 'inTableMode' );
			return true;
		}
		return false;
	}
3183 View Code Duplication
	/**
	 * Process one token using the HTML5 "in table body" insertion mode
	 * rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type; only 'tag' and 'endtag' get
	 *   section-specific handling
	 * @param string $value Token payload (tag name for 'tag'/'endtag')
	 * @param array|null $attribs Attributes of a 'tag' token
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of inTableMode() for tokens with no
	 *   section-specific handling
	 */
	private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
		// Set when the token must close the section and then be fed back
		// in through insertToken().
		$closeThenReprocess = false;

		if ( $token === 'tag' ) {
			if ( $value == 'tr' ) {
				$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inRowMode' );
				return true;
			}
			if ( $value == 'th' || $value == 'td' ) {
				// Act as if a <tr> start tag had been seen first.
				$this->inTableBodyMode( 'tag', 'tr', [] );
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
			$closeThenReprocess = in_array( $value, [
				'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'
			] );
		} elseif ( $token === 'endtag' ) {
			if ( $value == 'table' ) {
				$closeThenReprocess = true;
			} elseif ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ] ) ) {
				if ( $this->stack->inTableScope( $value ) ) {
					$this->endSection();
				}
				return true;
			} elseif ( in_array( $value, [
				# OMITTED: <body>
				# OMITTED: <html>
				'caption', 'col', 'colgroup', 'td', 'th', 'tr'
			] ) ) {
				return true; // Ignore the token.
			}
		}

		if ( $closeThenReprocess ) {
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}

		// Anything else:
		return $this->inTableMode( $token, $value, $attribs, $selfclose );
	}
3235
3236
	// Helper function for inRowMode
3237
	/**
	 * Close the current table row and switch to inTableBodyMode.
	 *
	 * @return bool False if no tr is in table scope (nothing is changed in
	 *   that case), true otherwise
	 */
	private function endRow() {
		$rowInScope = $this->stack->inTableScope( 'tr' );
		if ( $rowInScope ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->pop();
			$this->switchMode( 'inTableBodyMode' );
			return true;
		}
		return false;
	}
3246 View Code Duplication
	/**
	 * Process one token using the HTML5 "in row" insertion mode rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type; only 'tag' and 'endtag' get
	 *   row-specific handling
	 * @param string $value Token payload (tag name for 'tag'/'endtag')
	 * @param array|null $attribs Attributes of a 'tag' token
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of inTableMode() for tokens with no
	 *   row-specific handling
	 */
	private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
		// Set when the token must close the row and then be fed back in
		// through insertToken().
		$closeThenReprocess = false;

		if ( $token === 'tag' ) {
			if ( $value == 'th' || $value == 'td' ) {
				$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inCellMode' );
				$this->afe->insertMarker();
				return true;
			}
			$closeThenReprocess = in_array( $value, [
				'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'
			] );
		} elseif ( $token === 'endtag' ) {
			if ( $value == 'tr' ) {
				$this->endRow();
				return true;
			}
			if ( $value == 'table' ) {
				$closeThenReprocess = true;
			} elseif ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ] ) ) {
				// Only acted on when that section is actually open.
				if ( !$this->stack->inTableScope( $value ) ) {
					return true;
				}
				$closeThenReprocess = true;
			} elseif ( in_array( $value, [
				# OMITTED: <body>
				# OMITTED: <html>
				'caption', 'col', 'colgroup', 'td', 'th'
			] ) ) {
				return true; // Ignore the token.
			}
		}

		if ( !$closeThenReprocess ) {
			// Anything else:
			return $this->inTableMode( $token, $value, $attribs, $selfclose );
		}

		if ( $this->endRow() ) {
			$this->insertToken( $token, $value, $attribs, $selfclose );
		}
		return true;
	}
3301
3302
	// Helper for inCellMode
3303
	/**
	 * Close the open table cell by feeding the matching end tag to
	 * inCellMode(). A td in table scope takes precedence over a th.
	 *
	 * @return bool True if a td or th was in table scope, false otherwise
	 */
	private function endCell() {
		foreach ( [ 'td', 'th' ] as $cellName ) {
			if ( $this->stack->inTableScope( $cellName ) ) {
				$this->inCellMode( 'endtag', $cellName );
				return true;
			}
		}
		return false;
	}
3314
	/**
	 * Process one token using the HTML5 "in cell" insertion mode rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type; only 'tag' and 'endtag' get
	 *   cell-specific handling
	 * @param string $value Token payload (tag name for 'tag'/'endtag')
	 * @param array|null $attribs Passed through to other handlers
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of inBodyMode() for tokens with no
	 *   cell-specific handling
	 */
	private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'tag' ) {
			$endsCell = in_array( $value, [
				'caption', 'col', 'colgroup', 'tbody', 'td',
				'tfoot', 'th', 'thead', 'tr'
			] );
			if ( $endsCell ) {
				if ( $this->endCell() ) {
					$this->insertToken( $token, $value, $attribs, $selfclose );
				}
				return true;
			}
		} elseif ( $token === 'endtag' ) {
			$closesCell = false;
			$popTarget = null;
			$reprocess = false;

			if ( $value == 'td' || $value == 'th' ) {
				// The cell's own end tag: pop up to that cell.
				$closesCell = true;
				$popTarget = $value;
			} elseif ( in_array( $value, [
				# OMITTED: <body>
				# OMITTED: <html>
				'caption', 'col', 'colgroup'
			] ) ) {
				return true;
			} elseif ( in_array( $value, [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ] ) ) {
				// An enclosing table element ends: pop up to whichever cell
				// is open, then hand the end tag back to insertToken().
				$closesCell = true;
				$popTarget = BalanceSets::$tableCellSet;
				$reprocess = true;
			}

			if ( $closesCell ) {
				if ( $this->stack->inTableScope( $value ) ) {
					$this->stack->generateImpliedEndTags();
					$this->stack->popTag( $popTarget );
					$this->afe->clearToMarker();
					$this->switchMode( 'inRowMode' );
					if ( $reprocess ) {
						$this->insertToken( $token, $value, $attribs, $selfclose );
					}
				}
				return true;
			}
		}

		// Anything else:
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}
3367
3368
	/**
	 * Process one token using the HTML5 "in select" insertion mode rules.
	 *
	 * Tokens that match none of the handled cases are ignored. Tag names
	 * are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type: 'text', 'eof', 'tag', 'endtag' or
	 *   'comment'
	 * @param string $value Token payload (text, tag name or comment body)
	 * @param array|null $attribs Attributes of a 'tag' token
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of the handler the token was
	 *   delegated to
	 */
	private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			$this->stack->insertText( $value );
			return true;
		}
		if ( $token === 'comment' ) {
			$this->stack->insertComment( $value );
			return true;
		}
		if ( $token === 'eof' ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}

		if ( $token === 'tag' ) {
			# OMITTED: <html>
			$isOption = ( $value == 'option' );
			if ( $isOption || $value == 'optgroup' ) {
				// Close an open <option>; for a new <optgroup>, also close
				// the <optgroup> that is then the current node.
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				if ( !$isOption && $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
					$this->stack->pop();
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				return true;
			}
			if ( $value == 'select' ) {
				$this->inSelectMode( 'endtag', $value ); // treat it like endtag
				return true;
			}
			if ( in_array( $value, [ 'input', 'keygen', 'textarea' ] ) ) {
				if ( $this->stack->inSelectScope( 'select' ) ) {
					// Close the select, then hand the token back.
					$this->inSelectMode( 'endtag', 'select' );
					return $this->insertToken( $token, $value, $attribs, $selfclose );
				}
				return true; // ignore token (fragment case)
			}
			if ( $value == 'script' || $value == 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
		} elseif ( $token === 'endtag' ) {
			if ( $value == 'optgroup' ) {
				// An <option> sitting directly inside an <optgroup> is
				// closed first.
				if (
					$this->stack->currentNode->isHtmlNamed( 'option' ) &&
					$this->stack->length() >= 2 &&
					$this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup' )
				) {
					$this->stack->pop();
				}
				if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
					$this->stack->pop();
				}
				return true;
			}
			if ( $value == 'option' ) {
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				return true;
			}
			if ( $value == 'select' ) {
				if ( $this->stack->inSelectScope( $value ) ) {
					$this->stack->popTag( $value );
					$this->resetInsertionMode();
				}
				// Not in select scope: fragment case, nothing to do.
				return true;
			}
			if ( $value == 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
		}

		// anything else: just ignore the token
		return true;
	}
3443
3444
	/**
	 * Process one token using the HTML5 "in select in table" insertion
	 * mode rules.
	 *
	 * Start and end tags of table elements close the select first; every
	 * other token is handled by inSelectMode(). Tag names are compared
	 * loosely (==), on purpose.
	 *
	 * @param string $token Token type
	 * @param string $value Token payload (tag name for 'tag'/'endtag')
	 * @param array|null $attribs Passed through to other handlers
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of the handler the token was
	 *   delegated to
	 */
	private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
		$isTableTagName = in_array( $value, [
			'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'
		] );

		if ( $isTableTagName && $token === 'tag' ) {
			$this->inSelectInTableMode( 'endtag', 'select' );
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}

		if ( $isTableTagName && $token === 'endtag' ) {
			if ( !$this->stack->inTableScope( $value ) ) {
				return true;
			}
			$this->inSelectInTableMode( 'endtag', 'select' );
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}

		// anything else
		return $this->inSelectMode( $token, $value, $attribs, $selfclose );
	}
3468
3469
	/**
	 * Process one token using the HTML5 "in template" insertion mode rules.
	 *
	 * Tag names are compared loosely (==), on purpose.
	 *
	 * @param string $token Token type: 'text', 'comment', 'eof', 'tag' or
	 *   'endtag'; anything else fails an invariant assertion
	 * @param string $value Token payload (text, tag name or comment body)
	 * @param array|null $attribs Passed through to other handlers
	 * @param bool $selfclose Passed through to other handlers
	 * @return bool True, or the result of the handler the token was
	 *   delegated to
	 */
	private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' || $token === 'comment' ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}

		if ( $token === 'eof' ) {
			if ( $this->stack->indexOf( 'template' ) >= 0 ) {
				// A template is still open: close it, drop its insertion
				// mode, and hand the eof token back to insertToken().
				$this->stack->popTag( 'template' );
				$this->afe->clearToMarker();
				array_pop( $this->templateInsertionModes );
				$this->resetInsertionMode();
				$this->insertToken( $token, $value, $attribs, $selfclose );
			} else {
				$this->stopParsing();
			}
			return true;
		}

		if ( $token === 'endtag' ) {
			// Only </template> is acted on; other end tags are ignored.
			if ( $value == 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
			return true;
		}

		if ( $token === 'tag' ) {
			# OMITTED: <script>
			# OMITTED: <title>
			$handledInHead = in_array( $value, [
				'base', 'basefont', 'bgsound', 'link', 'meta',
				'noframes', 'style', 'template'
			] );
			if ( $handledInHead ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}

			// Choose the mode to switch to based on the start tag.
			if ( in_array( $value, [ 'caption', 'colgroup', 'tbody', 'tfoot', 'thead' ] ) ) {
				$nextMode = 'inTableMode';
			} elseif ( $value == 'col' ) {
				$nextMode = 'inColumnGroupMode';
			} elseif ( $value == 'tr' ) {
				$nextMode = 'inTableBodyMode';
			} elseif ( $value == 'td' || $value == 'th' ) {
				$nextMode = 'inRowMode';
			} else {
				$nextMode = 'inBodyMode';
			}
			return $this->switchModeAndReprocess(
				$nextMode, $token, $value, $attribs, $selfclose
			);
		}

		Assert::invariant( false, "Bad token type: $token" );
	}
3535
}
3536