Completed
Branch master (af7ffa)
by
unknown
24:08
created

Balancer::advance()   D

Complexity

Conditions 12
Paths 270

Size

Total Lines 59
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 12
eloc 38
nc 270
nop 0
dl 0
loc 59
rs 4.7135
c 2
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * An implementation of the tree building portion of the HTML5 parsing
4
 * spec.
5
 *
6
 * This program is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License along
17
 * with this program; if not, write to the Free Software Foundation, Inc.,
18
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
 * http://www.gnu.org/copyleft/gpl.html
20
 *
21
 * @file
22
 * @ingroup Parser
23
 * @since 1.27
24
 * @author C. Scott Ananian, 2016
25
 */
26
namespace MediaWiki\Tidy;
27
28
use Wikimedia\Assert\Assert;
29
use Wikimedia\Assert\ParameterAssertionException;
30
use \ExplodeIterator;
31
use \IteratorAggregate;
32
use \ReverseArrayIterator;
33
use \Sanitizer;
34
35
# A note for future librarization[1] -- this file is a good candidate
36
# for splitting into an independent library, except that it is currently
37
# highly optimized for MediaWiki use.  It only implements the portions
38
# of the HTML5 tree builder used by tags supported by MediaWiki, and
39
# does not contain a true tokenizer pass, instead relying on
40
# comment stripping, attribute normalization, and escaping done by
41
# the MediaWiki Sanitizer.  It also deliberately avoids building
42
# a true DOM in memory, instead serializing elements to an output string
43
# as soon as possible (usually as soon as the tag is closed) to reduce
44
# its memory footprint.
45
46
# On the other hand, I've been pretty careful to note with comments in the
47
# code the places where this implementation omits features of the spec or
48
# depends on the MediaWiki Sanitizer.  Perhaps in the future we'll want to
49
# implement the missing pieces and make this a standalone PHP HTML5 parser.
50
# In order to do so, some sort of MediaWiki-specific API will need
51
# to be added to (a) allow the Balancer to bypass the tokenizer,
52
# and (b) support on-the-fly flattening instead of DOM node creation.
53
54
# [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
55
56
/**
57
 * Utility constants and sets for the HTML5 tree building algorithm.
58
 * Sets are associative arrays indexed first by namespace and then by
59
 * lower-cased tag name.
60
 *
61
 * @ingroup Parser
62
 * @since 1.27
63
 */
64
class BalanceSets {
65
	const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
66
	const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
67
	const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
68
69
	public static $unsupportedSet = [
70
		self::HTML_NAMESPACE => [
71
			'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
72
			'form' => true, 'frame' => true,
73
			'plaintext' => true, 'isindex' => true, 'textarea' => true,
74
			'xmp' => true, 'iframe' => true, 'noembed' => true,
75
			'noscript' => true, 'select' => true, 'script' => true,
76
			'title' => true
77
		]
78
	];
79
80
	public static $emptyElementSet = [
81
		self::HTML_NAMESPACE => [
82
			'area' => true, 'base' => true, 'basefont' => true,
83
			'bgsound' => true, 'br' => true, 'col' => true, 'command' => true,
84
			'embed' => true, 'frame' => true, 'hr' => true, 'img' => true,
85
			'input' => true, 'keygen' => true, 'link' => true, 'meta' => true,
86
			'param' => true, 'source' => true, 'track' => true, 'wbr' => true
87
		]
88
	];
89
90
	public static $headingSet = [
91
		self::HTML_NAMESPACE => [
92
			'h1' => true, 'h2' => true, 'h3' => true,
93
			'h4' => true, 'h5' => true, 'h6' => true
94
		]
95
	];
96
97
	public static $specialSet = [
98
		self::HTML_NAMESPACE => [
99
			'address' => true, 'applet' => true, 'area' => true,
100
			'article' => true, 'aside' => true, 'base' => true,
101
			'basefont' => true, 'bgsound' => true, 'blockquote' => true,
102
			'body' => true, 'br' => true, 'button' => true, 'caption' => true,
103
			'center' => true, 'col' => true, 'colgroup' => true, 'dd' => true,
104
			'details' => true, 'dir' => true, 'div' => true, 'dl' => true,
105
			'dt' => true, 'embed' => true, 'fieldset' => true,
106
			'figcaption' => true, 'figure' => true, 'footer' => true,
107
			'form' => true, 'frame' => true, 'frameset' => true, 'h1' => true,
108
			'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
109
			'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
110
			'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
111
			'input' => true, 'isindex' => true, 'li' => true, 'link' => true,
112
			'listing' => true, 'main' => true, 'marquee' => true,
113
			'menu' => true, 'menuitem' => true, 'meta' => true, 'nav' => true,
114
			'noembed' => true, 'noframes' => true, 'noscript' => true,
115
			'object' => true, 'ol' => true, 'p' => true, 'param' => true,
116
			'plaintext' => true, 'pre' => true, 'script' => true,
117
			'section' => true, 'select' => true, 'source' => true,
118
			'style' => true, 'summary' => true, 'table' => true,
119
			'tbody' => true, 'td' => true, 'template' => true,
120
			'textarea' => true, 'tfoot' => true, 'th' => true, 'thead' => true,
121
			'title' => true, 'tr' => true, 'track' => true, 'ul' => true,
122
			'wbr' => true, 'xmp' => true
123
		],
124
		self::SVG_NAMESPACE => [
125
			'foreignobject' => true, 'desc' => true, 'title' => true
126
		],
127
		self::MATHML_NAMESPACE => [
128
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
129
			'mtext' => true, 'annotation-xml' => true
130
		]
131
	];
132
133
	public static $addressDivPSet = [
134
		self::HTML_NAMESPACE => [
135
			'address' => true, 'div' => true, 'p' => true
136
		]
137
	];
138
139
	public static $tableSectionRowSet = [
140
		self::HTML_NAMESPACE => [
141
			'table' => true, 'thead' => true, 'tbody' => true,
142
			'tfoot' => true, 'tr' => true
143
		]
144
	];
145
146
	public static $impliedEndTagsSet = [
147
		self::HTML_NAMESPACE => [
148
			'dd' => true, 'dt' => true, 'li' => true, 'optgroup' => true,
149
			'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
150
			'rt' => true, 'rtc' => true
151
		]
152
	];
153
154
	public static $thoroughImpliedEndTagsSet = [
155
		self::HTML_NAMESPACE => [
156
			'caption' => true, 'colgroup' => true, 'dd' => true, 'dt' => true,
157
			'li' => true, 'optgroup' => true, 'option' => true, 'p' => true,
158
			'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
159
			'tbody' => true, 'td' => true, 'tfoot' => true, 'th' => true,
160
			'thead' => true, 'tr' => true
161
		]
162
	];
163
164
	public static $tableCellSet = [
165
		self::HTML_NAMESPACE => [
166
			'td' => true, 'th' => true
167
		]
168
	];
169
	public static $tableContextSet = [
170
		self::HTML_NAMESPACE => [
171
			'table' => true, 'template' => true, 'html' => true
172
		]
173
	];
174
175
	public static $tableBodyContextSet = [
176
		self::HTML_NAMESPACE => [
177
			'tbody' => true, 'tfoot' => true, 'thead' => true,
178
			'template' => true, 'html' => true
179
		]
180
	];
181
182
	public static $tableRowContextSet = [
183
		self::HTML_NAMESPACE => [
184
			'tr' => true, 'template' => true, 'html' => true
185
		]
186
	];
187
188
	# OMITTED: formAssociatedSet, since we don't allow <form>
189
190
	public static $inScopeSet = [
191
		self::HTML_NAMESPACE => [
192
			'applet' => true, 'caption' => true, 'html' => true,
193
			'marquee' => true, 'object' => true,
194
			'table' => true, 'td' => true, 'template' => true,
195
			'th' => true
196
		],
197
		self::SVG_NAMESPACE => [
198
			'foreignobject' => true, 'desc' => true, 'title' => true
199
		],
200
		self::MATHML_NAMESPACE => [
201
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
202
			'mtext' => true, 'annotation-xml' => true
203
		]
204
	];
205
206
	private static $inListItemScopeSet = null;

	/**
	 * Return the "list item scope" set: a copy of $inScopeSet with the
	 * HTML 'ol' and 'ul' elements added.  The set is built on first use
	 * and cached in a static property for later calls.
	 *
	 * @return array Set indexed by namespace, then by tag name.
	 */
	public static function inListItemScopeSet() {
		if ( self::$inListItemScopeSet !== null ) {
			return self::$inListItemScopeSet;
		}
		// Arrays are copied by value, so $inScopeSet itself is untouched.
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['ol'] = true;
		$set[self::HTML_NAMESPACE]['ul'] = true;
		self::$inListItemScopeSet = $set;
		return $set;
	}
215
216
	private static $inButtonScopeSet = null;

	/**
	 * Return the "button scope" set: a copy of $inScopeSet with the
	 * HTML 'button' element added.  The set is built on first use and
	 * cached in a static property for later calls.
	 *
	 * @return array Set indexed by namespace, then by tag name.
	 */
	public static function inButtonScopeSet() {
		if ( self::$inButtonScopeSet !== null ) {
			return self::$inButtonScopeSet;
		}
		// Arrays are copied by value, so $inScopeSet itself is untouched.
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['button'] = true;
		self::$inButtonScopeSet = $set;
		return $set;
	}
224
225
	public static $inTableScopeSet = [
226
		self::HTML_NAMESPACE => [
227
			'html' => true, 'table' => true, 'template' => true
228
		]
229
	];
230
231
	public static $mathmlTextIntegrationPointSet = [
232
		self::MATHML_NAMESPACE => [
233
			'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
234
			'mtext' => true
235
		]
236
	];
237
238
	public static $htmlIntegrationPointSet = [
239
		self::SVG_NAMESPACE => [
240
			'foreignobject' => true,
241
			'desc' => true,
242
			'title' => true
243
		]
244
	];
245
246
	// For tidy compatibility.
247
	public static $tidyPWrapSet = [
248
		self::HTML_NAMESPACE => [
249
			'body' => true, 'blockquote' => true,
250
			// We parse with <body> as the fragment context, but the top-level
251
			// element on the stack is actually <html>.  We could use the
252
			// "adjusted current node" everywhere to work around this, but it's
253
			// easier just to add <html> to the p-wrap set.
254
			'html' => true,
255
		],
256
	];
257
	public static $tidyInlineSet = [
258
		self::HTML_NAMESPACE => [
259
			'a' => true, 'abbr' => true, 'acronym' => true, 'applet' => true,
260
			'b' => true, 'basefont' => true, 'bdo' => true, 'big' => true,
261
			'br' => true, 'button' => true, 'cite' => true, 'code' => true,
262
			'dfn' => true, 'em' => true, 'font' => true, 'i' => true,
263
			'iframe' => true, 'img' => true, 'input' => true, 'kbd' => true,
264
			'label' => true, 'legend' => true, 'map' => true, 'object' => true,
265
			'param' => true, 'q' => true, 'rb' => true, 'rbc' => true,
266
			'rp' => true, 'rt' => true, 'rtc' => true, 'ruby' => true,
267
			's' => true, 'samp' => true, 'select' => true, 'small' => true,
268
			'span' => true, 'strike' => true, 'strong' => true, 'sub' => true,
269
			'sup' => true, 'textarea' => true, 'tt' => true, 'u' => true,
270
			'var' => true,
271
		],
272
	];
273
}
274
275
/**
276
 * A BalanceElement is a simplified version of a DOM Node.  The main
277
 * difference is that we only keep BalanceElements around for nodes
278
 * currently on the BalanceStack of open elements.  As soon as an
279
 * element is closed, with some minor exceptions relating to the
280
 * tree builder "adoption agency algorithm", the element and all its
281
 * children are serialized to a string using the flatten() method.
282
 * This keeps our memory usage low.
283
 *
284
 * @ingroup Parser
285
 * @since 1.27
286
 */
287
class BalanceElement {
288
	/**
289
	 * The namespace of the element.
290
	 * @var string $namespaceURI
291
	 */
292
	public $namespaceURI;
293
	/**
294
	 * The lower-cased name of the element.
295
	 * @var string $localName
296
	 */
297
	public $localName;
298
	/**
299
	 * Attributes for the element, in array form
300
	 * @var array $attribs
301
	 */
302
	public $attribs;
303
304
	/**
305
	 * Parent of this element, or the string "flat" if this element has
306
	 * already been flattened into its parent.
307
	 * @var BalanceElement|string|null $parent
308
	 */
309
	public $parent;
310
311
	/**
312
	 * An array of children of this element.  Typically only the last
313
	 * child will be an actual BalanceElement object; the rest will
314
	 * be strings, representing either text nodes or flattened
315
	 * BalanceElement objects.
316
	 * @var array $children
317
	 */
318
	public $children;
319
320
	/**
321
	 * A unique string identifier for Noah's Ark purposes, lazy initialized
322
	 */
323
	private $noahKey;
324
325
	/**
326
	 * The next active formatting element in the list, or null if this is the
327
	 * end of the AFE list or if the element is not in the AFE list.
328
	 */
329
	public $nextAFE;
330
331
	/**
332
	 * The previous active formatting element in the list, or null if this is
333
	 * the start of the list or if the element is not in the AFE list.
334
	 */
335
	public $prevAFE;
336
337
	/**
338
	 * The next element in the Noah's Ark species bucket.
339
	 */
340
	public $nextNoah;
341
342
	/**
	 * Initialized to the empty string by the constructor.  Declared
	 * explicitly so the constructor no longer creates a dynamic property.
	 * NOTE(review): nothing in the visible part of this file reads this
	 * field; confirm whether it can be dropped entirely.
	 * @var string $contents
	 */
	public $contents;

	/**
	 * Make a new BalanceElement corresponding to the HTML DOM Element
	 * with the given localname, namespace, and attributes.  The new
	 * element starts with no parent and no children.
	 *
	 * @param string $namespaceURI The namespace of the element.
	 * @param string $localName The lowercased name of the tag.
	 * @param array $attribs Attributes of the element
	 */
	public function __construct( $namespaceURI, $localName, array $attribs ) {
		Assert::parameterType( 'string', $namespaceURI, '$namespaceURI' );
		Assert::parameterType( 'string', $localName, '$localName' );

		$this->localName = $localName;
		$this->namespaceURI = $namespaceURI;
		$this->attribs = $attribs;
		$this->contents = '';
		$this->parent = null;
		$this->children = [];
	}
361
362
	/**
	 * Remove the given child from this element and clear its parent
	 * pointer.
	 *
	 * Called on another instance's parent by insertBefore() and
	 * appendChild() when re-parenting an element.
	 *
	 * @param BalanceElement $elt The child to detach; its parent must be
	 *   $this and it must be present in $this->children.
	 */
	private function removeChild( $elt ) {
		if ( $this->parent === 'flat' ) {
			// The message serializes $this (and its whole subtree), so
			// only build it when the precondition has actually failed.
			Assert::precondition(
				false, "Can't removeChild after flattening $this"
			);
		}
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
		Assert::parameter(
			$elt->parent === $this, 'elt', 'must have $this as a parent'
		);
		$idx = array_search( $elt, $this->children, true );
		Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
		$elt->parent = null;
		array_splice( $this->children, $idx, 1 );
	}
379
380
	/**
	 * Find $a in the list of children and insert $b before it.
	 *
	 * If $b is an element that already has a parent, it is detached from
	 * that parent first and then re-parented to $this.
	 *
	 * @param BalanceElement $a Existing child of $this marking the
	 *   insertion position.
	 * @param BalanceElement|string $b Element or text to insert.
	 */
	public function insertBefore( $a, $b ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't insertBefore after flattening."
		);
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $a, '$a' );
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $b, '$b' );
		$idx = array_search( $a, $this->children, true );
		Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
		if ( is_string( $b ) ) {
			array_splice( $this->children, $idx, 0, [ $b ] );
			return;
		}
		Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
		if ( $b->parent !== null ) {
			$sameParent = ( $b->parent === $this );
			$b->parent->removeChild( $b );
			if ( $sameParent && $b !== $a ) {
				// Detaching $b from this very child list may have shifted
				// $a to the left; look its position up again so that $b
				// really ends up immediately before $a.
				$idx = array_search( $a, $this->children, true );
			}
		}
		array_splice( $this->children, $idx, 0, [ $b ] );
		$b->parent = $this;
	}
404
405
	/**
	 * Append $elt to the end of the list of children.
	 *
	 * Strings are appended as-is.  An element that currently has a parent
	 * is removed from that parent before being attached to $this.
	 *
	 * @param BalanceElement|string $elt
	 */
	public function appendChild( $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't appendChild after flattening."
		);
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $elt, '$elt' );
		if ( !is_string( $elt ) ) {
			// Detach from the previous parent, if there was one.
			$previousParent = $elt->parent;
			if ( $previousParent !== null ) {
				$previousParent->removeChild( $elt );
			}
			$elt->parent = $this;
		}
		$this->children[] = $elt;
	}
425
426
	/**
	 * Transfer all of the children of $elt to $this, leaving $elt with
	 * an empty child list.
	 *
	 * @param BalanceElement $elt The element whose children are moved.
	 */
	public function adoptChildren( $elt ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
		Assert::precondition(
			$elt->parent !== 'flat', "Can't adoptChildren after flattening."
		);
		foreach ( $elt->children as $node ) {
			$isElement = !is_string( $node );
			if ( $isElement ) {
				// Clearing the parent up front keeps appendChild() from
				// calling removeChild() for every node, which would be an
				// O(n^2) series of array_splice operations.
				$node->parent = null;
			}
			$this->appendChild( $node );
		}
		$elt->children = [];
	}
445
446
	/**
	 * Flatten this node and all of its children into a string, as specified
	 * by the HTML serialization specification, and replace this node
	 * in its parent by that string.  Afterwards $this->parent is the
	 * marker string 'flat'.
	 *
	 * In tidy-compatibility mode, child elements are flattened first, a
	 * 'mw:p-wrap' element is renamed to 'p' and emitted as the empty string
	 * when it contains only whitespace, and an attribute-less blank 'tr' or
	 * 'li' gets the 'mw-empty-elt' class.
	 *
	 * @param bool $tidyCompat Whether to apply the tidy-compatibility rules.
	 * @return string The serialization that replaced this node.
	 * @see __toString()
	 */
	public function flatten( $tidyCompat = false ) {
		Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
		Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
		$idx = array_search( $this, $this->parent->children, true );
		Assert::parameter(
			$idx !== false, '$this', 'must be a child of its parent'
		);
		if ( !$tidyCompat ) {
			$flat = "{$this}";
		} else {
			// Flatten every child element (this rewrites $this->children
			// through the child's parent pointer) and note whether any
			// child contains a non-whitespace character.
			$blank = true;
			foreach ( $this->children as $child ) {
				$text = is_string( $child ) ?
					$child : $child->flatten( $tidyCompat );
				if ( $blank && preg_match( '/[^\t\n\f\r ]/', $text ) ) {
					$blank = false;
				}
			}
			if ( $this->isA( 'mw:p-wrap' ) ) {
				$this->localName = 'p';
				// A blank paragraph wrapper produces no output at all.
				$flat = $blank ? '' : "{$this}";
			} else {
				$isRowOrItem =
					$this->localName === 'tr' || $this->localName === 'li';
				if ( $blank && !count( $this->attribs ) && $isRowOrItem ) {
					// Add 'mw-empty-elt' class so elements can be hidden
					// via CSS for compatibility with legacy tidy.
					$this->attribs = [ 'class' => "mw-empty-elt" ];
				}
				$flat = "{$this}";
			}
		}
		$this->parent->children[$idx] = $flat;
		$this->parent = 'flat'; # for assertion checking
		return $flat;
	}
490
491
	/**
	 * Serialize this node and all of its children to a string, as specified
	 * by the HTML serialization specification.
	 *
	 * Members of BalanceSets::$emptyElementSet are written in the
	 * self-closing form and must not have children; every other element is
	 * written as a start tag, its children, and an end tag.
	 *
	 * @return string The serialization of the BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
	 */
	public function __toString() {
		$attrText = '';
		foreach ( $this->attribs as $name => $value ) {
			$attrText .= ' ' . $name . '="' . Sanitizer::encodeAttribute( $value ) . '"';
		}
		$tag = $this->localName;
		if ( $this->isA( BalanceSets::$emptyElementSet ) ) {
			Assert::invariant(
				count( $this->children ) === 0,
				"Empty elements shouldn't have children."
			);
			return "<{$tag}{$attrText} />";
		}
		// implode() string-converts each child, which serializes any
		// child that is still a BalanceElement.
		return "<{$tag}{$attrText}>" . implode( '', $this->children ) . "</{$tag}>";
	}
520
521
	# Utility functions on BalanceElements.
522
523
	/**
	 * Determine if $this represents a specific HTML tag, is a member of
	 * a tag set, or is equal to another BalanceElement.
	 *
	 * @param BalanceElement|array|string $set The target BalanceElement
	 *   (compared by identity), set (from the BalanceSets class, looked up
	 *   by namespace and local name), or string (HTML tag name).
	 * @return bool
	 */
	public function isA( $set ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|array|string', $set, '$set' );
		if ( is_array( $set ) ) {
			// isset() on the nested key is false when either level is missing.
			return isset( $set[$this->namespaceURI][$this->localName] );
		}
		if ( $set instanceof BalanceElement ) {
			return $this === $set;
		}
		# Anything else is taken to be an HTML element name.
		return $this->isHtml() && $this->localName === $set;
	}
543
544
	/**
	 * Determine if $this represents an element in the HTML namespace.
	 *
	 * @return bool True when the element's namespace URI is
	 *   BalanceSets::HTML_NAMESPACE.
	 */
	public function isHtml() {
		return BalanceSets::HTML_NAMESPACE === $this->namespaceURI;
	}
552
553
	/**
	 * Determine if $this represents a MathML text integration point,
	 * as defined in the HTML5 specification, by checking membership in
	 * BalanceSets::$mathmlTextIntegrationPointSet.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
	 */
	public function isMathmlTextIntegrationPoint() {
		$integrationPoints = BalanceSets::$mathmlTextIntegrationPointSet;
		return $this->isA( $integrationPoints );
	}
563
564
	/**
	 * Determine if $this represents an HTML integration point,
	 * as defined in the HTML5 specification.
	 *
	 * That is the case for members of BalanceSets::$htmlIntegrationPointSet
	 * and for a MathML 'annotation-xml' element whose 'encoding' attribute
	 * equals 'text/html' or 'application/xhtml+xml', ignoring case.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
	 */
	public function isHtmlIntegrationPoint() {
		if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
			return true;
		}
		$isAnnotationXml =
			$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$this->localName === 'annotation-xml';
		if ( !$isAnnotationXml || !isset( $this->attribs['encoding'] ) ) {
			return false;
		}
		$encoding = $this->attribs['encoding'];
		return strcasecmp( $encoding, 'text/html' ) == 0 ||
			strcasecmp( $encoding, 'application/xhtml+xml' ) == 0;
	}
586
587
	/**
	 * Get a string key for the Noah's Ark algorithm.
	 *
	 * The key is the serialization of the namespace, the local name and
	 * the attributes sorted by name; it is computed on first use and
	 * cached in $this->noahKey.
	 *
	 * @return string
	 */
	public function getNoahKey() {
		if ( $this->noahKey !== null ) {
			return $this->noahKey;
		}
		// Sort a copy so that attribute order does not affect the key and
		// $this->attribs keeps its original order.
		$sortedAttribs = $this->attribs;
		ksort( $sortedAttribs );
		$this->noahKey = serialize(
			[ $this->namespaceURI, $this->localName, $sortedAttribs ]
		);
		return $this->noahKey;
	}
598
}
599
600
/**
601
 * The "stack of open elements" as defined in the HTML5 tree builder
602
 * spec.  This contains methods to ensure that content (start tags, text)
603
 * are inserted at the correct place in the output string, and to
604
 * flatten BalanceElements as they are closed to avoid holding onto
605
 * a complete DOM tree for the document in memory.
606
 *
607
 * The stack defines a PHP iterator to traverse it in "reverse order",
608
 * that is, the most-recently-added element is visited first in a
609
 * foreach loop.
610
 *
611
 * @ingroup Parser
612
 * @since 1.27
613
 * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
614
 */
615
class BalanceStack implements IteratorAggregate {
616
	/**
617
	 * Backing storage for the stack.
618
	 * @var array $elements
619
	 */
620
	private $elements = [];
621
	/**
622
	 * Foster parent mode determines how nodes are inserted into the
623
	 * stack.
624
	 * @var bool $fosterParentMode
625
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
626
	 */
627
	public $fosterParentMode = false;
628
	/**
629
	 * Tidy compatibility mode, determines behavior of body/blockquote
630
	 */
631
	public $tidyCompat = false;
632
633
	/**
	 * Create a new BalanceStack with a single BalanceElement on it,
	 * representing the root &lt;html&gt; node.
	 */
	public function __construct() {
		# The root <html> element always sits at the bottom of the stack.
		$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] );
		$this->elements[] = $root;
	}
644
645
	/**
	 * Return a string representing the output of the tree builder:
	 * all the children of the root &lt;html&gt; node.  Children that are
	 * still BalanceElement objects are flattened using the stack's
	 * tidyCompat setting.
	 * @return string
	 */
	public function getOutput() {
		// The enclosing '<html>....</html>' is deliberately left out.
		$root = $this->elements[0];
		$html = '';
		foreach ( $root->children as $child ) {
			if ( is_string( $child ) ) {
				$html .= $child;
			} else {
				$html .= $child->flatten( $this->tidyCompat );
			}
		}
		return $html;
	}
659
660
	/**
	 * Insert text at the appropriate place for inserting a node.
	 *
	 * In foster-parent mode with a table, table section or row as the
	 * current node, the text is handed to fosterParent().  In tidy
	 * compatibility mode, text directly inside a member of
	 * BalanceSets::$tidyPWrapSet is first wrapped in a new 'mw:p-wrap'
	 * element.  Otherwise the text is appended to the current node.
	 *
	 * @param string $value
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
	 */
	public function insertText( $value ) {
		Assert::parameterType( 'string', $value, '$value' );
		if (
			$this->fosterParentMode &&
			$this->currentNode()->isA( BalanceSets::$tableSectionRowSet )
		) {
			$this->fosterParent( $value );
		} elseif (
			$this->tidyCompat &&
			$this->currentNode()->isA( BalanceSets::$tidyPWrapSet )
		) {
			// Open the paragraph wrapper, then retry the insertion with
			// the wrapper as the new current node.
			$this->insertHTMLElement( 'mw:p-wrap', [] );
			$this->insertText( $value );
		} else {
			$this->currentNode()->appendChild( $value );
		}
	}
682
683
	/**
	 * Insert a BalanceElement at the appropriate place, pushing it
	 * on to the open elements stack.
	 * @param string $namespaceURI The element namespace
	 * @param string $tag The tag name
	 * @param array $attribs Attributes of the element, in array form
	 *   (passed straight to the BalanceElement constructor, which
	 *   requires an array).
	 * @return BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
	 */
	public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
		return $this->insertElement(
			new BalanceElement( $namespaceURI, $tag, $attribs )
		);
	}
697
698
	/**
	 * Insert an HTML element at the appropriate place, pushing it on to
	 * the open elements stack.  Delegates to insertForeignElement() with
	 * the HTML namespace.
	 * @param string $tag The tag name
	 * @param array $attribs Attributes of the element, in array form
	 *   (ultimately passed to the BalanceElement constructor, which
	 *   requires an array).
	 * @return BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
	 */
	public function insertHTMLElement( $tag, $attribs ) {
		return $this->insertForeignElement(
			BalanceSets::HTML_NAMESPACE, $tag, $attribs
		);
	}
711
712
	/**
	 * Insert an element at the appropriate place and push it on to the
	 * open elements stack.
	 *
	 * If the current node is an 'mw:p-wrap' element and $elt is not in
	 * BalanceSets::$tidyInlineSet, the wrapper is popped first.  In
	 * foster-parent mode with a table, table section or row as the
	 * current node, the element is handed to fosterParent() and the
	 * value it returns is the one pushed and returned.
	 *
	 * @param BalanceElement $elt
	 * @return BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
	 */
	public function insertElement( $elt ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );
		if (
			$this->currentNode()->isA( 'mw:p-wrap' ) &&
			!$elt->isA( BalanceSets::$tidyInlineSet )
		) {
			// Tidy compatibility.
			$this->pop();
		}
		if (
			$this->fosterParentMode &&
			$this->currentNode()->isA( BalanceSets::$tableSectionRowSet )
		) {
			$elt = $this->fosterParent( $elt );
		} else {
			$this->currentNode()->appendChild( $elt );
		}
		// The failure messages serialize $elt, so they are only built
		// once an invariant is known to be violated.
		if ( $elt->parent === null ) {
			Assert::invariant( false, "$elt must be in tree" );
		}
		if ( $elt->parent === 'flat' ) {
			Assert::invariant( false, "$elt must not have been previous flattened" );
		}
		array_push( $this->elements, $elt );
		return $elt;
	}
741
742
	/**
	 * Determine if the stack has $tag in scope, using
	 * BalanceSets::$inScopeSet as the scope boundary.
	 * @param BalanceElement|array|string $tag
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
	 */
	public function inScope( $tag ) {
		$boundary = BalanceSets::$inScopeSet;
		return $this->inSpecificScope( $tag, $boundary );
	}
751
752
	/**
	 * Determine if the stack has $tag in button scope, using
	 * BalanceSets::inButtonScopeSet() as the scope boundary.
	 * @param BalanceElement|array|string $tag
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
	 */
	public function inButtonScope( $tag ) {
		$boundary = BalanceSets::inButtonScopeSet();
		return $this->inSpecificScope( $tag, $boundary );
	}
761
762
	/**
	 * Determine if the stack has $tag in list item scope, using
	 * BalanceSets::inListItemScopeSet() as the scope boundary.
	 * @param BalanceElement|array|string $tag
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
	 */
	public function inListItemScope( $tag ) {
		$boundary = BalanceSets::inListItemScopeSet();
		return $this->inSpecificScope( $tag, $boundary );
	}
771
772
	/**
	 * Determine if the stack has $tag in table scope, using
	 * BalanceSets::$inTableScopeSet as the scope boundary.
	 * @param BalanceElement|array|string $tag
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
	 */
	public function inTableScope( $tag ) {
		$boundary = BalanceSets::$inTableScopeSet;
		return $this->inSpecificScope( $tag, $boundary );
	}
781
782
	/**
	 * Determine if the stack has $tag in a specific scope, $set.
	 *
	 * The stack is walked from the current node towards the root; the
	 * search succeeds at the first element matching $tag and fails at the
	 * first element matching $set, or when the stack is exhausted.
	 *
	 * @param BalanceElement|array|string $tag
	 * @param BalanceElement|array|string $set
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
	 */
	public function inSpecificScope( $tag, $set ) {
		foreach ( $this as $node ) {
			$isTarget = $node->isA( $tag );
			// Stop at the target (found) or at a scope boundary (not found).
			if ( $isTarget || $node->isA( $set ) ) {
				return $isTarget;
			}
		}
		return false;
	}
800
801
	/**
	 * Generate implied end tags: pop elements off the stack for as long as
	 * the current node is in the implied-end-tag set and does not match
	 * $butnot.
	 * @param BalanceElement|array|string|null $butnot Element, set or tag
	 *   name at which popping stops; null to disable this check.
	 * @param bool $thorough True if we should generate end tags thoroughly
	 *   (uses BalanceSets::$thoroughImpliedEndTagsSet instead of
	 *   BalanceSets::$impliedEndTagsSet).
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
	 */
	public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
		if ( $thorough ) {
			$closable = BalanceSets::$thoroughImpliedEndTagsSet;
		} else {
			$closable = BalanceSets::$impliedEndTagsSet;
		}
		while ( $this->length() > 0 ) {
			$isExcluded =
				$butnot !== null && $this->currentNode()->isA( $butnot );
			if ( $isExcluded || !$this->currentNode()->isA( $closable ) ) {
				return;
			}
			$this->pop();
		}
	}
821
822
	/**
823
	 * Return the current node (the element in the stack with the largest
824
	 * index).
825
	 * @return BalanceElement
826
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#current-node
827
	 */
828
	public function currentNode() {
		// The current node lives at the highest index of the stack.
		$lastIdx = count( $this->elements ) - 1;
		return $this->node( $lastIdx );
	}
831
832
	/**
	 * Return the adjusted current node.
	 *
	 * When a fragment context is supplied and exactly one element is on
	 * the stack, the fragment context is returned; in every other case
	 * the current node is returned.
	 * @param mixed $fragmentContext The fragment context, or a falsy value
	 *   when there is none (presumably a BalanceElement or null; confirm
	 *   against callers).
	 * @return mixed Either $fragmentContext or the current node.
	 */
	public function adjustedCurrentNode( $fragmentContext ) {
		if ( $fragmentContext && $this->length() === 1 ) {
			return $fragmentContext;
		}
		return $this->currentNode();
	}
839
840
	/**
841
	 * Return an iterator over this stack which visits the current node
842
	 * first, and the root node last.
843
	 * @return Iterator
844
	 */
845
	public function getIterator() {
		// Reverse order: the current node comes out first, the root last.
		$reversed = new ReverseArrayIterator( $this->elements );
		return $reversed;
	}
848
849
	/**
850
	 * Return the BalanceElement at the given position $idx, where
851
	 * position 0 represents the root element.
852
	 * @param int $idx
853
	 * @return BalanceElement
854
	 */
855
	public function node( $idx ) {
		// Plain positional lookup; no bounds checking is done here.
		$elt = $this->elements[ $idx ];
		return $elt;
	}
858
859
	/**
860
	 * Replace the element at position $idx in the BalanceStack with $elt.
861
	 * @param int $idx
862
	 * @param BalanceElement $elt
863
	 */
864
	public function replaceAt( $idx, $elt ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );

		// Neither the outgoing nor the incoming element may be flattened.
		$outgoing = $this->elements[$idx];
		$outgoingIsLive = ( $outgoing->parent !== 'flat' );
		Assert::precondition(
			$outgoingIsLive,
			'Replaced element should not have already been flattened.'
		);
		$incomingIsLive = ( $elt->parent !== 'flat' );
		Assert::precondition(
			$incomingIsLive,
			'New element should not have already been flattened.'
		);

		$this->elements[$idx] = $elt;
	}
876
877
	/**
878
	 * Return the position of the given BalanceElement, set, or
879
	 * HTML tag name string in the BalanceStack.
880
	 * @param BalanceElement|array|string $tag
881
	 * @return int
882
	 */
883
	public function indexOf( $tag ) {
		// Scan from the current node down to the root; -1 signals a miss.
		$i = count( $this->elements );
		while ( $i-- > 0 ) {
			if ( $this->elements[$i]->isA( $tag ) ) {
				return $i;
			}
		}
		return -1;
	}
891
892
	/**
893
	 * Return the number of elements currently in the BalanceStack.
894
	 * @return int
895
	 */
896
	public function length() {
		$size = count( $this->elements );
		return $size;
	}
899
900
	/**
901
	 * Remove the current node from the BalanceStack, flattening it
902
	 * in the process.
903
	 */
904
	public function pop() {
		$popped = array_pop( $this->elements );
		if ( $popped->isA( 'mw:p-wrap' ) ) {
			// p-wrappers are taken off the stack without being flattened.
			return;
		}
		$popped->flatten( $this->tidyCompat );
	}
910
911
	/**
912
	 * Remove all nodes up to and including position $idx from the
913
	 * BalanceStack, flattening them in the process.
914
	 * @param int $idx
915
	 */
916
	public function popTo( $idx ) {
		// Keep popping until exactly $idx elements remain on the stack.
		for ( ; $this->length() > $idx; ) {
			$this->pop();
		}
	}
921
922
	/**
923
	 * Pop elements off the stack up to and including the first
924
	 * element with the specified HTML tagname (or matching the given
925
	 * set).
926
	 * @param BalanceElement|array|string $tag
927
	 */
928
	public function popTag( $tag ) {
		while ( $this->length() > 0 ) {
			// Test before popping: the matching element is popped too,
			// and then we stop.
			$matched = $this->currentNode()->isA( $tag );
			$this->pop();
			if ( $matched ) {
				break;
			}
		}
	}
937
938
	/**
939
	 * Pop elements off the stack *not including* the first element
940
	 * in the specified set.
941
	 * @param BalanceElement|array|string $set
942
	 */
943
	public function clearToContext( $set ) {
		// The loop stops at length 1, so the root (<html>) element is
		// never popped off.
		while ( $this->length() > 1 && !$this->currentNode()->isA( $set ) ) {
			$this->pop();
		}
	}
952
953
	/**
954
	 * Remove the given $elt from the BalanceStack, optionally
955
	 * flattening it in the process.
956
	 * @param BalanceElement $elt The element to remove.
957
	 * @param bool $flatten Whether to flatten the removed element.
958
	 */
959
	public function removeElement( $elt, $flatten = true ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $elt, '$elt' );

		$eltIsLive = ( $elt->parent !== 'flat' );
		Assert::parameter(
			$eltIsLive,
			'$elt',
			'$elt should not already have been flattened.'
		);
		$parentIsLive = ( $elt->parent->parent !== 'flat' );
		Assert::parameter(
			$parentIsLive,
			'$elt',
			'The parent of $elt should not already have been flattened.'
		);

		// Locate by identity (strict search) and cut it out of the stack.
		$pos = array_search( $elt, $this->elements, true );
		Assert::parameter( $pos !== false, '$elt', 'must be in stack' );
		array_splice( $this->elements, $pos, 1 );

		if ( $flatten ) {
			// Serialize $elt into its parent now. When skipped, it still
			// gets serialized later together with its parent; its object
			// tree just stays in memory a little longer.
			$elt->flatten( $this->tidyCompat );
		}

		$stillPresent = ( array_search( $elt, $this->elements, true ) !== false );
		Assert::postcondition(
			!$stillPresent,
			'$elt should no longer be in open elements stack'
		);
	}
986
987
	/**
	 * Find $a in the BalanceStack and insert $b after it.
	 *
	 * A parameter assertion fails if $a is not currently in the stack.
	 * @param BalanceElement $a The element to search for.
	 * @param BalanceElement $b The element to insert immediately after $a.
	 */
	public function insertAfter( $a, $b ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $a, '$a' );
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement', $b, '$b' );
		$idx = $this->indexOf( $a );
		// indexOf() reports a miss as -1 (never false), so test the sign;
		// otherwise a missing $a would silently insert $b at position 0.
		Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
999
1000
	# Fostering and adoption.
1001
1002
	/**
	 * Foster parent the given $elt in the stack of open elements.
	 * @param BalanceElement|string $elt
	 * @return BalanceElement|string The value passed in, except in
	 *   tidy-compat mode when an 'mw:p-wrap' element is fostered directly
	 *   after an existing 'mw:p-wrap': then the existing wrapper is
	 *   returned and nothing is inserted.
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
	 */
	private function fosterParent( $elt ) {
		Assert::parameterType( 'MediaWiki\Tidy\BalanceElement|string', $elt, '$elt' );
		$lastTable = $this->indexOf( 'table' );
		$lastTemplate = $this->indexOf( 'template' );
		// When set, $elt is inserted before this child of $parent rather
		// than appended.
		$before = null;

		// Every branch below assigns $parent.
		if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
			$parent = $this->elements[$lastTemplate];
		} elseif ( $lastTable >= 0 ) {
			$parent = $this->elements[$lastTable]->parent;
			# Assume all tables have parents, since we're not running scripts!
			Assert::invariant(
				$parent !== null, "All tables should have parents"
			);
			$before = $this->elements[$lastTable];
		} else {
			$parent = $this->elements[0]; // the `html` element.
		}

		if ( $this->tidyCompat ) {
			if ( is_string( $elt ) ) {
				// We're fostering text: do we need a p-wrapper?
				if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
					$this->insertHTMLElement( 'mw:p-wrap', [] );
					$this->insertText( $elt );
					return $elt;
				}
			} else {
				// We're fostering an element; do we need to merge p-wrappers?
				if ( $elt->isA( 'mw:p-wrap' ) ) {
					// Position at which $elt would be inserted among the
					// parent's children.
					$idx = $before ?
						array_search( $before, $parent->children, true ) :
						count( $parent->children );
					// The child that would end up immediately before $elt.
					$after = $idx > 0 ? $parent->children[$idx - 1] : '';
					if (
						$after instanceof BalanceElement &&
						$after->isA( 'mw:p-wrap' )
					) {
						return $after; // Re-use existing p-wrapper.
					}
				}
			}
		}

		if ( $before ) {
			$parent->insertBefore( $before, $elt );
		} else {
			$parent->appendChild( $elt );
		}
		return $elt;
	}
1059
1060
	/**
1061
	 * Run the "adoption agency algorithm" (AAA) for the given subject
1062
	 * tag name.
1063
	 * @param string $tag The subject tag name.
1064
	 * @param BalanceActiveFormattingElements $afe The current
1065
	 *   active formatting elements list.
1066
	 * @return true if the adoption agency algorithm "did something", false
1067
	 *   if more processing is required by the caller.
1068
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1069
	 */
1070
	public function adoptionAgency( $tag, $afe ) {
		// If the current node is an HTML element whose tag name is subject,
		// and the current node is not in the list of active formatting
		// elements, then pop the current node off the stack of open
		// elements and abort these steps.
		if (
			$this->currentNode()->isA( $tag ) &&
			!$afe->isInList( $this->currentNode() )
		) {
			$this->pop();
			return true; // no more handling required
		}

		// Let outer loop counter be zero.
		$outer = 0;

		// Outer loop: If outer loop counter is greater than or
		// equal to eight, then abort these steps.
		while ( $outer < 8 ) {
			// Increment outer loop counter by one.
			$outer++;

			// Let the formatting element be the last element in the list
			// of active formatting elements that: is between the end of
			// the list and the last scope marker in the list, if any, or
			// the start of the list otherwise, and has the same tag name
			// as the token.
			$fmtelt = $afe->findElementByTag( $tag );

			// If there is no such node, then abort these steps and instead
			// act as described in the "any other end tag" entry below.
			if ( !$fmtelt ) {
				return false; // false means handle by the default case
			}

			// Otherwise, if there is such a node, but that node is not in
			// the stack of open elements, then this is a parse error;
			// remove the element from the list, and abort these steps.
			$index = $this->indexOf( $fmtelt );
			if ( $index < 0 ) {
				$afe->remove( $fmtelt );
				return true;   // true means no more handling required
			}

			// Otherwise, if there is such a node, and that node is also in
			// the stack of open elements, but the element is not in scope,
			// then this is a parse error; ignore the token, and abort
			// these steps.
			if ( !$this->inScope( $fmtelt ) ) {
				return true;
			}

			// Let the furthest block be the topmost node in the stack of
			// open elements that is lower in the stack than the formatting
			// element, and is an element in the special category. There
			// might not be one.
			$furthestblock = null;
			$furthestblockindex = -1;
			$stacklen = $this->length();
			for ( $i = $index+1; $i < $stacklen; $i++ ) {
				if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
					$furthestblock = $this->node( $i );
					$furthestblockindex = $i;
					break;
				}
			}

			// If there is no furthest block, then the UA must skip the
			// subsequent steps and instead just pop all the nodes from the
			// bottom of the stack of open elements, from the current node
			// up to and including the formatting element, and remove the
			// formatting element from the list of active formatting
			// elements.
			if ( !$furthestblock ) {
				$this->popTag( $fmtelt );
				$afe->remove( $fmtelt );
				return true;
			} else {
				// Let the common ancestor be the element immediately above
				// the formatting element in the stack of open elements.
				$ancestor = $this->node( $index-1 );

				// Let a bookmark note the position of the formatting
				// element in the list of active formatting elements
				// relative to the elements on either side of it in the
				// list.
				$BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
				$afe->insertAfter( $fmtelt, $BOOKMARK );

				// Let node and last node be the furthest block.
				// ($node is tracked through $nodeindex and is first
				// assigned at the top of the inner loop.)
				$lastnode = $furthestblock;
				$nodeindex = $furthestblockindex;

				// Let inner loop counter be zero.
				$inner = 0;

				while ( true ) {

					// Increment inner loop counter by one.
					$inner++;

					// Let node be the element immediately above node in
					// the stack of open elements, or if node is no longer
					// in the stack of open elements (e.g. because it got
					// removed by this algorithm), the element that was
					// immediately above node in the stack of open elements
					// before node was removed.
					$node = $this->node( --$nodeindex );

					// If node is the formatting element, then go
					// to the next step in the overall algorithm.
					if ( $node === $fmtelt ) {
						break;
					}

					// If the inner loop counter is greater than three and node
					// is in the list of active formatting elements, then remove
					// node from the list of active formatting elements.
					$isAFE = $afe->isInList( $node );
					if ( $inner > 3 && $isAFE ) {
						$afe->remove( $node );
						$isAFE = false;
					}

					// If node is not in the list of active formatting
					// elements, then remove node from the stack of open
					// elements and then go back to the step labeled inner
					// loop.
					if ( !$isAFE ) {
						// Don't flatten here, since we're about to relocate
						// parts of this $node.
						$this->removeElement( $node, false );
						continue;
					}

					// Create an element for the token for which the
					// element node was created with common ancestor as
					// the intended parent, replace the entry for node
					// in the list of active formatting elements with an
					// entry for the new element, replace the entry for
					// node in the stack of open elements with an entry for
					// the new element, and let node be the new element.
					$newelt = new BalanceElement(
						$node->namespaceURI, $node->localName, $node->attribs );
					$afe->replace( $node, $newelt );
					$this->replaceAt( $nodeindex, $newelt );
					$node = $newelt;

					// If last node is the furthest block, then move the
					// aforementioned bookmark to be immediately after the
					// new node in the list of active formatting elements.
					if ( $lastnode === $furthestblock ) {
						$afe->remove( $BOOKMARK );
						$afe->insertAfter( $newelt, $BOOKMARK );
					}

					// Insert last node into node, first removing it from
					// its previous parent node if any.
					$node->appendChild( $lastnode );

					// Let last node be node.
					$lastnode = $node;
				}

				// If the common ancestor node is a table, tbody, tfoot,
				// thead, or tr element, then, foster parent whatever last
				// node ended up being in the previous step, first removing
				// it from its previous parent node if any.
				if (
					$this->fosterParentMode &&
					$ancestor->isA( BalanceSets::$tableSectionRowSet )
				) {
					$this->fosterParent( $lastnode );
				} else {
					// Otherwise, append whatever last node ended up being in
					// the previous step to the common ancestor node, first
					// removing it from its previous parent node if any.
					$ancestor->appendChild( $lastnode );
				}

				// Create an element for the token for which the
				// formatting element was created, with furthest block
				// as the intended parent.
				$newelt2 = new BalanceElement(
					$fmtelt->namespaceURI, $fmtelt->localName, $fmtelt->attribs );

				// Take all of the child nodes of the furthest block and
				// append them to the element created in the last step.
				$newelt2->adoptChildren( $furthestblock );

				// Append that new element to the furthest block.
				$furthestblock->appendChild( $newelt2 );

				// Remove the formatting element from the list of active
				// formatting elements, and insert the new element into the
				// list of active formatting elements at the position of
				// the aforementioned bookmark.
				$afe->remove( $fmtelt );
				$afe->replace( $BOOKMARK, $newelt2 );

				// Remove the formatting element from the stack of open
				// elements, and insert the new element into the stack of
				// open elements immediately below the position of the
				// furthest block in that stack.
				$this->removeElement( $fmtelt );
				$this->insertAfter( $furthestblock, $newelt2 );
			}
		}

		return true;
	}
1281
1282
	/**
1283
	 * Return the contents of the open elements stack as a string for
1284
	 * debugging.
1285
	 * @return string
1286
	 */
1287
	public function __toString() {
		// Collect local names from the root element up to the current node.
		$names = [];
		foreach ( $this->elements as $elt ) {
			$names[] = $elt->localName;
		}
		// Separator first: the reversed (array, separator) argument order
		// is deprecated as of PHP 7.4 and rejected by PHP 8.
		return implode( ' ', $names );
	}
1294
}
1295
1296
/**
1297
 * A pseudo-element used as a marker in the list of active formatting elements
1298
 *
1299
 * @ingroup Parser
1300
 * @since 1.27
1301
 */
1302
class BalanceMarker {
	/** Link to the previous (less recent) entry of the AFE list, if any */
	public $prevAFE = null;

	/** Link to the next (more recent) entry of the AFE list, if any */
	public $nextAFE = null;
}
1306
1307
/**
1308
 * The list of active formatting elements, which is used to handle
1309
 * mis-nested formatting element tags in the HTML5 tree builder
1310
 * specification.
1311
 *
1312
 * @ingroup Parser
1313
 * @since 1.27
1314
 * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
1315
 */
1316
class BalanceActiveFormattingElements {
1317
	/** The last (most recent) element in the list */
1318
	private $tail;
1319
1320
	/** The first (least recent) element in the list */
1321
	private $head;
1322
1323
	/**
1324
	 * An array of arrays representing the population of elements in each bucket
1325
	 * according to the Noah's Ark clause. The outer array is stack-like, with each
1326
	 * integer-indexed element representing a segment of the list, bounded by
1327
	 * markers. The first element represents the segment of the list before the
1328
	 * first marker.
1329
	 *
1330
	 * The inner arrays are indexed by "Noah key", which is a string which uniquely
1331
	 * identifies each bucket according to the rules in the spec. The value in
1332
	 * the inner array is the first (least recently inserted) element in the bucket,
1333
	 * and subsequent members of the bucket can be found by iterating through the
1334
	 * singly-linked list via $node->nextNoah.
1335
	 *
1336
	 * This is optimised for the most common case of inserting into a bucket
1337
	 * with zero members, and deleting a bucket containing one member. In the
1338
	 * worst case, iteration through the list is still O(1) in the document
1339
	 * size, since each bucket can have at most 3 members.
1340
	 */
1341
	private $noahTableStack = [ [] ];
1342
1343
	/**
	 * Unlink every entry of the list when the list object is destroyed
	 * (presumably so that the cross-references between entries do not
	 * keep them alive; confirm).
	 */
	public function __destruct() {
		for ( $node = $this->head; $node; $node = $next ) {
			// Remember the successor before the links are cleared.
			$next = $node->nextAFE;
			$node->prevAFE = $node->nextAFE = null;
			if ( !( $node instanceof BalanceMarker ) ) {
				// BalanceMarker declares no $nextNoah; writing it there
				// would create a dynamic property.
				$node->nextNoah = null;
			}
		}
		$this->head = $this->tail = $this->noahTableStack = null;
	}
1350
1351
	/**
	 * Append a new BalanceMarker to the end of the list and start a
	 * fresh, empty Noah table for the segment that follows it.
	 */
	public function insertMarker() {
		$marker = new BalanceMarker;
		$oldTail = $this->tail;
		if ( !$oldTail ) {
			// Empty list: the marker becomes the head as well.
			$this->head = $marker;
		} else {
			$oldTail->nextAFE = $marker;
			$marker->prevAFE = $oldTail;
		}
		$this->tail = $marker;
		array_push( $this->noahTableStack, [] );
	}
1362
1363
	/**
1364
	 * Follow the steps required when the spec requires us to "push onto the
1365
	 * list of active formatting elements".
1366
	 * @param BalanceElement $elt
1367
	 */
1368
	public function push( BalanceElement $elt ) {
		// An element may appear in the list only once.
		$alreadyLinked = ( $this->head === $elt ) || ( $elt->prevAFE !== null );
		if ( $alreadyLinked ) {
			throw new ParameterAssertionException( '$elt',
				'Cannot insert a node into the AFE list twice' );
		}

		// "Noah's Ark clause": if the bucket for this element in the
		// current segment already holds three entries, the earliest one
		// is removed from the list before the new one is linked in.
		$bucketKey = $elt->getNoahKey();
		$topIdx = count( $this->noahTableStack ) - 1;
		if ( isset( $this->noahTableStack[$topIdx][$bucketKey] ) ) {
			$first = $this->noahTableStack[$topIdx][$bucketKey];
			$last = $first;
			for ( $members = 1; $last->nextNoah; $members++ ) {
				$last = $last->nextNoah;
			}
			if ( $members >= 3 ) {
				$this->remove( $first );
			}
			$last->nextNoah = $elt;
		} else {
			$this->noahTableStack[$topIdx][$bucketKey] = $elt;
		}

		// Link onto the end of the main AFE list.
		$oldTail = $this->tail;
		if ( !$oldTail ) {
			$this->head = $elt;
		} else {
			$oldTail->nextAFE = $elt;
			$elt->prevAFE = $oldTail;
		}
		$this->tail = $elt;
	}
1403
1404
	/**
1405
	 * Follow the steps required when the spec asks us to "clear the list of
1406
	 * active formatting elements up to the last marker".
1407
	 */
1408
	public function clearToMarker() {
		// Walk back from the tail, unlinking entries until a marker (or
		// the start of the list) is reached.
		$cursor = $this->tail;
		for ( ; $cursor && !( $cursor instanceof BalanceMarker ); $cursor = $before ) {
			$before = $cursor->prevAFE;
			$cursor->prevAFE = null;
			$cursor->nextNoah = null;
			if ( $before ) {
				$before->nextAFE = null;
			}
		}

		if ( $cursor ) {
			// Stopped on a marker: drop it too, along with the Noah
			// table of its segment.
			$newTail = $cursor->prevAFE;
			if ( $newTail ) {
				$newTail->nextAFE = null;
			}
			array_pop( $this->noahTableStack );
		} else {
			// No marker was found, so only the top-level Noah table
			// exists; empty it.
			$newTail = null;
			$this->noahTableStack[0] = [];
		}

		if ( !$newTail ) {
			// Nothing is left in the list.
			$this->head = null;
		}
		$this->tail = $newTail;
	}
1439
1440
	/**
	 * Find and return the last element with the specified tag between the
	 * end of the list and the last marker on the list.
	 * Used when parsing &lt;a&gt; "in body mode".
	 * @param string $tag Compared strictly against each entry's localName.
	 * @return BalanceElement|null The matching entry, or null if a marker
	 *   or the start of the list is reached first.
	 */
	public function findElementByTag( $tag ) {
		for ( $entry = $this->tail; $entry; $entry = $entry->prevAFE ) {
			if ( $entry instanceof BalanceMarker ) {
				break;
			}
			if ( $entry->localName === $tag ) {
				return $entry;
			}
		}
		return null;
	}
1455
1456
	/**
	 * Determine whether an element is in the list of formatting elements.
	 *
	 * An element counts as listed when it is the head of the list or has
	 * a previous-entry link.
	 * @param BalanceElement $elt
	 * @return boolean
	 */
	public function isInList( BalanceElement $elt ) {
		if ( $this->head === $elt ) {
			return true;
		}
		return (bool)$elt->prevAFE;
	}
1463
1464
	/**
	 * Find the element $elt in the list and remove it.
	 * Used when parsing &lt;a&gt; in body mode.
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException if $elt is not in the list
	 */
	public function remove( BalanceElement $elt ) {
		$before = $elt->prevAFE;
		if ( !$before && $this->head !== $elt ) {
			throw new ParameterAssertionException( '$elt',
				"Attempted to remove an element which is not in the AFE list" );
		}
		$after = $elt->nextAFE;

		// Fix up the ends of the list.
		if ( $this->head === $elt ) {
			$this->head = $after;
		}
		if ( $this->tail === $elt ) {
			$this->tail = $before;
		}
		// Join the two neighbours to each other.
		if ( $before ) {
			$before->nextAFE = $after;
		}
		if ( $after ) {
			$after->prevAFE = $before;
		}
		// Clear the links so that isInList() etc. keep working.
		$elt->prevAFE = null;
		$elt->nextAFE = null;
		// Finally take it out of its Noah bucket.
		$this->removeFromNoahList( $elt );
	}
1493
1494
	/**
	 * Append $elt to its bucket in the Noah table of the current (most
	 * recent) list segment, creating the bucket when it does not exist.
	 * @param BalanceElement $elt
	 */
	private function addToNoahList( BalanceElement $elt ) {
		$key = $elt->getNoahKey();
		$topIdx = count( $this->noahTableStack ) - 1;
		if ( !isset( $this->noahTableStack[$topIdx][$key] ) ) {
			$this->noahTableStack[$topIdx][$key] = $elt;
			return;
		}
		// Walk to the end of the bucket's singly-linked chain.
		$last = $this->noahTableStack[$topIdx][$key];
		while ( $last->nextNoah ) {
			$last = $last->nextNoah;
		}
		$last->nextNoah = $elt;
	}
1507
1508
	/**
	 * Unlink $elt from its bucket in the Noah table of the current (most
	 * recent) list segment. Does nothing when that table has no bucket
	 * for the element's Noah key or the bucket does not contain $elt.
	 * @param BalanceElement $elt
	 */
	private function removeFromNoahList( BalanceElement $elt ) {
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		$key = $elt->getNoahKey();
		if ( !isset( $table[$key] ) ) {
			// No bucket for this key: nothing to unlink. Guarding here
			// avoids an undefined-index read followed by a property read
			// on null.
			return;
		}

		$first = $table[$key];
		if ( $first === $elt ) {
			// $elt leads the bucket: promote its successor, or drop the
			// bucket when it was the only member.
			if ( $first->nextNoah ) {
				$table[$key] = $first->nextNoah;
				$first->nextNoah = null;
			} else {
				unset( $table[$key] );
			}
			return;
		}

		// Otherwise search the chain for the entry whose successor is $elt.
		for ( $prev = $first; $prev->nextNoah; $prev = $prev->nextNoah ) {
			if ( $prev->nextNoah === $elt ) {
				// Found it, unlink
				$prev->nextNoah = $elt->nextNoah;
				$elt->nextNoah = null;
				break;
			}
		}
	}
1532
1533
	/**
	 * Find element $a in the list and replace it with element $b
	 * @param BalanceElement $a The entry to take out of the list.
	 * @param BalanceElement $b The entry that takes over $a's position.
	 * @throws ParameterAssertionException if $a is not in the list
	 */
	public function replace( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to replace an element which is not in the AFE list" );
		}
		$before = $a->prevAFE;
		$after = $a->nextAFE;

		// Fix up the ends of the list.
		if ( $this->head === $a ) {
			$this->head = $b;
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		// Point both neighbours at the replacement.
		if ( $before ) {
			$before->nextAFE = $b;
		}
		if ( $after ) {
			$after->prevAFE = $b;
		}
		// Hand $a's links over to $b, then detach $a.
		$b->prevAFE = $before;
		$b->nextAFE = $after;
		$a->nextAFE = $a->prevAFE = null;
		// Keep the Noah buckets in step.
		$this->removeFromNoahList( $a );
		$this->addToNoahList( $b );
	}
1563
1564
	/**
	 * Find $a in the list and insert $b after it.
	 * @param BalanceElement $a An entry that is already in the list.
	 * @param BalanceElement $b The entry to link in directly after $a.
	 * @throws ParameterAssertionException if $a is not in the list
	 */
	public function insertAfter( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to insert after an element which is not in the AFE list" );
		}
		$following = $a->nextAFE;
		if ( $this->tail === $a ) {
			// $b becomes the new end of the list.
			$this->tail = $b;
		}
		if ( $following ) {
			$following->prevAFE = $b;
		}
		$b->nextAFE = $following;
		$b->prevAFE = $a;
		$a->nextAFE = $b;
		$this->addToNoahList( $b );
	}
1583
1584
	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
1585
	/**
1586
	 * Reconstruct the active formatting elements.
1587
	 * @param BalanceStack $stack The open elements stack
1588
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
1589
	 */
1590
	// @codingStandardsIgnoreEnd
1591
	public function reconstruct( $stack ) {
		$entry = $this->tail;
		// If there are no entries in the list of active formatting elements,
		// then there is nothing to reconstruct
		if ( !$entry ) {
			return;
		}
		// If the last is a marker, do nothing.
		if ( $entry instanceof BalanceMarker ) {
			return;
		}
		// Or if it is an open element, do nothing.
		if ( $stack->indexOf( $entry ) >= 0 ) {
			return;
		}

		// Loop backward through the list until we find a marker or an
		// open element
		$foundBoundary = false;
		while ( $entry->prevAFE ) {
			$entry = $entry->prevAFE;
			if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
				$foundBoundary = true;
				break;
			}
		}

		// Now loop forward, starting from the element after the current one (or
		// the first element if we didn't find a marker or open element),
		// recreating formatting elements and pushing them back onto the list
		// of open elements.
		// Whether to skip ahead must depend on having found a boundary,
		// not on $entry->prevAFE: a marker or open element that is the
		// head of the list has no previous entry, yet must still be
		// skipped rather than re-created.
		if ( $foundBoundary ) {
			$entry = $entry->nextAFE;
		}
		do {
			$newElement = $stack->insertHTMLElement(
				$entry->localName,
				$entry->attribs );
			$this->replace( $entry, $newElement );
			$entry = $newElement->nextAFE;
		} while ( $entry );
	}
1631
1632
	/**
	 * Get a string representation of the AFE list, for debugging
	 *
	 * One line is produced per entry, from head to tail; inconsistent
	 * back-links and a wrong tail pointer are called out in the output.
	 * @return string
	 */
	public function __toString() {
		$out = '';
		$last = null;
		$node = $this->head;
		while ( $node ) {
			if ( $node instanceof BalanceMarker ) {
				$out .= "MARKER\n";
			} else {
				// Local name plus a short hash identifying the object.
				$line = $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
				if ( $node->nextNoah ) {
					$line .= " (noah sibling: {$node->nextNoah->localName}#" .
						substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
						')';
				}
				if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
					$line .= " (reverse link is wrong!)";
				}
				$out .= $line . "\n";
			}
			$last = $node;
			$node = $node->nextAFE;
		}
		// The last entry visited should be the recorded tail.
		if ( $last !== $this->tail ) {
			$out .= "(tail pointer is wrong!)\n";
		}
		return $out;
	}
1659
}
1660
1661
/**
1662
 * An implementation of the tree building portion of the HTML5 parsing
1663
 * spec.
1664
 *
1665
 * This is used to balance and tidy output so that the result can
1666
 * always be cleanly serialized/deserialized by an HTML5 parser.  It
1667
 * does *not* guarantee "conforming" output -- the HTML5 spec contains
1668
 * a number of constraints which are not enforced by the HTML5 parsing
1669
 * process.  But the result will be free of gross errors: misnested or
1670
 * unclosed tags, for example, and will be unchanged by spec-compliant
1671
 * parsing followed by serialization.
1672
 *
1673
 * The tree building stage is structured as a state machine.
1674
 * When comparing the implementation to
1675
 * https://www.w3.org/TR/html5/syntax.html#tree-construction
1676
 * note that each state is implemented as a function with a
1677
 * name ending in `Mode` (because the HTML spec refers to them
1678
 * as insertion modes).  The current insertion mode is held by
1679
 * the $parseMode property.
1680
 *
1681
 * The following simplifications have been made:
1682
 * - We handle body content only (ie, we start `in body`.)
1683
 * - The document is never in "quirks mode".
1684
 * - All occurrences of < and > have been entity escaped, so we
1685
 *   can parse tags by simply splitting on those two characters.
1686
 *   Similarly, all attributes have been "cleaned" and are double-quoted
1687
 *   and escaped.
1688
 * - All comments and null characters are assumed to have been removed.
1689
 * - We don't alter linefeeds after <pre>/<listing>.
1690
 * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1691
 *   <form>, <frame>, <plaintext>, <isindex>, <textarea>, <xmp>, <iframe>,
1692
 *   <noembed>, <noscript>, <select>, <script>, <title>.  As a result,
1693
 *   further simplifications can be made:
1694
 *   - `frameset-ok` is not tracked.
1695
 *   - `form element pointer` is not tracked.
1696
 *   - `head element pointer` is not tracked (but presumed non-null)
1697
 *   - Tokenizer has only a single mode.
1698
 *
1699
 *   We generally mark places where we omit cases from the spec due to
1700
 *   disallowed elements with a comment: `# OMITTED: <element-name>`.
1701
 *
1702
 *   The HTML spec keeps a flag during the parsing process to track
1703
 *   whether or not a "parse error" has been encountered.  We don't
1704
 *   bother to track that flag, we just implement the error-handling
1705
 *   process as specified.
1706
 *
1707
 * @ingroup Parser
1708
 * @since 1.27
1709
 * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1710
 */
1711
class Balancer {
1712
	/** @var string Name of the insertion-mode method that handles the next HTML token */
	private $parseMode;
	/** @var \ExplodeIterator|null The input text split on '<'; only set while balance() runs */
	private $bitsIterator;
	/** @var array|null Keys are the acceptable tag names; null disables tag sanitization */
	private $allowedHtmlElements;
	/** @var BalanceActiveFormattingElements|null List of active formatting elements */
	private $afe;
	/** @var BalanceStack|null Stack of open elements */
	private $stack;
	/** @var bool Whether syntactic constraints on the input are asserted */
	private $strict;
	/** @var bool Whether serialization is tweaked for compatibility with "tidy" */
	private $tidyCompat;

	# NOTE(review): not referenced in the part of the class reviewed here;
	# purpose to be confirmed against the remaining insertion modes.
	private $textIntegrationMode = false;
	# NOTE(review): presumably used by the table insertion modes -- confirm.
	private $pendingTableText;
	/** @var string|null Insertion mode to return to once inTextMode is done */
	private $originalInsertionMode;
	/** @var BalanceElement|null Context element of the fragment being parsed */
	private $fragmentContext;

	/** @var callable|null Callback applied to each tag's attribute string */
	private $processingCallback;
	/** @var array|bool Extra arguments handed to $processingCallback */
	private $processingArgs;
	/** @var string[] Stack of template insertion modes */
	private $templateInsertionModes = [];
1724
1725
	/**
	 * Create a new Balancer.
	 * @param array $config Balancer configuration.  Includes:
	 *     'strict' : boolean, defaults to false.
	 *         When true, enforces syntactic constraints on input:
	 *         all non-tag '<' must be escaped, all attributes must be
	 *         separated by a single space and double-quoted.  This is
	 *         consistent with the output of the Sanitizer.
	 *     'allowedHtmlElements' : array, defaults to null.
	 *         When present, the keys of this associative array give
	 *         the acceptable HTML tag names.  When not present, no
	 *         tag sanitization is done.
	 *     'tidyCompat' : boolean, defaults to false.
	 *         When true, the serialization algorithm is tweaked to
	 *         provide historical compatibility with the old "tidy"
	 *         program: <p>-wrapping is done to the children of
	 *         <body> and <blockquote> elements, and empty elements
	 *         are removed.
	 * @throws ParameterAssertionException if 'allowedHtmlElements' names
	 *     an element listed in BalanceSets::$unsupportedSet.
	 */
	public function __construct( array $config = [] ) {
		$config = $config + [
			'strict' => false,
			'allowedHtmlElements' => null,
			'tidyCompat' => false,
		];
		$this->allowedHtmlElements = $config['allowedHtmlElements'];
		$this->strict = $config['strict'];
		$this->tidyCompat = $config['tidyCompat'];
		if ( $this->allowedHtmlElements !== null ) {
			# Sanity check: refuse to sanitize towards elements the balancer
			# cannot handle.  Only the keys matter on both sides, so a plain
			# key intersection is all that is needed.
			$bad = array_intersect_key(
				$this->allowedHtmlElements,
				BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
			);
			if ( count( $bad ) > 0 ) {
				// Separator first: the reversed argument order is no longer
				// accepted by PHP 8.
				$badstr = implode( ',', array_keys( $bad ) );
				throw new ParameterAssertionException(
					'$config',
					'Balance attempted with sanitization including ' .
					"unsupported elements: {$badstr}"
				);
			}
		}
	}
1774
1775
	/**
	 * Return a balanced HTML string for the HTML fragment given by $text,
	 * subject to the caveats listed in the class description.  The result
	 * will typically be idempotent -- that is, rebalancing the output
	 * would result in no change.
	 *
	 * @param string $text The markup to be balanced
	 * @param callable|null $processingCallback Callback to do any variable or
	 *   parameter replacements in HTML attributes values.  It is invoked
	 *   with the attribute string (by reference) and $processingArgs.
	 *   Anything that is not callable (including the default, null) is
	 *   skipped.
	 * @param array|bool $processingArgs Arguments for the processing callback
	 * @return string The balanced markup
	 */
	public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
		$this->parseMode = 'inBodyMode';
		$this->bitsIterator = new ExplodeIterator( '<', $text );
		$this->afe = new BalanceActiveFormattingElements();
		$this->stack = new BalanceStack();
		$this->stack->tidyCompat = $this->tidyCompat;
		$this->processingCallback = $processingCallback;
		$this->processingArgs = $processingArgs;
		# Start every run with an empty template-mode stack, so that state
		# left behind by an earlier run (for instance one aborted by a
		# strict-mode assertion) cannot influence this one.
		$this->templateInsertionModes = [];

		# The stack is constructed with an <html> element already on it.
		# Set this up as a fragment parsed with <body> as the context.
		$this->fragmentContext =
			new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
		$this->resetInsertionMode();

		// The input was split on '<', so the first piece is text, not a tag.
		$x = $this->bitsIterator->current();
		$this->bitsIterator->next();
		$this->insertToken( 'text', str_replace( '>', '&gt;', $x ) );
		// Every remaining piece starts where a '<' was found.
		while ( $this->bitsIterator->valid() ) {
			$this->advance();
		}
		$this->insertToken( 'eof', null );
		$result = $this->stack->getOutput();
		// Free memory before returning.
		$this->bitsIterator = null;
		$this->afe = null;
		$this->stack = null;
		$this->fragmentContext = null;
		return $result;
	}
1819
1820
	/**
	 * Pass a token to the tree builder.  The $token will be one of the
	 * strings "tag", "endtag", "text", or "eof".
	 *
	 * The token is handed either to insertForeignToken() or to the method
	 * named by the current insertion mode, depending on the adjusted
	 * current node.
	 *
	 * @param string $token Token type
	 * @param string|null $value Tag name or text content (null for "eof")
	 * @param array|null $attribs Attributes of a tag token
	 * @param bool $selfclose Whether a tag token was written as `<.../>`
	 * @return mixed False if the tag is unsupported and was rejected, true
	 *   for an empty text token, otherwise whatever the handler returned.
	 */
	private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' && $value === '' ) {
			# Don't actually inject the empty string as a text token.
			return true;
		}
		if (
			( $token === 'tag' || $token === 'endtag' ) &&
			isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] )
		) {
			# As described in "simplifications" above, these tags are
			# not supported in the balancer.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}

		// Decide whether the token follows the HTML rules of the current
		// insertion mode or the rules for foreign (MathML/SVG) content.
		$current = $this->stack->adjustedCurrentNode( $this->fragmentContext );
		$isTag = ( $token === 'tag' );
		$isText = ( $token === 'text' );

		if (
			$this->stack->length() === 0 ||
			$current->isHtml() ||
			$token === 'eof'
		) {
			$foreign = false;
		} elseif ( $current->isMathmlTextIntegrationPoint() ) {
			$foreign = !(
				$isText ||
				( $isTag && $value !== 'mglyph' && $value !== 'malignmark' )
			);
		} elseif (
			$current->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$current->localName === 'annotation-xml' &&
			$isTag && $value === 'svg'
		) {
			$foreign = false;
		} else {
			$foreign = !(
				$current->isHtmlIntegrationPoint() &&
				( $isTag || $isText )
			);
		}

		if ( !$foreign ) {
			$handler = $this->parseMode;
			return $this->$handler( $token, $value, $attribs, $selfclose );
		}
		return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
	}
1878
1879
	/**
	 * Process a token according to the rules for foreign (MathML/SVG)
	 * content.
	 *
	 * @param string $token Token type: "text", "tag" or "endtag"
	 * @param string $value Tag name or text content
	 * @param array|null $attribs Attributes of a tag token
	 * @param bool $selfclose Whether a tag token was written as `<.../>`
	 * @return mixed True once the token has been consumed here, otherwise
	 *   the result of reprocessing it under the HTML rules.
	 */
	private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			$this->stack->insertText( $value );
			return true;
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'font':
				# Per the HTML spec, <font> only breaks out of foreign
				# content when it carries a color, face or size attribute;
				# a bare <font> is an ordinary foreign start tag.
				if ( !isset( $attribs['color'] )
					&& !isset( $attribs['face'] )
					&& !isset( $attribs['size'] )
				) {
					break;
				}
				/* otherwise, fall through */
			case 'b':
			case 'big':
			case 'blockquote':
			case 'body':
			case 'br':
			case 'center':
			case 'code':
			case 'dd':
			case 'div':
			case 'dl':
			case 'dt':
			case 'em':
			case 'embed':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
			case 'head':
			case 'hr':
			case 'i':
			case 'img':
			case 'li':
			case 'listing':
			case 'menu':
			case 'meta':
			case 'nobr':
			case 'ol':
			case 'p':
			case 'pre':
			case 'ruby':
			case 's':
			case 'small':
			case 'span':
			case 'strong':
			case 'strike':
			case 'sub':
			case 'sup':
			case 'table':
			case 'tt':
			case 'u':
			case 'ul':
			case 'var':
				if ( $this->fragmentContext ) {
					# Fragment case: treat as "any other start tag" below.
					break;
				}
				# Pop until we are back at an integration point or an HTML
				# element, then reprocess the token from there.
				while ( true ) {
					$this->stack->pop();
					$node = $this->stack->currentNode();
					if (
						$node->isMathmlTextIntegrationPoint() ||
						$node->isHtmlIntegrationPoint() ||
						$node->isHtml()
					) {
						break;
					}
				}
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			// "Any other start tag": the new element takes the namespace of
			// the adjusted current node.
			$adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
				$this->fragmentContext : $this->stack->currentNode();
			$this->stack->insertForeignElement(
				$adjusted->namespaceURI, $value, $attribs
			);
			if ( $selfclose ) {
				$this->stack->pop();
			}
			return true;
		} elseif ( $token === 'endtag' ) {
			$first = true;
			foreach ( $this->stack as $i => $node ) {
				if ( $node->isHtml() && !$first ) {
					// process the end tag as HTML
					$func = $this->parseMode;
					return $this->$func( $token, $value, $attribs, $selfclose );
				} elseif ( $i === 0 ) {
					// Reached index 0 without a match: ignore the end tag.
					return true;
				} elseif ( $node->localName === $value ) {
					$this->stack->popTag( $node );
					return true;
				}
				$first = false;
			}
		}
		# NOTE(review): falling off the end returns null.  Presumably not
		# reached, since the loop above returns once it sees index 0 --
		# confirm against BalanceStack's iteration order.
	}
1980
1981
	/**
	 * Grab the next "token" from $bitsIterator.  Each piece starts where a
	 * '<' was found in the input.  If the piece parses as a tag that passes
	 * sanitization (when enabled) and is accepted by the tree builder, it
	 * is inserted as a tag followed by the trailing text; otherwise the
	 * whole piece, including the '<', is inserted as escaped text.
	 */
	private function advance() {
		$x = $this->bitsIterator->current();
		$this->bitsIterator->next();
		$regs = [];
		# $slash: Does the current element start with a '/'?
		# $t: Current element name
		# $attribStr: String between element name and >
		# $brace: Ending '>' or '/>'
		# $rest: Everything until the next element from the $bitsIterator
		if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
			list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
			$t = strtolower( $t );
			if ( $this->strict ) {
				/* Verify that attributes are all properly double-quoted */
				Assert::invariant(
					preg_match(
						'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
					),
					"Bad attribute string found"
				);
			}
		} else {
			Assert::invariant(
				!$this->strict, "< found which does not start a valid tag"
			);
			$slash = $t = $attribStr = $brace = $rest = null;
		}

		$sanitize = $this->allowedHtmlElements !== null;
		$goodtag = $sanitize
			? ( $t && isset( $this->allowedHtmlElements[$t] ) )
			: $t;

		if ( $goodtag ) {
			// Let the caller rewrite the attribute string before validation.
			if ( is_callable( $this->processingCallback ) ) {
				call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
			}
			if ( $sanitize ) {
				$goodtag = Sanitizer::validateTag( $attribStr, $t );
			}
		}
		if ( $goodtag ) {
			// Attributes are decoded either way; validation is only applied
			// when sanitizing.
			$attribs = Sanitizer::decodeTagAttributes( $attribStr );
			if ( $sanitize ) {
				$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
			}
			$goodtag = $this->insertToken(
				$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
			);
		}
		if ( $goodtag ) {
			// One pass is enough: the replacement text contains no '>'.
			$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		} else {
			# bad tag; serialize entire thing as text.
			$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
		}
	}
2044
2045
	/**
	 * Make $mode the current insertion mode.
	 *
	 * @param string $mode Name of an insertion-mode method; it is asserted
	 *   to end in `Mode`.
	 * @return mixed The value $parseMode held before the switch.
	 */
	private function switchMode( $mode ) {
		$hasModeSuffix = ( substr( $mode, -4 ) === 'Mode' );
		Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );

		$previous = $this->parseMode;
		$this->parseMode = $mode;
		return $previous;
	}
2053
2054
	/**
	 * Switch to insertion mode $mode, then feed the given token through
	 * insertToken() again so that it is handled under the new mode.
	 *
	 * @param string $mode Name of the insertion-mode method to switch to
	 * @param string $token Token type
	 * @param string|null $value Tag name or text content
	 * @param array|null $attribs Attributes of a tag token
	 * @param bool $selfclose Whether a tag token was written as `<.../>`
	 * @return mixed Result of insertToken() for the reprocessed token
	 */
	private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
		// The previous mode returned by switchMode() is not needed here.
		$this->switchMode( $mode );
		$outcome = $this->insertToken( $token, $value, $attribs, $selfclose );
		return $outcome;
	}
2058
2059
	/**
	 * Choose the insertion mode that matches the current stack of open
	 * elements.  The stack is walked until an element that determines the
	 * mode is found; at index 0 the fragment context (if any) stands in for
	 * the stack entry, and `inBodyMode` is the fallback.
	 */
	private function resetInsertionMode() {
		# HTML elements that map directly onto one insertion mode.
		# OMITTED: <select>, <frameset>, <html>, <head>
		static $modeForElement = [
			'tr' => 'inRowMode',
			'tbody' => 'inTableBodyMode',
			'tfoot' => 'inTableBodyMode',
			'thead' => 'inTableBodyMode',
			'caption' => 'inCaptionMode',
			'colgroup' => 'inColumnGroupMode',
			'table' => 'inTableMode',
			'body' => 'inBodyMode',
		];

		foreach ( $this->stack as $depth => $element ) {
			$isLast = ( $depth === 0 );
			if ( $isLast && $this->fragmentContext ) {
				$element = $this->fragmentContext;
			}

			if ( $element->isHtml() ) {
				$name = $element->localName;
				if ( $name === 'template' ) {
					// Use the most recently pushed template insertion mode.
					$this->switchMode(
						array_slice( $this->templateInsertionModes, -1 )[0]
					);
					return;
				}
				if ( isset( $modeForElement[$name] ) ) {
					$this->switchMode( $modeForElement[$name] );
					return;
				}
				# Table cells only select inCellMode when they are not the
				# last entry examined.
				if ( !$isLast && $element->isA( BalanceSets::$tableCellSet ) ) {
					$this->switchMode( 'inCellMode' );
					return;
				}
			}

			if ( $isLast ) {
				$this->switchMode( 'inBodyMode' );
				return;
			}
		}
	}
2131
2132
	/**
	 * Finish parsing: drop the list of active formatting elements and pop
	 * the stack of open elements down to its first entry.
	 */
	private function stopParsing() {
		# Of the spec's "stop parsing" steps only step 2 applies here:
		# "pop all the nodes off the stack of open elements".
		# The top-most <html> element is deliberately left on the stack.

		# The AFE list has to go first: otherwise the element objects stay
		# live during serialization, potentially using O(N^2) memory.
		# Popping the stack never reconstructs the active formatting
		# elements, so nothing below needs the list.
		$this->afe = null;

		$keep = 1;
		$this->stack->popTo( $keep );
	}
2144
2145
	/**
	 * Insert an HTML element named $value and switch to `inTextMode`,
	 * remembering the mode that was active so inTextMode can restore it.
	 *
	 * @param string $value Tag name
	 * @param array|null $attribs Attributes of the tag
	 * @return bool Always true
	 */
	private function parseRawText( $value, $attribs = null ) {
		$this->stack->insertHTMLElement( $value, $attribs );
		// XXX switch tokenizer to rawtext state?
		$modeBefore = $this->switchMode( 'inTextMode' );
		$this->originalInsertionMode = $modeBefore;
		return true;
	}
2151
2152
	/**
	 * Insertion mode entered by parseRawText().  Text is appended to the
	 * current element; an end tag or end of input pops that element and
	 * goes back to the original insertion mode.  Anything else is ignored.
	 *
	 * @param string $token Token type
	 * @param string|null $value Tag name or text content
	 * @param array|null $attribs Attributes of a tag token
	 * @param bool $selfclose Whether a tag token was written as `<.../>`
	 * @return mixed True, or for "eof" the result of reprocessing the token
	 *   in the original insertion mode.
	 */
	private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			$this->stack->insertText( $value );
			return true;
		}
		if ( $token !== 'eof' && $token !== 'endtag' ) {
			// Nothing to do for any other kind of token.
			return true;
		}

		// Both remaining tokens close the element opened by parseRawText().
		$this->stack->pop();
		if ( $token === 'eof' ) {
			// End of input still has to be seen by the original mode.
			return $this->switchModeAndReprocess(
				$this->originalInsertionMode, $token, $value, $attribs, $selfclose
			);
		}
		$this->switchMode( $this->originalInsertionMode );
		return true;
	}
2168
2169
	/**
	 * Handle a token according to the "in head" insertion mode, reduced to
	 * the elements this balancer supports.
	 *
	 * @param string $token Token type
	 * @param string|null $value Tag name or text content
	 * @param array|null $attribs Attributes of a tag token
	 * @param bool $selfclose Whether a tag token was written as `<.../>`
	 * @return mixed True when the token was consumed or ignored here,
	 *   otherwise the result of passing it to insertToken() again.
	 */
	private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			// Leading whitespace is inserted as-is; anything after it is
			// handled at the bottom of the function.
			$leading = [];
			if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
				$this->stack->insertText( $leading[0] );
				$value = substr( $value, strlen( $leading[0] ) );
			}
			if ( strlen( $value ) === 0 ) {
				return true; // All text handled.
			}
		} elseif ( $token === 'tag' ) {
			# OMITTED: <html>, <title>, <noscript>, <script>, <head>
			# OMITTED: in a full HTML parser, <meta> might change the encoding.
			$voidHeadTags = [ 'meta', 'base', 'basefont', 'bgsound', 'link' ];
			if ( in_array( $value, $voidHeadTags, true ) ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			}
			if ( $value === 'noframes' || $value === 'style' ) {
				return $this->parseRawText( $value, $attribs );
			}
			if ( $value === 'template' ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->afe->insertMarker();
				# OMITTED: frameset_ok
				$this->switchMode( 'inTemplateMode' );
				$this->templateInsertionModes[] = $this->parseMode;
				return true;
			}
			// Any other start tag is handled at the bottom of the function.
		} elseif ( $token === 'endtag' ) {
			# OMITTED: <head>, <body>, <html>
			if ( $value === 'template' ) {
				if ( $this->stack->indexOf( $value ) < 0 ) {
					return true; // Ignore the token.
				}
				$this->stack->generateImpliedEndTags( null, true /* thorough */ );
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				array_pop( $this->templateInsertionModes );
				$this->resetInsertionMode();
				return true;
			}
			if ( $value !== 'br' ) {
				// ignore any other end tag
				return true;
			}
			// </br> is handled at the bottom of the function.
		}

		// Not handled above: act as if a </head> had been seen ...
		$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
		// ... and then process the token again.
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
2235
2236
	private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
2237
		if ( $token === 'text' ) {
2238
			$this->afe->reconstruct( $this->stack );
2239
			$this->stack->insertText( $value );
2240
			return true;
2241
		} elseif ( $token === 'eof' ) {
2242
			if ( !empty( $this->templateInsertionModes ) ) {
2243
				return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
2244
			}
2245
			$this->stopParsing();
2246
			return true;
2247
		} elseif ( $token === 'tag' ) {
2248
			switch ( $value ) {
2249
			# OMITTED: <html>
2250
			case 'base':
2251
			case 'basefont':
2252
			case 'bgsound':
2253
			case 'link':
2254
			case 'meta':
2255
			case 'noframes':
2256
			# OMITTED: <script>
2257
			case 'style':
2258
			case 'template':
2259
			# OMITTED: <title>
2260
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2261
			# OMITTED: <body>
2262
			# OMITTED: <frameset>
2263
2264
			case 'address':
2265
			case 'article':
2266
			case 'aside':
2267
			case 'blockquote':
2268
			case 'center':
2269
			case 'details':
2270
			case 'dialog':
2271
			case 'dir':
2272
			case 'div':
2273
			case 'dl':
2274
			case 'fieldset':
2275
			case 'figcaption':
2276
			case 'figure':
2277
			case 'footer':
2278
			case 'header':
2279
			case 'hgroup':
2280
			case 'main':
2281
			case 'menu':
2282
			case 'nav':
2283
			case 'ol':
2284
			case 'p':
2285
			case 'section':
2286
			case 'summary':
2287
			case 'ul':
2288
				if ( $this->stack->inButtonScope( 'p' ) ) {
2289
					$this->inBodyMode( 'endtag', 'p' );
2290
				}
2291
				$this->stack->insertHTMLElement( $value, $attribs );
2292
				return true;
2293
2294
			case 'h1':
2295
			case 'h2':
2296
			case 'h3':
2297
			case 'h4':
2298
			case 'h5':
2299
			case 'h6':
2300
				if ( $this->stack->inButtonScope( 'p' ) ) {
2301
					$this->inBodyMode( 'endtag', 'p' );
2302
				}
2303
				if ( $this->stack->currentNode()->isA( BalanceSets::$headingSet ) ) {
2304
					$this->stack->pop();
2305
				}
2306
				$this->stack->insertHTMLElement( $value, $attribs );
2307
				return true;
2308
2309
			case 'pre':
2310
			case 'listing':
2311
				if ( $this->stack->inButtonScope( 'p' ) ) {
2312
					$this->inBodyMode( 'endtag', 'p' );
2313
				}
2314
				$this->stack->insertHTMLElement( $value, $attribs );
2315
				# As described in "simplifications" above:
2316
				# 1. We don't touch the next token, even if it's a linefeed.
2317
				# 2. OMITTED: frameset_ok
2318
				return true;
2319
2320
			# OMITTED: <form>
2321
2322
			case 'li':
2323
				# OMITTED: frameset_ok
2324
				foreach ( $this->stack as $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2325
					if ( $node->isA( 'li' ) ) {
2326
						$this->inBodyMode( 'endtag', 'li' );
2327
						break;
2328
					}
2329
					if (
2330
						$node->isA( BalanceSets::$specialSet ) &&
2331
						!$node->isA( BalanceSets::$addressDivPSet )
2332
					) {
2333
						break;
2334
					}
2335
				}
2336
				if ( $this->stack->inButtonScope( 'p' ) ) {
2337
					$this->inBodyMode( 'endtag', 'p' );
2338
				}
2339
				$this->stack->insertHTMLElement( $value, $attribs );
2340
				return true;
2341
2342
			case 'dd':
2343
			case 'dt':
2344
				# OMITTED: frameset_ok
2345
				foreach ( $this->stack as $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2346
					if ( $node->isA( 'dd' ) ) {
2347
						$this->inBodyMode( 'endtag', 'dd' );
2348
						break;
2349
					}
2350
					if ( $node->isA( 'dt' ) ) {
2351
						$this->inBodyMode( 'endtag', 'dt' );
2352
						break;
2353
					}
2354
					if (
2355
						$node->isA( BalanceSets::$specialSet ) &&
2356
						!$node->isA( BalanceSets::$addressDivPSet )
2357
					) {
2358
						break;
2359
					}
2360
				}
2361
				if ( $this->stack->inButtonScope( 'p' ) ) {
2362
					$this->inBodyMode( 'endtag', 'p' );
2363
				}
2364
				$this->stack->insertHTMLElement( $value, $attribs );
2365
				return true;
2366
2367
			# OMITTED: <plaintext>
2368
2369 View Code Duplication
			case 'button':
2370
				if ( $this->stack->inScope( 'button' ) ) {
2371
					$this->inBodyMode( 'endtag', 'button' );
2372
					return $this->insertToken( $token, $value, $attribs, $selfclose );
2373
				}
2374
				$this->afe->reconstruct( $this->stack );
2375
				$this->stack->insertHTMLElement( $value, $attribs );
2376
				return true;
2377
2378
			case 'a':
2379
				$activeElement = $this->afe->findElementByTag( 'a' );
2380
				if ( $activeElement ) {
2381
					$this->inBodyMode( 'endtag', 'a' );
2382
					if ( $this->afe->isInList( $activeElement ) ) {
2383
						$this->afe->remove( $activeElement );
2384
						// Don't flatten here, since when we fall
2385
						// through below we might foster parent
2386
						// the new <a> tag inside this one.
2387
						$this->stack->removeElement( $activeElement, false );
2388
					}
2389
				}
2390
				/* Falls through */
2391
			case 'b':
2392
			case 'big':
2393
			case 'code':
2394
			case 'em':
2395
			case 'font':
2396
			case 'i':
2397
			case 's':
2398
			case 'small':
2399
			case 'strike':
2400
			case 'strong':
2401
			case 'tt':
2402 View Code Duplication
			case 'u':
2403
				$this->afe->reconstruct( $this->stack );
2404
				$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2405
				return true;
2406
2407
			case 'nobr':
2408
				$this->afe->reconstruct( $this->stack );
2409
				if ( $this->stack->inScope( 'nobr' ) ) {
2410
					$this->inBodyMode( 'endtag', 'nobr' );
2411
					$this->afe->reconstruct( $this->stack );
2412
				}
2413
				$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
2414
				return true;
2415
2416
			case 'applet':
2417
			case 'marquee':
2418
			case 'object':
2419
				$this->afe->reconstruct( $this->stack );
2420
				$this->stack->insertHTMLElement( $value, $attribs );
2421
				$this->afe->insertMarker();
2422
				# OMITTED: frameset_ok
2423
				return true;
2424
2425 View Code Duplication
			case 'table':
2426
				# The document is never in "quirks mode"; see simplifications
2427
				# above.
2428
				if ( $this->stack->inButtonScope( 'p' ) ) {
2429
					$this->inBodyMode( 'endtag', 'p' );
2430
				}
2431
				$this->stack->insertHTMLElement( $value, $attribs );
2432
				# OMITTED: frameset_ok
2433
				$this->switchMode( 'inTableMode' );
2434
				return true;
2435
2436
			case 'area':
2437
			case 'br':
2438
			case 'embed':
2439
			case 'img':
2440
			case 'keygen':
2441 View Code Duplication
			case 'wbr':
2442
				$this->afe->reconstruct( $this->stack );
2443
				$this->stack->insertHTMLElement( $value, $attribs );
2444
				$this->stack->pop();
2445
				# OMITTED: frameset_ok
2446
				return true;
2447
2448 View Code Duplication
			case 'input':
2449
				$this->afe->reconstruct( $this->stack );
2450
				$this->stack->insertHTMLElement( $value, $attribs );
2451
				$this->stack->pop();
2452
				# OMITTED: frameset_ok
2453
				# (hence we don't need to examine the tag's "type" attribute)
2454
				return true;
2455
2456
			case 'menuitem':
2457
			case 'param':
2458
			case 'source':
2459
			case 'track':
2460
				$this->stack->insertHTMLElement( $value, $attribs );
2461
				$this->stack->pop();
2462
				return true;
2463
2464 View Code Duplication
			case 'hr':
2465
				if ( $this->stack->inButtonScope( 'p' ) ) {
2466
					$this->inBodyMode( 'endtag', 'p' );
2467
				}
2468
				$this->stack->insertHTMLElement( $value, $attribs );
2469
				$this->stack->pop();
2470
				return true;
2471
2472
			case 'image':
2473
				# warts!
2474
				return $this->inBodyMode( $token, 'img', $attribs, $selfclose );
2475
2476
			# OMITTED: <isindex>
2477
			# OMITTED: <textarea>
2478
			# OMITTED: <xmp>
2479
			# OMITTED: <iframe>
2480
			# OMITTED: <noembed>
2481
			# OMITTED: <noscript>
2482
2483
			# OMITTED: <select>
2484
			/*
2485
			case 'select':
2486
				$this->afe->reconstruct( $this->stack );
2487
				$this->stack->insertHTMLElement( $value, $attribs );
2488
				switch ( $this->parseMode ) {
2489
				case 'inTableMode':
2490
				case 'inCaptionMode':
2491
				case 'inTableBodyMode':
2492
				case 'inRowMode':
2493
				case 'inCellMode':
2494
					$this->switchMode( 'inSelectInTableMode' );
2495
					return true;
2496
				default:
2497
					$this->switchMode( 'inSelectMode' );
2498
					return true;
2499
				}
2500
			*/
2501
2502
			case 'optgroup':
2503 View Code Duplication
			case 'option':
2504
				if ( $this->stack->currentNode()->isA( 'option' ) ) {
2505
					$this->inBodyMode( 'endtag', 'option' );
2506
				}
2507
				$this->afe->reconstruct( $this->stack );
2508
				$this->stack->insertHTMLElement( $value, $attribs );
2509
				return true;
2510
2511
			case 'rb':
2512
			case 'rtc':
2513
				if ( $this->stack->inScope( 'ruby' ) ) {
2514
					$this->stack->generateImpliedEndTags();
2515
				}
2516
				$this->stack->insertHTMLElement( $value, $attribs );
2517
				return true;
2518
2519
			case 'rp':
2520
			case 'rt':
2521
				if ( $this->stack->inScope( 'ruby' ) ) {
2522
					$this->stack->generateImpliedEndTags( 'rtc' );
2523
				}
2524
				$this->stack->insertHTMLElement( $value, $attribs );
2525
				return true;
2526
2527 View Code Duplication
			case 'math':
2528
				$this->afe->reconstruct( $this->stack );
2529
				# We skip the spec's "adjust MathML attributes" and
2530
				# "adjust foreign attributes" steps, since the browser will
2531
				# do this later when it parses the output and it doesn't affect
2532
				# balancing.
2533
				$this->stack->insertForeignElement(
2534
					BalanceSets::MATHML_NAMESPACE, $value, $attribs
2535
				);
2536
				if ( $selfclose ) {
2537
					# emit explicit </math> tag.
2538
					$this->stack->pop();
2539
				}
2540
				return true;
2541
2542 View Code Duplication
			case 'svg':
2543
				$this->afe->reconstruct( $this->stack );
2544
				# We skip the spec's "adjust SVG attributes" and
2545
				# "adjust foreign attributes" steps, since the browser will
2546
				# do this later when it parses the output and it doesn't affect
2547
				# balancing.
2548
				$this->stack->insertForeignElement(
2549
					BalanceSets::SVG_NAMESPACE, $value, $attribs
2550
				);
2551
				if ( $selfclose ) {
2552
					# emit explicit </svg> tag.
2553
					$this->stack->pop();
2554
				}
2555
				return true;
2556
2557
			case 'caption':
2558
			case 'col':
2559
			case 'colgroup':
2560
			# OMITTED: <frame>
2561
			case 'head':
2562
			case 'tbody':
2563
			case 'td':
2564
			case 'tfoot':
2565
			case 'th':
2566
			case 'thead':
2567
			case 'tr':
2568
				// Ignore table tags if we're not inTableMode
2569
				return true;
2570
			}
2571
2572
			// Handle any other start tag here
2573
			$this->afe->reconstruct( $this->stack );
2574
			$this->stack->insertHTMLElement( $value, $attribs );
2575
			return true;
2576
		} elseif ( $token === 'endtag' ) {
2577
			switch ( $value ) {
2578
			# </body>,</html> are unsupported.
2579
2580
			case 'template':
2581
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
2582
2583
			case 'address':
2584
			case 'article':
2585
			case 'aside':
2586
			case 'blockquote':
2587
			case 'button':
2588
			case 'center':
2589
			case 'details':
2590
			case 'dialog':
2591
			case 'dir':
2592
			case 'div':
2593
			case 'dl':
2594
			case 'fieldset':
2595
			case 'figcaption':
2596
			case 'figure':
2597
			case 'footer':
2598
			case 'header':
2599
			case 'hgroup':
2600
			case 'listing':
2601
			case 'main':
2602
			case 'menu':
2603
			case 'nav':
2604
			case 'ol':
2605
			case 'pre':
2606
			case 'section':
2607
			case 'summary':
2608
			case 'ul':
2609
				// Ignore if there is not a matching open tag
2610
				if ( !$this->stack->inScope( $value ) ) {
2611
					return true;
2612
				}
2613
				$this->stack->generateImpliedEndTags();
2614
				$this->stack->popTag( $value );
2615
				return true;
2616
2617
			# OMITTED: <form>
2618
2619 View Code Duplication
			case 'p':
2620
				if ( !$this->stack->inButtonScope( 'p' ) ) {
2621
					$this->inBodyMode( 'tag', 'p', [] );
2622
					return $this->insertToken( $token, $value, $attribs, $selfclose );
2623
				}
2624
				$this->stack->generateImpliedEndTags( $value );
2625
				$this->stack->popTag( $value );
2626
				return true;
2627
2628
			case 'li':
2629
				if ( !$this->stack->inListItemScope( $value ) ) {
2630
					return true; # ignore
2631
				}
2632
				$this->stack->generateImpliedEndTags( $value );
2633
				$this->stack->popTag( $value );
2634
				return true;
2635
2636
			case 'dd':
2637
			case 'dt':
2638
				if ( !$this->stack->inScope( $value ) ) {
2639
					return true; # ignore
2640
				}
2641
				$this->stack->generateImpliedEndTags( $value );
2642
				$this->stack->popTag( $value );
2643
				return true;
2644
2645
			case 'h1':
2646
			case 'h2':
2647
			case 'h3':
2648
			case 'h4':
2649
			case 'h5':
2650
			case 'h6':
2651
				if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2652
					return;
2653
				}
2654
				$this->stack->generateImpliedEndTags();
2655
				$this->stack->popTag( BalanceSets::$headingSet );
2656
				return true;
2657
2658
			case 'sarcasm':
2659
				# Take a deep breath, then:
2660
				break;
2661
2662
			case 'a':
2663
			case 'b':
2664
			case 'big':
2665
			case 'code':
2666
			case 'em':
2667
			case 'font':
2668
			case 'i':
2669
			case 'nobr':
2670
			case 's':
2671
			case 'small':
2672
			case 'strike':
2673
			case 'strong':
2674
			case 'tt':
2675
			case 'u':
2676
				if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
0 ignored issues
show
Bug introduced by
It seems like $this->afe can be null; however, adoptionAgency() does not accept null, maybe add an additional type check?

Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend to add an additional type check to your code:

/** @return stdClass|null */
function mayReturnNull() { }

function doesNotAcceptNull(stdClass $x) { }

// With potential error.
function withoutCheck() {
    $x = mayReturnNull();
    doesNotAcceptNull($x); // Potential error here.
}

// Safe - Alternative 1
function withCheck1() {
    $x = mayReturnNull();
    if ( ! $x instanceof stdClass) {
        throw new \LogicException('$x must be defined.');
    }
    doesNotAcceptNull($x);
}

// Safe - Alternative 2
function withCheck2() {
    $x = mayReturnNull();
    if ($x instanceof stdClass) {
        doesNotAcceptNull($x);
    }
}
Loading history...
2677
					return true; # If we did something, we're done.
2678
				}
2679
				break; # Go to the "any other end tag" case.
2680
2681
			case 'applet':
2682
			case 'marquee':
2683 View Code Duplication
			case 'object':
2684
				if ( !$this->stack->inScope( $value ) ) {
2685
					return true; # ignore
2686
				}
2687
				$this->stack->generateImpliedEndTags();
2688
				$this->stack->popTag( $value );
2689
				$this->afe->clearToMarker();
2690
				return true;
2691
2692
			case 'br':
2693
				# Turn </br> into <br>
2694
				return $this->inBodyMode( 'tag', $value, [] );
2695
			}
2696
2697
			// Any other end tag goes here
2698
			foreach ( $this->stack as $i => $node ) {
0 ignored issues
show
Bug introduced by
The expression $this->stack of type object<MediaWiki\Tidy\BalanceStack>|null is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
2699
				if ( $node->isA( $value ) ) {
2700
					$this->stack->generateImpliedEndTags( $value );
2701
					$this->stack->popTo( $i ); # including $i
2702
					break;
2703
				} elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2704
					return true; // ignore this close token.
2705
				}
2706
			}
2707
			return true;
2708
		} else {
2709
			Assert::invariant( false, "Bad token type: $token" );
2710
		}
2711
	}
2712
2713
	/**
	 * Handle a token while in the "in table" insertion mode.
	 *
	 * Table-structure start and end tags get dedicated handling here.
	 * Every token without a dedicated case is handed to inBodyMode()
	 * with $this->stack->fosterParentMode switched on for the duration
	 * of that call.
	 *
	 * @param string $token Token type. 'text', 'eof', 'tag' and 'endtag'
	 *   are the types that receive special treatment in this mode.
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens. For
	 *   other token types it is forwarded unchanged (presumably the text
	 *   content for 'text' tokens -- confirm against the tokenizer).
	 * @param array|null $attribs Attributes passed on to
	 *   insertHTMLElement() when an element is created.
	 * @param bool $selfclose Forwarded untouched to the delegate handlers.
	 * @return bool True on every path handled directly here; otherwise
	 *   whatever the delegated handler returns.
	 */
	private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			if ( $this->textIntegrationMode ) {
				return $this->inBodyMode( $token, $value, $attribs, $selfclose );
			} elseif ( $this->stack->currentNode()->isA( BalanceSets::$tableSectionRowSet ) ) {
				// Buffer the text in inTableTextMode, remembering which
				// mode to return to once a non-text token shows up.
				$this->pendingTableText = '';
				$this->originalInsertionMode = $this->parseMode;
				return $this->switchModeAndReprocess( 'inTableTextMode', $token, $value, $attribs, $selfclose );
			}
			// fall through to default case.
		} elseif ( $token === 'eof' ) {
			$this->stopParsing();
			return true;
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'caption':
				// The spec requires clearing the stack back to a table
				// context before inserting the caption, exactly as for
				// <colgroup> and the table sections below. Without this,
				// an element left open by foster-parenting (for example
				// the <b> in "<table><b><caption>") would end up as the
				// parent of the caption.
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->afe->insertMarker();
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inCaptionMode' );
				return true;
			case 'colgroup':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inColumnGroupMode' );
				return true;
			case 'col':
				// Act as if a <colgroup> start tag had been seen, then
				// hand the <col> token to insertToken() again.
				$this->inTableMode( 'tag', 'colgroup', [] );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			case 'tbody':
			case 'tfoot':
			case 'thead':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inTableBodyMode' );
				return true;
			case 'td':
			case 'th':
			case 'tr':
				// Act as if a <tbody> start tag had been seen, then hand
				// the original token to insertToken() again.
				$this->inTableMode( 'tag', 'tbody', [] );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			case 'table':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true; // Ignore this tag.
				}
				// A nested <table> start tag closes the open table first.
				$this->inTableMode( 'endtag', $value );
				return $this->insertToken( $token, $value, $attribs, $selfclose );

			case 'style':
			# OMITTED: <script>
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );

			case 'input':
				// Only <input type="hidden"> (compared case-insensitively)
				// is inserted directly into the table.
				if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
					break; // Handle this as "everything else"
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;

			# OMITTED: <form>
			}
			// Fall through for "anything else" clause.
		} elseif ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'table':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true; // Ignore.
				}
				$this->stack->popTag( $value );
				$this->resetInsertionMode();
				return true;
			# OMITTED: <body>
			case 'caption':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
			case 'tbody':
			case 'td':
			case 'tfoot':
			case 'th':
			case 'thead':
			case 'tr':
				return true; // Ignore the token.
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
			// Fall through for "anything else" clause.
		}
		// This is the "anything else" case: process the token with the
		// in-body rules while foster parenting is enabled on the stack.
		$this->stack->fosterParentMode = true;
		$this->inBodyMode( $token, $value, $attribs, $selfclose );
		$this->stack->fosterParentMode = false;
		return true;
	}
2808
2809
	/**
	 * Handle a token while in the "in table text" insertion mode.
	 *
	 * Text tokens are appended to $this->pendingTableText. The first
	 * token of any other type flushes that buffer and is then reprocessed
	 * in the mode stored in $this->originalInsertionMode.
	 *
	 * @param string $token Token type; only 'text' is buffered.
	 * @param mixed $value Text to buffer for 'text' tokens; otherwise
	 *   forwarded unchanged when the token is reprocessed.
	 * @param array|null $attribs Forwarded when the token is reprocessed.
	 * @param bool $selfclose Forwarded when the token is reprocessed.
	 * @return bool True for buffered text; otherwise the result of
	 *   switchModeAndReprocess().
	 */
	private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token !== 'text' ) {
			// A non-text token ends the run of table text: take the
			// buffered text and empty the buffer.
			$buffered = $this->pendingTableText;
			$this->pendingTableText = '';

			$whitespaceOnly = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
			if ( $whitespaceOnly ) {
				// Nothing but whitespace: insert it where we are.
				$this->stack->insertText( $buffered );
			} else {
				// Same treatment as the "anything else" case of
				// inTableMode: in-body rules with foster parenting on.
				$this->stack->fosterParentMode = true;
				$this->inBodyMode( 'text', $buffered );
				$this->stack->fosterParentMode = false;
			}

			return $this->switchModeAndReprocess(
				$this->originalInsertionMode, $token, $value, $attribs, $selfclose
			);
		}

		$this->pendingTableText .= $value;
		return true;
	}
2830
2831
	/**
	 * Helper for inCaptionMode: close the currently open caption.
	 *
	 * When a caption is in table scope this generates implied end tags,
	 * pops the caption, clears the active formatting elements back to
	 * the last marker and switches to inTableMode.
	 *
	 * @return bool True if a caption was closed, false if none was in
	 *   table scope (in which case nothing is changed).
	 */
	private function endCaption() {
		if ( $this->stack->inTableScope( 'caption' ) ) {
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( 'caption' );
			$this->afe->clearToMarker();
			$this->switchMode( 'inTableMode' );
			return true;
		}
		return false;
	}
2842
2843
	/**
	 * Handle a token while in the "in caption" insertion mode.
	 *
	 * Table-structure start tags and </table> close the caption via
	 * endCaption() and, if that succeeded, pass the token to
	 * insertToken() again. A handful of stray end tags are dropped.
	 * Everything else is processed with the in-body rules.
	 *
	 * @param string $token Token type ('tag' and 'endtag' are special-cased).
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens.
	 * @param array|null $attribs Forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool True for tokens handled here; otherwise the result of
	 *   inBodyMode().
	 */
	private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'caption':
				$this->endCaption();
				return true;
			case 'table':
				// Close the caption; only when that worked is the
				// </table> handed on for another round of processing.
				if ( !$this->endCaption() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			case 'body':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
			case 'tbody':
			case 'td':
			case 'tfoot':
			case 'th':
			case 'thead':
			case 'tr':
				// These end tags are silently dropped.
				return true;
			}
			// Any other end tag: "anything else" below.
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'caption':
			case 'col':
			case 'colgroup':
			case 'tbody':
			case 'td':
			case 'tfoot':
			case 'th':
			case 'thead':
			case 'tr':
				if ( !$this->endCaption() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
			// Any other start tag: "anything else" below.
		}

		// "Anything else": use the in-body rules.
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}
2889
2890
	/**
	 * Handle a token while in the "in column group" insertion mode.
	 *
	 * Leading whitespace of a text token is inserted directly; <col> is
	 * inserted and immediately popped; </colgroup> pops the current node
	 * and returns to inTableMode. Any token not handled here closes
	 * the column group (when the current node is a colgroup) and is then
	 * passed to insertToken() again.
	 *
	 * @param string $token Token type: 'text', 'tag', 'endtag' or 'eof'.
	 * @param mixed $value Text for 'text' tokens, tag name for 'tag' and
	 *   'endtag' tokens.
	 * @param array|null $attribs Attributes for element insertion;
	 *   forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool True for tokens handled or ignored here; otherwise the
	 *   result of the delegate handler.
	 */
	private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'eof' ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}

		if ( $token === 'text' ) {
			$leading = [];
			if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
				// Insert the leading whitespace right away and keep only
				// the remainder of the text for further processing.
				$whitespace = $leading[0];
				$this->stack->insertText( $whitespace );
				$value = substr( $value, strlen( $whitespace ) );
			}
			if ( strlen( $value ) === 0 ) {
				// The whole text was whitespace; nothing is left to do.
				return true;
			}
			// Non-whitespace text remains: "anything else" below.
		} elseif ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'col':
				return true; // Ignore the token.
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			case 'colgroup':
				if ( !$this->stack->currentNode()->isA( 'colgroup' ) ) {
					return true; // Ignore the token.
				}
				$this->stack->pop();
				$this->switchMode( 'inTableMode' );
				return true;
			}
			// Any other end tag: "anything else" below.
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			# OMITTED: <html>
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			case 'col':
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			}
			// Any other start tag: "anything else" below.
		}

		// Anything else: only meaningful while a colgroup is the current
		// node. Close it as if </colgroup> had been seen, then hand the
		// token on again.
		if ( !$this->stack->currentNode()->isA( 'colgroup' ) ) {
			return true; // Ignore the token.
		}
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
2937
2938
	/**
	 * Helper for inTableBodyMode: close the open table section.
	 *
	 * If a tbody, thead or tfoot is in table scope, the stack is cleared
	 * back to a table body context, the current node is popped and the
	 * parser switches to inTableMode.
	 *
	 * @return bool True if a section was closed, false if none of the
	 *   three section elements was in table scope.
	 */
	private function endSection() {
		// Probe the section elements in the same order as before,
		// stopping at the first one found in table scope.
		$sectionOpen = false;
		foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionName ) {
			if ( $this->stack->inTableScope( $sectionName ) ) {
				$sectionOpen = true;
				break;
			}
		}
		if ( !$sectionOpen ) {
			return false;
		}

		$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
		$this->stack->pop();
		$this->switchMode( 'inTableMode' );
		return true;
	}
2952 View Code Duplication
	/**
	 * Handle a token while in the "in table body" insertion mode.
	 *
	 * <tr> opens a row; <td>/<th> first open an implied row; tags that
	 * belong outside the section close it via endSection(). A few stray
	 * end tags are dropped. Everything else uses the inTableMode rules.
	 *
	 * @param string $token Token type ('tag' and 'endtag' are special-cased).
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens.
	 * @param array|null $attribs Attributes for element insertion;
	 *   forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool True for tokens handled here; otherwise the result of
	 *   inTableMode().
	 */
	private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'tbody':
			case 'tfoot':
			case 'thead':
				// Only close the section if this particular section
				// element is actually open in table scope.
				if ( $this->stack->inTableScope( $value ) ) {
					$this->endSection();
				}
				return true;
			case 'table':
				if ( !$this->endSection() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			# OMITTED: <body>
			case 'caption':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
			case 'td':
			case 'th':
			case 'tr':
				return true; // Ignore the token.
			}
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'th':
			case 'td':
				// A cell without a row: open an implied <tr> first, then
				// hand the cell token on again.
				$this->inTableBodyMode( 'tag', 'tr', [] );
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			case 'tr':
				$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inRowMode' );
				return true;
			case 'caption':
			case 'col':
			case 'colgroup':
			case 'tbody':
			case 'tfoot':
			case 'thead':
				if ( !$this->endSection() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
		}

		// Anything else: same rules as inTableMode.
		return $this->inTableMode( $token, $value, $attribs, $selfclose );
	}
3004
3005
	/**
	 * Helper for inRowMode: close the open table row.
	 *
	 * If a tr is in table scope, the stack is cleared back to a table
	 * row context, the current node is popped and the parser switches
	 * to inTableBodyMode.
	 *
	 * @return bool True if a row was closed, false if no tr was in
	 *   table scope.
	 */
	private function endRow() {
		if ( $this->stack->inTableScope( 'tr' ) ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->pop();
			$this->switchMode( 'inTableBodyMode' );
			return true;
		}
		return false;
	}
3015 View Code Duplication
	/**
	 * Handle a token while in the "in row" insertion mode.
	 *
	 * <td>/<th> open a cell; tags that belong outside the row close it
	 * via endRow() and are then handed to insertToken() again. A few
	 * stray end tags are dropped. Everything else uses the inTableMode
	 * rules.
	 *
	 * @param string $token Token type ('tag' and 'endtag' are special-cased).
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens.
	 * @param array|null $attribs Attributes for element insertion;
	 *   forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool True for tokens handled here; otherwise the result of
	 *   inTableMode().
	 */
	private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'tr':
				$this->endRow();
				return true;
			case 'table':
				if ( !$this->endRow() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			case 'tbody':
			case 'tfoot':
			case 'thead':
				// The named section must be open in table scope, and the
				// row must close successfully, before the token is
				// handed on again.
				if ( !$this->stack->inTableScope( $value ) ) {
					return true;
				}
				if ( !$this->endRow() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			# OMITTED: <body>
			case 'caption':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
			case 'td':
			case 'th':
				return true; // Ignore the token.
			}
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'th':
			case 'td':
				$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inCellMode' );
				// Marker that is cleared again when the cell is closed.
				$this->afe->insertMarker();
				return true;
			case 'caption':
			case 'col':
			case 'colgroup':
			case 'tbody':
			case 'tfoot':
			case 'thead':
			case 'tr':
				if ( !$this->endRow() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
		}

		// Anything else: same rules as inTableMode.
		return $this->inTableMode( $token, $value, $attribs, $selfclose );
	}
3070
3071
	/**
	 * Helper for inCellMode: close the open table cell.
	 *
	 * Looks for a td, then a th, in table scope and processes a matching
	 * end tag for the first one found.
	 *
	 * @return bool True if a cell end tag was processed, false if neither
	 *   td nor th was in table scope.
	 */
	private function endCell() {
		foreach ( [ 'td', 'th' ] as $cellName ) {
			if ( $this->stack->inTableScope( $cellName ) ) {
				$this->inCellMode( 'endtag', $cellName );
				return true;
			}
		}
		return false;
	}
3083
	/**
	 * Handle a token while in the "in cell" insertion mode.
	 *
	 * Table-structure start tags close the cell via endCell() and are
	 * then handed to insertToken() again. </td> and </th> close the
	 * cell. Table-level end tags close the cell only when the named
	 * element is in table scope. Everything else uses the in-body rules.
	 *
	 * @param string $token Token type ('tag' and 'endtag' are special-cased).
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens.
	 * @param array|null $attribs Forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool True for tokens handled here; otherwise the result of
	 *   inBodyMode().
	 */
	private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'endtag' ) {
			switch ( $value ) {
			case 'td':
			case 'th':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true;
				}
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				return true;
			# OMITTED: <body>
			case 'caption':
			case 'col':
			case 'colgroup':
			# OMITTED: <html>
				// These end tags are dropped.
				return true;

			case 'table':
			case 'tbody':
			case 'tfoot':
			case 'thead':
			case 'tr':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true;
				}
				// Close whichever cell is open, then hand the end tag
				// on again now that the mode is inRowMode.
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( BalanceSets::$tableCellSet );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
		} elseif ( $token === 'tag' ) {
			switch ( $value ) {
			case 'caption':
			case 'col':
			case 'colgroup':
			case 'tbody':
			case 'td':
			case 'tfoot':
			case 'th':
			case 'thead':
			case 'tr':
				if ( !$this->endCell() ) {
					return true;
				}
				$this->insertToken( $token, $value, $attribs, $selfclose );
				return true;
			}
		}

		// Anything else: use the in-body rules.
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}
3136
3137
	# OMITTED: <select>
3138
	/*
3139
	private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
3140
		Assert::invariant( false, 'Unimplemented' );
3141
	}
3142
3143
	private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
3144
		Assert::invariant( false, 'Unimplemented' );
3145
	}
3146
	*/
3147
3148
	/**
	 * Handle a token while in the "in template" insertion mode.
	 *
	 * Text uses the in-body rules and head-type start tags use the
	 * in-head rules. Any other start tag selects an insertion mode from
	 * its tag name and is reprocessed there. Only </template> is acted
	 * on among end tags. At end of input an open template is popped and
	 * the eof token is handed on again.
	 *
	 * @param string $token Token type: 'text', 'eof', 'tag' or 'endtag'.
	 *   Any other type trips an invariant assertion.
	 * @param mixed $value Tag name for 'tag' and 'endtag' tokens.
	 * @param array|null $attribs Forwarded to the delegate handlers.
	 * @param bool $selfclose Forwarded to the delegate handlers.
	 * @return bool|null True for tokens handled here, or the result of
	 *   the delegate handler. Nothing is returned explicitly after the
	 *   invariant assertion for an unknown token type.
	 */
	private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
		if ( $token === 'text' ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}

		if ( $token === 'eof' ) {
			if ( $this->stack->indexOf( 'template' ) < 0 ) {
				// No template is open: this is the real end.
				$this->stopParsing();
				return true;
			}
			// Close the open template, drop its insertion mode, and hand
			// the eof token on again.
			$this->stack->popTag( 'template' );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			$this->insertToken( $token, $value, $attribs, $selfclose );
			return true;
		}

		if ( $token === 'tag' ) {
			switch ( $value ) {
			case 'base':
			case 'basefont':
			case 'bgsound':
			case 'link':
			case 'meta':
			case 'noframes':
			# OMITTED: <script>
			case 'style':
			case 'template':
			# OMITTED: <title>
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}

			// Table-related start tags choose a table insertion mode;
			// every other start tag is reprocessed with the in-body rules.
			$modeForTag = [
				'caption' => 'inTableMode',
				'colgroup' => 'inTableMode',
				'tbody' => 'inTableMode',
				'tfoot' => 'inTableMode',
				'thead' => 'inTableMode',
				'col' => 'inColumnGroupMode',
				'tr' => 'inTableBodyMode',
				'td' => 'inRowMode',
				'th' => 'inRowMode',
			];
			$nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
			return $this->switchModeAndReprocess(
				$nextMode, $token, $value, $attribs, $selfclose
			);
		}

		if ( $token === 'endtag' ) {
			if ( $value === 'template' ) {
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );
			}
			// Every other end tag is ignored.
			return true;
		}

		Assert::invariant( false, "Bad token type: $token" );
	}
3214
}
3215