Completed
Push — master ( 872339...c28159 )
by Todd
03:07
created

gan_parser_html.php (21 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * @author Niels A.D.
4
 * @author Todd Burry <[email protected]>
5
 * @copyright 2010 Niels A.D., 2014 Todd Burry
6
 * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1
7
 * @package pQuery
8
 */
9
10
namespace pQuery;
11
12
/**
13
 * Parses a HTML document
14
 *
15
 * Functionality can be extended by overriding functions or adjusting the tag map.
16
 * Document may contain small errors, the parser will try to recover and resume parsing.
17
 */
18
class HtmlParserBase extends TokenizerBase {
19
20
	/**
21
	 * Tag open token, used for "<"
22
	 */
23
	const TOK_TAG_OPEN = 100;
24
	/**
25
	 * Tag close token, used for ">"
26
	 */
27
	const TOK_TAG_CLOSE = 101;
28
	/**
29
	 * Forward slash token, used for "/"
30
	 */
31
	const TOK_SLASH_FORWARD = 103;
32
	/**
33
	 * Backslash token, used for "\"
34
	 */
35
	// Must differ from every other TOK_* value in this class: the previous value (104) was
	// identical to TOK_STRING, so a "\" mapped through $custom_char_map was indistinguishable
	// from a quoted string token. 102 is the unused slot between TOK_TAG_CLOSE (101) and
	// TOK_SLASH_FORWARD (103).
	// NOTE(review): confirm no token constant in TokenizerBase or a subclass already uses 102.
	const TOK_SLASH_BACKWARD = 102;
36
	/**
37
	 * String token, used for attribute values (" and ')
38
	 */
39
	const TOK_STRING = 104;
40
	/**
41
	 * Equals token, used for "="
42
	 */
43
	const TOK_EQUALS = 105;
44
45
	/**
46
	 * Sets HTML identifiers, tags/attributes are considered identifiers
47
	 * @see TokenizerBase::$identifiers
48
	 * @access private
49
	 */
50
	var $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890:-_!?%';
51
52
	/**
53
	 * Status of the parser (tagname, closing tag, etc)
54
	 * @var array
55
	 */
56
	var $status = array();
57
58
	/**
59
	 * Map characters to match their tokens
60
	 * @see TokenizerBase::$custom_char_map
61
	 * @access private
62
	 */
63
	var $custom_char_map = array(
64
		'<' => self::TOK_TAG_OPEN,
65
		'>' => self::TOK_TAG_CLOSE,
66
		"'" => 'parse_string',
67
		'"' => 'parse_string',
68
		'/' => self::TOK_SLASH_FORWARD,
69
		'\\' => self::TOK_SLASH_BACKWARD,
70
		'=' => self::TOK_EQUALS
71
	);
72
73 37
	/**
	 * Class constructor, parses the document right away
	 * @param string $doc Document handed to the parent constructor
	 * @param int $pos Position handed to the parent constructor
	 */
	function __construct($doc = '', $pos = 0) {
		// The parent constructor receives the document and the start position unchanged.
		parent::__construct($doc, $pos);

		// Parsing starts immediately; the boolean result of parse_all() is not inspected here.
		$this->parse_all();
	}
77
78
	#php4 PHP4 class constructor compatibility
79
	#function HtmlParserBase($doc = '', $pos = 0) {return $this->__construct($doc, $pos);}
0 ignored issues
show
Unused Code Comprehensibility introduced by
60% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
80
	#php4e
81
82
	/**
83
	 Callback functions for certain tags
84
	 @var array (TAG_NAME => FUNCTION_NAME)
85
	 @internal Function should be a method in the class
86
	 @internal Tagname should be lowercase and is everything after <, e.g. "?php" or "!doctype"
87
	 @access private
88
	 */
89
	var $tag_map = array(
90
		'!doctype' => 'parse_doctype',
91
		'?' => 'parse_php',
92
		'?php' => 'parse_php',
93
		'%' => 'parse_asp',
94
		'style' => 'parse_style',
95
		'script' => 'parse_script'
96
	);
97
98
	/**
99
	 * Parse a HTML string (attributes)
100
	 * @internal Gets called with ' and "
101
	 * @return int
102
	 */
103 33
	protected function parse_string() {
		// The character under the cursor is the quote that triggered this callback;
		// search forward for the same character.
		$quote = $this->doc[$this->pos];
		$result = $this->next_pos($quote, false);

		if ($result !== self::TOK_UNKNOWN) {
			// Any result other than TOK_UNKNOWN is handled by stepping back one position.
			// NOTE(review): other methods in this class treat this case as "not found / end of file" - confirm.
			$this->pos -= 1;
		}

		return self::TOK_STRING;
	}
109
110
	/**
111
	 * Parse text between tags
112
	 * @internal Gets called between tags, uses {@link $status}[last_pos]
113
	 * @internal Stores text in {@link $status}[text]
114
	 */
115 37
	function parse_text() {
		// Text runs from the character after status[last_pos] up to, but excluding, the current position.
		$from = $this->status['last_pos'] + 1;
		$length = $this->pos - $from;

		if ($length > 0) {
			$this->status['text'] = substr($this->doc, $from, $length);
		} else {
			$this->status['text'] = '';
		}
	}
119
120
	/**
121
	 * Parse comment tags
122
	 * @internal Gets called with HTML comments ("<!--")
123
	 * @internal Stores text in {@link $status}[comment]
124
	 * @return bool
125
	 */
126 9
	function parse_comment() {
		// Skip three positions ahead before searching for the comment terminator.
		$this->pos += 3;
		$terminated = ($this->next_pos('-->', false) === self::TOK_UNKNOWN);

		// The comment text is taken from the current token in both cases, before the cursor is adjusted.
		$this->status['comment'] = $this->getTokenString(1, -1);

		if ($terminated) {
			// Move two further positions forward after a terminated comment.
			$this->pos += 2;
		} else {
			// No terminator: step back one position instead.
			$this->pos -= 1;
		}

		$this->status['last_pos'] = $this->pos;

		return true;
	}
139
140
	/**
141
	 * Parse doctype tag
142
	 * @internal Gets called with doctype ("<!doctype")
143
	 * @internal Stores text in {@link $status}[dtd]
144
	 * @return bool
145
	 */
146 9
	function parse_doctype() {
		$begin = $this->pos;

		// Find the first "[" or ">" after the doctype keyword.
		if ($this->next_search('[>', false) !== self::TOK_UNKNOWN) {
			$this->addError('Invalid doctype');
			return false;
		}

		if ($this->doc[$this->pos] === '[') {
			// A "[" was hit first: require the matching "]" and then the final ">".
			$bracket_closed = ($this->next_pos(']', false) === self::TOK_UNKNOWN);
			if (!$bracket_closed || ($this->next_pos('>', false) !== self::TOK_UNKNOWN)) {
				$this->addError('Invalid doctype');
				return false;
			}
		}

		// Rewind the token start so that getTokenString() covers the whole declaration.
		$this->token_start = $begin;
		$this->status['dtd'] = $this->getTokenString(2, -1);
		$this->status['last_pos'] = $this->pos;

		return true;
	}
165
166
	/**
167
	 * Parse cdata tag
168
	 * @internal Gets called with cdata ("<![cdata")
169
	 * @internal Stores text in {@link $status}[cdata]
170
	 * @return bool
171
	 */
172
	function parse_cdata() {
		// Guard: without a "]]>" terminator the section is reported as invalid.
		if ($this->next_pos(']]>', false) !== self::TOK_UNKNOWN) {
			$this->addError('Invalid cdata tag');
			return false;
		}

		// Offsets 9 and -1 are passed to getTokenString() to trim the token.
		// NOTE(review): 9 presumably skips the "<![CDATA[" prefix - confirm against getTokenString().
		$this->status['cdata'] = $this->getTokenString(9, -1);
		$this->status['last_pos'] = $this->pos + 2;

		return true;
	}
182
183
	/**
184
	 * Parse php tags
185
	 * @internal Gets called with php tags ("<?php")
186
	 * @return bool
187
	 */
188
	function parse_php() {
		$begin = $this->pos;

		$closed = ($this->next_pos('?>', false) === self::TOK_UNKNOWN);
		if (!$closed) {
			// No closing "?>" (end of file): move the cursor back two positions.
			$this->pos -= 2;
		}

		// Everything between the start position and the current position becomes the text.
		$length = $this->pos - 1 - $begin;
		if ($length > 0) {
			$this->status['text'] = substr($this->doc, $begin + 1, $length);
		} else {
			$this->status['text'] = '';
		}

		// Advance one position and record it as the end of this tag.
		$this->pos++;
		$this->status['last_pos'] = $this->pos;

		return true;
	}
199
200
	/**
201
	 * Parse asp tags
202
	 * @internal Gets called with asp tags ("<%")
203
	 * @return bool
204
	 */
205
	function parse_asp() {
		$begin = $this->pos;

		$closed = ($this->next_pos('%>', false) === self::TOK_UNKNOWN);
		if (!$closed) {
			// No closing "%>" (end of file): move the cursor back two positions.
			$this->pos -= 2;
		}

		// Everything between the start position and the current position becomes the text.
		$length = $this->pos - 1 - $begin;
		if ($length > 0) {
			$this->status['text'] = substr($this->doc, $begin + 1, $length);
		} else {
			$this->status['text'] = '';
		}

		// Advance one position and record it as the end of this tag.
		$this->pos++;
		$this->status['last_pos'] = $this->pos;

		return true;
	}
216
217
	/**
218
	 * Parse style tags
219
	 * @internal Gets called with style tags ("<style>")
220
	 * @return bool
221
	 */
222 9
	function parse_style() {
		// The attributes must parse and the opening tag must have ended on ">".
		$opened = $this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE);

		// Position of the end of the opening tag; a zero position counts as a failure,
		// exactly as in the chained condition this replaces.
		$start = $opened ? $this->pos : 0;

		if (!$start || ($this->next_pos('</style>', false) !== self::TOK_UNKNOWN)) {
			$this->addError('No end for style tag found');
			return false;
		}

		// The raw stylesheet is everything between the opening tag and the closing tag.
		$length = $this->pos - 1 - $start;
		if ($length > 0) {
			$this->status['text'] = substr($this->doc, $start + 1, $length);
		} else {
			$this->status['text'] = '';
		}

		// NOTE(review): 7 presumably moves the cursor onto the ">" of "</style>" - confirm against next_pos().
		$this->pos += 7;
		$this->status['last_pos'] = $this->pos;

		return true;
	}
235
236
	/**
237
	 * Parse script tags
238
	 * @internal Gets called with script tags ("<script>")
239
	 * @return bool
240
	 */
241
	function parse_script() {
		// The attributes must parse and the opening tag must have ended on ">".
		$opened = $this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE);

		// Position of the end of the opening tag; a zero position counts as a failure,
		// exactly as in the chained condition this replaces.
		$start = $opened ? $this->pos : 0;

		if (!$start || ($this->next_pos('</script>', false) !== self::TOK_UNKNOWN)) {
			$this->addError('No end for script tag found');
			return false;
		}

		// The raw script is everything between the opening tag and the closing tag.
		$length = $this->pos - 1 - $start;
		if ($length > 0) {
			$this->status['text'] = substr($this->doc, $start + 1, $length);
		} else {
			$this->status['text'] = '';
		}

		// NOTE(review): 8 presumably moves the cursor onto the ">" of "</script>" - confirm against next_pos().
		$this->pos += 8;
		$this->status['last_pos'] = $this->pos;

		return true;
	}
254
255
	/**
256
	 * Parse conditional tags (+ all conditional tags inside)
257
	 * @internal Gets called with IE conditionals ("<![if]" and "<!--[if]")
258
	 * @internal Stores condition in {@link $status}[tag_condition]
259
	 * @return bool
260
	 */
261
	function parse_conditional() {
		if (!$this->status['closing_tag']) {
			// Opening conditional: skip the prefix (5 positions inside a comment, otherwise 3),
			// then read the condition up to the closing "]".
			$this->pos += ($this->status['comment'] ? 5 : 3);

			if ($this->next_pos(']', false) !== self::TOK_UNKNOWN) {
				$this->addError('"]" not found in conditional tag');
				return false;
			}

			$this->status['tag_condition'] = $this->getTokenString(0, -1);
		} else {
			// Closing conditional: skip a fixed 8 positions.
			$this->pos += 8;
		}

		// The next non-whitespace token has to be ">".
		if ($this->next_no_whitespace() !== self::TOK_TAG_CLOSE) {
			$this->addError('No ">" tag found 2 for conditional tag');
			return false;
		}

		if ($this->status['comment']) {
			// Comment style conditional: its whole body is captured as text.
			$this->status['last_pos'] = $this->pos;
			$closed = ($this->next_pos('-->', false) === self::TOK_UNKNOWN);

			if ($closed) {
				$length = $this->pos - 10 - $this->status['last_pos'];
			} else {
				// No terminator: report it and take everything up to the last character.
				$this->addError('No ending tag found for conditional tag');
				$this->pos = $this->size - 1;
				$length = $this->pos - 1 - $this->status['last_pos'];
			}

			if ($length > 0) {
				$this->status['text'] = substr($this->doc, $this->status['last_pos'] + 1, $length);
			} else {
				$this->status['text'] = '';
			}

			if ($closed) {
				$this->pos += 2;
			}
		}

		$this->status['last_pos'] = $this->pos;

		return true;
	}
296
297
	/**
298
	 * Parse attributes (names + value)
299
	 * @internal Stores attributes in {@link $status}[attributes] (array(ATTR => VAL))
300
	 * @return bool
301
	 */
302 37
	function parse_attributes() {
		$this->status['attributes'] = array();

		// Stop characters for unquoted values; built on first use only.
		$stop = null;

		while ($this->next_no_whitespace() === self::TOK_IDENTIFIER) {
			$name = $this->getTokenString();

			if (in_array($name, array('?', '%'), true)) {
				// Probably the closing marker of a "<? ... ?>" or "<% ... %>" tag.
				break;
			}

			if ($this->next_no_whitespace() !== self::TOK_EQUALS) {
				// No "=": the attribute value is the attribute name itself.
				$value = $name;

				// Rewind to just before the token that was read instead of "=".
				$rewind_to = $this->token_start ? $this->token_start : $this->pos;
				$this->pos = $rewind_to - 1;
			} elseif ($this->next_no_whitespace() === self::TOK_STRING) {
				// Quoted value: strip the first and the last character of the token.
				$value = $this->getTokenString(1, -1);
			} else {
				// Unquoted value: scan until whitespace, "<", ">" or the end of the document.
				$this->token_start = $this->pos;

				if ($stop === null) {
					$stop = $this->whitespace;
					$stop['<'] = true;
					$stop['>'] = true;
				}

				do {
					++$this->pos;
				} while (($this->pos < $this->size) && !isset($stop[$this->doc[$this->pos]]));

				// The scan ends one position past the value.
				--$this->pos;

				$value = $this->getTokenString();

				if (trim($value) === '') {
					$this->addError('Invalid attribute value');
					return false;
				}
			}

			$this->status['attributes'][$name] = $value;
		}

		return true;
	}
345
346
	/**
347
	 * Default callback for tags
348
	 * @internal Gets called after the tagname (<html*ENTERS_HERE* attribute="value">)
349
	 * @return bool
350
	 */
351 37
	function parse_tag_default() {
		if (!$this->status['closing_tag']) {
			if (!$this->parse_attributes()) {
				return false;
			}
		} else {
			// Closing tags carry no attributes; just move on to the next token.
			$this->status['attributes'] = array();
			$this->next_no_whitespace();
		}

		if ($this->token !== self::TOK_TAG_CLOSE) {
			if ($this->token === self::TOK_SLASH_FORWARD) {
				// "/>" style self closing tag.
				$this->status['self_close'] = true;
				$this->next();
			} else {
				// "<?xyz ... ?>" and "<%xyz ... %>": the closing marker equals the first character of the tag name.
				$marker = $this->status['tag_name'][0];

				if ((($marker === '?') || ($marker === '%')) && ($this->doc[$this->pos] === $marker)) {
					$this->status['self_close'] = true;
					$this->pos++;

					// Derive the token from the character map directly; string entries
					// (callback names) and unmapped characters count as unknown.
					$char = $this->doc[$this->pos];
					if (isset($this->char_map[$char]) && !is_string($this->char_map[$char])) {
						$this->token = $this->char_map[$char];
					} else {
						$this->token = self::TOK_UNKNOWN;
					}
				}
			}
		}

		if ($this->token === self::TOK_TAG_CLOSE) {
			return true;
		}

		// Recover: report the unexpected token and search for the next ">".
		$this->addError('Expected ">", but found "'.$this->getTokenString().'"');

		if ($this->next_pos('>', false) !== self::TOK_UNKNOWN) {
			$this->addError('No ">" tag found for "'.$this->status['tag_name'].'" tag');
			return false;
		}

		return true;
	}
389
390
	/**
391
	 * Parse tag
392
	 * @internal Gets called after opening tag (<*ENTERS_HERE*html attribute="value">)
393
	 * @internal Stores information about the tag in {@link $status} (comment, closing_tag, tag_name)
394
	 * @return bool
395
	 */
396 37
	function parse_tag() {
		$start = $this->pos;
		$this->status['self_close'] = false;

		// Capture the text between the previous tag and this one.
		$this->parse_text();

		$next = (($this->pos + 1) < $this->size) ? $this->doc[$this->pos + 1] : '';
		if ($next === '!') {
			$this->status['closing_tag'] = false;

			if (substr($this->doc, $this->pos + 2, 2) === '--') {
				$this->status['comment'] = true;

				// substr() is used instead of a direct string offset so that a document
				// ending right after "<!--" does not trigger an "Uninitialized string offset" notice.
				if ((substr($this->doc, $this->pos + 4, 1) === '[') && (strcasecmp(substr($this->doc, $this->pos + 5, 2), 'if') === 0)) {
					return $this->parse_conditional();
				} else {
					return $this->parse_comment();
				}
			} else {
				$this->status['comment'] = false;

				// Same bounds-safe probe for a document ending right after "<!".
				if (substr($this->doc, $this->pos + 2, 1) === '[') {
					if (strcasecmp(substr($this->doc, $this->pos + 3, 2), 'if') === 0) {
						return $this->parse_conditional();
					} elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'endif') === 0) {
						$this->status['closing_tag'] = true;
						return $this->parse_conditional();
					} elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'cdata') === 0) {
						return $this->parse_cdata();
					}
				}
				// Anything else starting with "<!" (e.g. "<!doctype") falls through to the tag name handling below.
			}
		} elseif ($next === '/') {
			$this->status['closing_tag'] = true;
			++$this->pos;
		} else {
			$this->status['closing_tag'] = false;
		}

		if ($this->next() !== self::TOK_IDENTIFIER) {
			// Not a tag after all: report it, and reset last_pos to just before the "<"
			// so that the "<" stays part of the following text.
			$this->addError('Tagname expected');
			$this->status['last_pos'] = $start - 1;
			return true;
		}

		$tag = $this->getTokenString();
		$this->status['tag_name'] = $tag;
		$tag = strtolower($tag);

		// Dispatch to the callback registered in $tag_map, or to the default handler.
		if (isset($this->tag_map[$tag])) {
			$res = $this->{$this->tag_map[$tag]}();
		} else {
			$res = $this->parse_tag_default();
		}

		$this->status['last_pos'] = $this->pos;
		return $res;
	}
457
458
	/**
459
	 * Parse full document
460
	 * @return bool
461
	 */
462 37
	function parse_all() {
		$this->errors = array();
		$this->status['last_pos'] = -1;

		// Either the current token is already "<", or the first "<" has to be searched for.
		$at_tag = ($this->token === self::TOK_TAG_OPEN) || ($this->next_pos('<', false) === self::TOK_UNKNOWN);

		while ($at_tag) {
			if (!$this->parse_tag()) {
				return false;
			}

			// Continue for as long as another "<" is found.
			$at_tag = ($this->next_pos('<') !== self::TOK_NULL);
		}

		// Whatever follows the last tag is trailing text.
		$this->pos = $this->size;
		$this->parse_text();

		return true;
	}
479
}
480
481
/**
482
 * Parses a HTML document into a HTML DOM
483
 */
484
class HtmlParser extends HtmlParserBase {
485
486
	/**
487
	 * Root object
488
	 * @internal If string, then it will create a new instance as root
489
	 * @var DomNode
490
	 */
491
	var $root = 'pQuery\\DomNode';
492
493
	/**
494
	 * Current parsing hierarchy
495
	 * @internal Root is always at index 0, current tag is at the end of the array
496
	 * @var array
497
	 * @access private
498
	 */
499
	var $hierarchy = array();
500
501
	/**
502
	 * Tags that don't need closing tags
503
	 * @var array
504
	 * @access private
505
	 */
506
	var	$tags_selfclose = array(
507
		'area'		=> true,
508
		'base'		=> true,
509
		'basefont'	=> true,
510
		'br'		=> true,
511
		'col'		=> true,
512
		'command'	=> true,
513
		'embed'		=> true,
514
		'frame'		=> true,
515
		'hr'		=> true,
516
		'img'		=> true,
517
		'input'		=> true,
518
		'ins'		=> true,
519
		'keygen'	=> true,
520
		'link'		=> true,
521
		'meta'		=> true,
522
		'param'		=> true,
523
		'source'	=> true,
524
		'track'		=> true,
525
		'wbr'		=> true
526
	);
527
528
	/**
529
	 * Class constructor
530
	 * @param string $doc Document to be tokenized
531
	 * @param int $pos Position to start parsing
532
	 * @param DomNode $root Root node, null to auto create
533
	 */
534 37
	function __construct($doc = '', $pos = 0, $root = null) {
		if ($root === null) {
			// No root given: $this->root still holds the class name, so instantiate that class.
			$root_class = $this->root;
			$root = new $root_class('~root~', null);
		}

		$this->root =& $root;

		// The parent constructor parses the document.
		parent::__construct($doc, $pos);
	}
542
543
	#php4 PHP4 class constructor compatibility
544
	#function HtmlParser($doc = '', $pos = 0, $root = null) {return $this->__construct($doc, $pos, $root);}
0 ignored issues
show
Unused Code Comprehensibility introduced by
58% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
545
	#php4e
546
547
	/**
548
	 * Class magic invoke method, performs {@link select()}
549
	 * @return array
550
	 * @access private
551
	 */
552
	function __invoke($query = '*') {
		// Calling the parser object as a function is shorthand for select() with default options.
		$matches = $this->select($query);

		return $matches;
	}
555
556
	/**
557
	 * Class magic toString method, returns the inner text of the root node ({@link DomNode::getInnerText()})
558
	 * @return string
559
	 * @access private
560
	 */
561
	function __toString() {
		// The string form of the parser is the inner text of its root node.
		$text = $this->root->getInnerText();

		return $text;
	}
564
565
	/**
566
	 * Performs a css select query on the root node
567
	 * @see DomNode::select()
568
	 * @return array
569
	 */
570
	function select($query = '*', $index = false, $recursive = true, $check_self = false) {
		// Pure delegation: all arguments are forwarded unchanged to the root node.
		$result = $this->root->select($query, $index, $recursive, $check_self);

		return $result;
	}
573
574
	/**
575
	 * Updates the current hierarchy status and checks for
576
	 * correct opening/closing of tags
577
	 * @param bool $self_close Is current tag self closing? Null to use {@link tags_selfclose}
578
	 * @internal This is where most of the nodes get added
579
	 * @access private
580
	 */
581 37
	protected function parse_hierarchy($self_close = null) {
		if ($self_close === null) {
			// Not specified by the caller: look the tag name up in the self closing tag list.
			$this->status['self_close'] = ($self_close = isset($this->tags_selfclose[strtolower($this->status['tag_name'])]));
		}

		if ($self_close) {
			if ($this->status['closing_tag']) {
				// A closing tag for a self closing element (e.g. "</br>"): find the most recent
				// sibling with that tag name and move everything after it inside that element.
				$c = $this->hierarchy[count($this->hierarchy) - 1]->children;
				$found = false;
				for ($count = count($c), $i = $count - 1; $i >= 0; $i--) {
					if (strcasecmp($c[$i]->tag, $this->status['tag_name']) === 0) {
						for ($ii = $i + 1; $ii < $count; $ii++) {
							$index = null; //Needs to be passed by ref
							// $c is a local copy of the children array, so every following sibling
							// has to be addressed by its own index; always using $c[$i + 1] would
							// re-parent only the first one.
							$c[$ii]->changeParent($c[$i], $index);
						}
						$c[$i]->self_close = false;

						$found = true;
						break;
					}
				}

				if (!$found) {
					$this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
				}

			} elseif ($this->status['tag_name'][0] === '?') {
				$index = null; //Needs to be passed by ref
				$this->hierarchy[count($this->hierarchy) - 1]->addXML($this->status['tag_name'], '', $this->status['attributes'], $index);
			} elseif ($this->status['tag_name'][0] === '%') {
				$index = null; //Needs to be passed by ref
				$this->hierarchy[count($this->hierarchy) - 1]->addASP($this->status['tag_name'], '', $this->status['attributes'], $index);
			} else {
				// Regular self closing element: add it to the current node without descending into it.
				$index = null; //Needs to be passed by ref
				$this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
			}
		} elseif ($this->status['closing_tag']) {
			// Closing tag: search the open elements from the innermost outwards.
			$found = false;
			for ($count = count($this->hierarchy), $i = $count - 1; $i >= 0; $i--) {
				if (strcasecmp($this->hierarchy[$i]->tag, $this->status['tag_name']) === 0) {

					// Pop the matched element and everything nested inside it; each element
					// popped before the match itself was never closed explicitly.
					for ($ii = ($count - $i - 1); $ii >= 0; $ii--) {
						$e = array_pop($this->hierarchy);
						if ($ii > 0) {
							$this->addError('Closing tag "'.$this->status['tag_name'].'" while "'.$e->tag.'" is not closed yet');
						}
					}

					$found = true;
					break;
				}
			}

			if (!$found) {
				$this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
			}

		} else {
			// Opening tag: add it to the current node and make it the new current node.
			$index = null; //Needs to be passed by ref
			$this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
		}
	}
649
650
	/**
	 * Parse cdata tag and add it to the current node
	 * @return bool
	 */
	function parse_cdata() {
		if (parent::parse_cdata()) {
			// The current node is the last entry of the hierarchy.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			$current->addCDATA($this->status['cdata'], $index);

			return true;
		}

		return false;
	}
658
659 9
	/**
	 * Parse comment tag and add it to the current node
	 * @return bool
	 */
	function parse_comment() {
		if (parent::parse_comment()) {
			// The current node is the last entry of the hierarchy.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			$current->addComment($this->status['comment'], $index);

			return true;
		}

		return false;
	}
667
668
	/**
	 * Parse conditional tag and reflect it in the node hierarchy
	 * @return bool
	 */
	function parse_conditional() {
		if (!parent::parse_conditional()) {
			return false;
		}

		if ($this->status['comment']) {
			// Comment style conditional: added as a single node, its body (if any) as one text child.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			$node = $current->addConditional($this->status['tag_condition'], true, $index);

			if ($this->status['text'] !== '') {
				$index = null; //Needs to be passed by ref
				$node->addText($this->status['text'], $index);
			}
		} elseif ($this->status['closing_tag']) {
			// Closing conditional: handled like any other closing tag that is not self closing.
			$this->parse_hierarchy(false);
		} else {
			// Opening conditional: it becomes the new current node.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			$this->hierarchy[] = $current->addConditional($this->status['tag_condition'], false, $index);
		}

		return true;
	}
691
692 9
	/**
	 * Parse doctype tag and add it to the current node
	 * @return bool
	 */
	function parse_doctype() {
		if (parent::parse_doctype()) {
			// The current node is the last entry of the hierarchy.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			$current->addDoctype($this->status['dtd'], $index);

			return true;
		}

		return false;
	}
700
701
	/**
	 * Parse php tag and add it to the current node
	 * @return bool
	 */
	function parse_php() {
		if (parent::parse_php()) {
			// The current node is the last entry of the hierarchy.
			$current = $this->hierarchy[count($this->hierarchy) - 1];

			$index = null; //Needs to be passed by ref
			// NOTE(review): parse_hierarchy() calls addXML(tag, text, attributes, $index); here $index
			// is the third argument - confirm against the addXML() signature.
			$current->addXML('php', $this->status['text'], $index);

			return true;
		}

		return false;
	}
709
710
	/**
	 * Parses an ASP block and attaches it to the current node.
	 *
	 * Delegates the actual parsing to the parent class; on success the value in
	 * $this->status['text'] is passed to addASP() with an empty tag on the
	 * last entry of $this->hierarchy.
	 *
	 * @return bool True on success, false if the parent parser failed
	 */
	function parse_asp() {
		if (parent::parse_asp()) {
			$index = null; //Needs to be passed by ref
			$top = count($this->hierarchy) - 1;
			$this->hierarchy[$top]->addASP('', $this->status['text'], $index);
			return true;
		}

		return false;
	}
718
719
	/**
	 * Parses a script tag and attaches it (plus its content) to the current node.
	 *
	 * Delegates the actual parsing to the parent class; on success a child is
	 * created from $this->status on the last entry of $this->hierarchy, and any
	 * non-empty $this->status['text'] is added to that child as text.
	 *
	 * @return bool True on success, false if the parent parser failed
	 */
	function parse_script() {
		if (parent::parse_script()) {
			$index = null; //Needs to be passed by ref
			$top = count($this->hierarchy) - 1;
			$script = $this->hierarchy[$top]->addChild($this->status, $index);

			if ($this->status['text'] !== '') {
				$index = null; //Reset, needs to be passed by ref again
				$script->addText($this->status['text'], $index);
			}

			return true;
		}

		return false;
	}
731
732 9
	/**
	 * Parses a style tag and attaches it (plus its content) to the current node.
	 *
	 * Delegates the actual parsing to the parent class; on success a child is
	 * created from $this->status on the last entry of $this->hierarchy, and any
	 * non-empty $this->status['text'] is added to that child as text.
	 *
	 * @return bool True on success, false if the parent parser failed
	 */
	function parse_style() {
		if (parent::parse_style()) {
			$index = null; //Needs to be passed by ref
			$top = count($this->hierarchy) - 1;
			$style = $this->hierarchy[$top]->addChild($this->status, $index);

			if ($this->status['text'] !== '') {
				$index = null; //Reset, needs to be passed by ref again
				$style->addText($this->status['text'], $index);
			}

			return true;
		}

		return false;
	}
744
745 37
	/**
	 * Parses a regular tag and updates the node hierarchy accordingly.
	 *
	 * Delegates the actual parsing to the parent class; on success
	 * parse_hierarchy() is called with true when $this->status['self_close']
	 * is set, and with null otherwise.
	 *
	 * @return bool True on success, false if the parent parser failed
	 */
	function parse_tag_default() {
		if (parent::parse_tag_default()) {
			//Only an explicit self close is forwarded, anything else becomes null
			$self_close = ($this->status['self_close']) ? true : null;
			$this->parse_hierarchy($self_close);
			return true;
		}

		return false;
	}
751
752 37
	/**
	 * Parses text and attaches it to the current node.
	 *
	 * The parent class does the actual parsing; if it leaves a non-empty
	 * string in $this->status['text'], that string is added as text to the
	 * last entry of $this->hierarchy. Nothing is returned.
	 */
	function parse_text() {
		parent::parse_text();

		if ($this->status['text'] === '') {
			return; //Nothing to add
		}

		$index = null; //Needs to be passed by ref
		$top = count($this->hierarchy) - 1;
		$this->hierarchy[$top]->addText($this->status['text'], $index);
	}
760
761 37
	/**
	 * Parses the whole document.
	 *
	 * Resets $this->hierarchy so that it only holds (a reference to)
	 * $this->root, then lets the parent class run the parse.
	 *
	 * @return DomNode|false $this->root if the parent parse succeeded, false otherwise
	 */
	function parse_all() {
		$this->hierarchy = array(&$this->root);

		if (parent::parse_all()) {
			return $this->root;
		}

		return false;
	}
765
}
766
767
/**
768
 * HTML5 specific parser (adds support for omittable closing tags)
769
 */
770
class Html5Parser extends HtmlParser {

	/**
	 * Tags with omittable closing tags
	 *
	 * Lookups are done with isset() on both levels, so every previous tag
	 * must be stored as a KEY (the value itself is ignored).
	 *
	 * @var array array('tag2' => array('tag1' => true)) will close tag1 if following (not child) tag is tag2
	 * @access private
	 */
	var $tags_optional_close = array(
		//Current tag	=> Previous tag
		'li' 			=> array('li' => true),
		'dt' 			=> array('dt' => true, 'dd' => true),
		'dd' 			=> array('dt' => true, 'dd' => true),
		'address' 		=> array('p' => true),
		'article' 		=> array('p' => true),
		'aside' 		=> array('p' => true),
		'blockquote' 	=> array('p' => true),
		'dir' 			=> array('p' => true),
		'div' 			=> array('p' => true),
		'dl' 			=> array('p' => true),
		'fieldset' 		=> array('p' => true),
		'footer' 		=> array('p' => true),
		'form' 			=> array('p' => true),
		'h1' 			=> array('p' => true),
		'h2' 			=> array('p' => true),
		'h3' 			=> array('p' => true),
		'h4' 			=> array('p' => true),
		'h5' 			=> array('p' => true),
		'h6' 			=> array('p' => true),
		'header' 		=> array('p' => true),
		'hgroup' 		=> array('p' => true),
		'hr' 			=> array('p' => true),
		'menu' 			=> array('p' => true),
		'nav' 			=> array('p' => true),
		'ol' 			=> array('p' => true),
		'p' 			=> array('p' => true),
		'pre' 			=> array('p' => true),
		'section' 		=> array('p' => true),
		'table' 		=> array('p' => true),
		'ul' 			=> array('p' => true),
		'rt'			=> array('rt' => true, 'rp' => true),
		'rp'			=> array('rt' => true, 'rp' => true),
		'optgroup'		=> array('optgroup' => true, 'option' => true),
		//Was array('option'), which created key 0 and therefore never matched
		'option'		=> array('option' => true),
		//'thead' was misspelled as 'thread', so an open thead was never closed
		'tbody'			=> array('thead' => true, 'tbody' => true, 'tfoot' => true),
		'tfoot'			=> array('thead' => true, 'tbody' => true),
		'tr'			=> array('tr' => true),
		'td'			=> array('td' => true, 'th' => true),
		'th'			=> array('td' => true, 'th' => true),
		'body'			=> array('head' => true)
	);

	/**
	 * Updates the node hierarchy for the current tag, implicitly closing the
	 * previous tag when its closing tag may be omitted.
	 *
	 * If the current tag is an opening, non self-closing tag and
	 * {@link $tags_optional_close} lists the tag of the last entry of
	 * $this->hierarchy for it, that entry is popped before the parent
	 * implementation is called.
	 *
	 * @param bool|null $self_close Whether the tag is self closing; null to look it up in $this->tags_selfclose
	 * @return mixed Result of the parent parse_hierarchy()
	 */
	protected function parse_hierarchy($self_close = null) {
		$tag_curr = strtolower($this->status['tag_name']);
		if ($self_close === null) {
			$this->status['self_close'] = ($self_close = isset($this->tags_selfclose[$tag_curr]));
		}

		if (! ($self_close || $this->status['closing_tag'])) {
			$tag_prev = strtolower($this->hierarchy[count($this->hierarchy) - 1]->tag);
			//isset() on the nested key also covers a missing $tag_curr entry
			if (isset($this->tags_optional_close[$tag_curr][$tag_prev])) {
				array_pop($this->hierarchy);
			}
		}

		return parent::parse_hierarchy($self_close);
	}

}
838
839
?>