Completed
Branch Scrutinizer (3da711)
by Josh
03:32
created

Parser::fosterParent()   A

Complexity

Conditions 6
Paths 5

Size

Total Lines 35
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 35
ccs 15
cts 15
cp 1
rs 9.2222
c 0
b 0
f 0
cc 6
nc 5
nop 1
crap 6
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2019 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\FilterProcessing;
13
use s9e\TextFormatter\Parser\Logger;
14
use s9e\TextFormatter\Parser\Tag;
15
16
class Parser
17
{
18
	/**#@+
19
	* Boolean rules bitfield
20
	*/
21
	const RULE_AUTO_CLOSE        = 1 << 0;
22
	const RULE_AUTO_REOPEN       = 1 << 1;
23
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
24
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
25
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
26
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
27
	const RULE_IGNORE_TAGS       = 1 << 6;
28
	const RULE_IGNORE_TEXT       = 1 << 7;
29
	const RULE_IGNORE_WHITESPACE = 1 << 8;
30
	const RULE_IS_TRANSPARENT    = 1 << 9;
31
	const RULE_PREVENT_BR        = 1 << 10;
32
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
33
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
34
	/**#@-*/
35
36
	/**
37
	* Bitwise disjunction of rules related to automatic line breaks
38
	*/
39
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
40
41
	/**
42
	* Bitwise disjunction of rules that are inherited by subcontexts
43
	*/
44
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
45
46
	/**
47
	* All the characters that are considered whitespace
48
	*/
49
	const WHITESPACE = " \n\t";
50
51
	/**
52
	* @var array Number of open tags for each tag name
53
	*/
54
	protected $cntOpen;
55
56
	/**
57
	* @var array Number of times each tag has been used
58
	*/
59
	protected $cntTotal;
60
61
	/**
62
	* @var array Current context
63
	*/
64
	protected $context;
65
66
	/**
67
	* @var integer How hard the parser has worked on fixing bad markup so far
68
	*/
69
	protected $currentFixingCost;
70
71
	/**
72
	* @var Tag Current tag being processed
73
	*/
74
	protected $currentTag;
75
76
	/**
77
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
78
	*/
79
	protected $isRich;
80
81
	/**
82
	* @var Logger This parser's logger
83
	*/
84
	protected $logger;
85
86
	/**
87
	* @var integer How hard the parser should work on fixing bad markup
88
	*/
89
	public $maxFixingCost = 10000;
90
91
	/**
92
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
93
	*/
94
	protected $namespaces;
95
96
	/**
97
	* @var array Stack of open tags (instances of Tag)
98
	*/
99
	protected $openTags;
100
101
	/**
102
	* @var string This parser's output
103
	*/
104
	protected $output;
105
106
	/**
107
	* @var integer Position of the cursor in the original text
108
	*/
109
	protected $pos;
110
111
	/**
112
	* @var array Array of callbacks, using plugin names as keys
113
	*/
114
	protected $pluginParsers = [];
115
116
	/**
117
	* @var array Associative array of [pluginName => pluginConfig]
118
	*/
119
	protected $pluginsConfig;
120
121
	/**
122
	* @var array Variables registered for use in filters
123
	*/
124
	public $registeredVars = [];
125
126
	/**
127
	* @var array Root context, used at the root of the document
128
	*/
129
	protected $rootContext;
130
131
	/**
132
	* @var array Tags' config
133
	*/
134
	protected $tagsConfig;
135
136
	/**
137
	* @var array Tag storage
138
	*/
139
	protected $tagStack;
140
141
	/**
142
	* @var bool Whether the tags in the stack are sorted
143
	*/
144
	protected $tagStackIsSorted;
145
146
	/**
147
	* @var string Text being parsed
148
	*/
149
	protected $text;
150
151
	/**
152
	* @var integer Length of the text being parsed
153
	*/
154
	protected $textLen;
155
156
	/**
157
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
158
	*              whether the parser was reset during execution
159
	*/
160
	protected $uid = 0;
161
162
	/**
163
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
164
	*/
165
	protected $wsPos;
166
167
	/**
	* Constructor
	*
	* Copies the "plugins", "registeredVars", "rootContext" and "tags" entries of the config into
	* the matching properties, then performs the same initialization as unserialization
	*
	* @param  array $config Parser configuration
	*/
	public function __construct(array $config)
	{
		// Property name => config key, in the order they are assigned
		$map = [
			'pluginsConfig'  => 'plugins',
			'registeredVars' => 'registeredVars',
			'rootContext'    => 'rootContext',
			'tagsConfig'     => 'tags'
		];
		foreach ($map as $propName => $configKey)
		{
			$this->$propName = $config[$configKey];
		}

		// Everything that is not part of the config is created in __wakeup()
		$this->__wakeup();
	}
179
180
	/**
	* Serializer
	*
	* Lists the names of the properties that are kept when the parser is serialized.
	*
	* NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
	* of the serializer to the user (e.g. igbinary)
	*
	* @return array Names of the properties to serialize
	*/
	public function __sleep()
	{
		$propNames = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $propNames;
	}
194
195
	/**
	* Unserializer
	*
	* Gives this parser a fresh Logger instance. Also called by the constructor
	*
	* @return void
	*/
	public function __wakeup()
	{
		$logger = new Logger;

		$this->logger = $logger;
	}
204
205
	/**
	* Reset the parser for a new parsing
	*
	* Validates and normalizes the text, clears the logger, reinitializes the parsing state and
	* increments the parser's uid
	*
	* @param  string $text Text to be parsed
	* @return void
	*
	* @throws InvalidArgumentException if the text is not valid UTF-8
	*/
	protected function reset($text)
	{
		// An empty pattern in Unicode mode only matches if the subject is valid UTF-8
		if (preg_match('//u', $text) !== 1)
		{
			throw new InvalidArgumentException('Invalid UTF-8 input');
		}

		// Normalize CR/CRLF to LF first, then remove control characters that aren't allowed in
		// XML. Patterns passed as an array are applied one after the other
		$text = preg_replace(
			['/\\r\\n?/', '/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S'],
			["\n", ''],
			$text
		);

		// Start with empty logs
		$this->logger->clear();

		// Counters and costs
		$this->cntOpen           = [];
		$this->cntTotal          = [];
		$this->currentFixingCost = 0;

		// Tag-related state
		$this->currentTag        = null;
		$this->namespaces        = [];
		$this->openTags          = [];
		$this->tagStack          = [];
		$this->tagStackIsSorted  = false;

		// Output and cursors
		$this->isRich            = false;
		$this->output            = '';
		$this->pos               = 0;
		$this->wsPos             = 0;

		// Text being parsed
		$this->text              = $text;
		$this->textLen           = strlen($text);

		// The current context starts as a copy of the root context, outside of any paragraph
		$context                = $this->rootContext;
		$context['inParagraph'] = false;
		$this->context          = $context;

		// Bump the UID so that parse() can detect a reset that happened mid-execution
		$this->uid += 1;
	}
249
250
	/**
	* Set a tag's option
	*
	* The tag's config is copied, removed and stored again so that it ends up being a value and not
	* a reference, which prevents potential side-effects. References contained *inside* the tag's
	* config are left untouched. Nothing happens if the tag does not exist
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Work on a copy and drop the original entry. That will destroy the reference
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Update the copy and store it back under the same name
		$config[$optionName]        = $optionValue;
		$this->tagsConfig[$tagName] = $config;
	}
274
275
	//==========================================================================
276
	// Public API
277
	//==========================================================================
278
279
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to true
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$optionName = 'isDisabled';

		$this->setTagOption($tagName, $optionName, true);
	}
289
290
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Unknown tags are ignored
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		unset($this->tagsConfig[$tagName]['isDisabled']);
	}
303
304
	/**
	* Return the Logger instance used by this parser
	*
	* @return Logger
	*/
	public function getLogger()
	{
		$logger = $this->logger;

		return $logger;
	}
313
314
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text, which may be slightly different from the original
	* text in that EOLs are normalized to LF and other control codes are stripped. This method is
	* meant to be used in support of processing log entries, which contain offsets based on the
	* normalized text
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		$text = $this->text;

		return $text;
	}
330
331
	/**
	* Parse a text
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws RuntimeException if the parser was reset while it was parsing
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which run this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Collect the tags from plugins, process them, then wrap up the document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means a plugin or a filter reset the parser mid-execution
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Log a warning if fixing the markup cost more than allowed
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
364
365
	/**
	* Change a tag's tagLimit
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value for the tag's "tagLimit" option
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';

		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
378
379
	/**
	* Change a tag's nestingLimit
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value for the tag's "nestingLimit" option
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';

		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
392
393
	//==========================================================================
394
	// Output handling
395
	//==========================================================================
396
397
	/**
	* Finalize the output by appending the rest of the unprocessed text and create the root node
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Flush whatever text is left and close the last paragraph
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs. Removing a pair
		// can leave its parent empty, so we repeat until nothing is removed
		$emptyPair = '(<([^ />]++)[^>]*></\\1>)';
		for ($removed = 1; $removed > 0;)
		{
			$this->output = preg_replace($emptyPair, '', $this->output, -1, $removed);
		}

		// Merge consecutive <i> tags. This is a no-op if there are none
		$this->output = str_replace('</i><i>', '', $this->output);

		// Remove control characters from the output to ensure it's valid XML
		$this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// Rich text uses a <r> root, plain text (including <p></p> and <br/>) uses <t>
		if ($this->isRich)
		{
			$rootName = 'r';
		}
		else
		{
			$rootName = 't';
		}

		// Build the root's start tag with one declaration per recorded namespace prefix
		$rootStart = '<' . $rootName;
		foreach ($this->namespaces as $prefix => $unused)
		{
			$rootStart .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}
		$rootStart .= '>';

		$this->output = $rootStart . $this->output . '</' . $rootName . '>';
	}
438
439
	/**
	* Append a tag to the output
	*
	* Outputs the text that precedes the tag, then the tag itself, then moves the cursor past the
	* text consumed by the tag. Marks the output as rich
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$name  = $tag->getName();
		$start = $tag->getPos();
		$len   = $tag->getLen();
		$flags = $tag->getFlags();

		// Number of lines that can be skipped before and after the tag. End tags can skip one
		// more line after them than start tags
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($flags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$breaksParagraph = (bool) ($flags & self::RULE_BREAK_PARAGRAPH);
		$closeParagraph  = (!$tag->isStartTag() || $breaksParagraph);

		// Let the cursor catch up with this tag's position
		$this->outputText($start, $skipBefore, $closeParagraph);

		// Escaped copy of the text consumed by the tag, if any
		$consumed = '';
		if ($len)
		{
			$consumed = htmlspecialchars(substr($this->text, $start, $len), ENT_NOQUOTES, 'UTF-8');
		}

		if (!$tag->isStartTag())
		{
			// End tag: the consumed text goes in an <e> element right before the end tag
			if ($len)
			{
				$this->output .= '<e>' . $consumed . '</e>';
			}

			$this->output .= '</' . $name . '>';
		}
		else
		{
			// Tags that do not break paragraphs may need one opened before them
			if (!$breaksParagraph)
			{
				$this->outputParagraphStart($start);
			}

			// Record this tag's namespace prefix, if applicable
			$colonPos = strpos($name, ':');
			if ($colonPos)
			{
				$prefix = substr($name, 0, $colonPos);

				$this->namespaces[$prefix] = 0;
			}

			// Open the start tag, its closing bracket is added once we know its type
			$this->output .= '<' . $name;

			// Attributes are output in lexical order, which helps canonicalizing the output
			$attributes = $tag->getAttributes();
			ksort($attributes);

			foreach ($attributes as $attrName => $attrValue)
			{
				$escaped = str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8'));

				$this->output .= ' ' . $attrName . '="' . $escaped . '"';
			}

			if (!$tag->isSelfClosingTag())
			{
				// Start tag: the consumed text goes in a <s> element
				$this->output .= ($len) ? '><s>' . $consumed . '</s>' : '>';
			}
			elseif ($len)
			{
				// Self-closing tag that consumes text: the text becomes its content
				$this->output .= '>' . $consumed . '</' . $name . '>';
			}
			else
			{
				$this->output .= '/>';
			}
		}

		// Move the cursor past the tag
		$this->pos   = $start + $len;
		$this->wsPos = $this->pos;

		// Skip newlines (no other whitespace) after this tag, one per line we're allowed to skip
		for (; $skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n"; --$skipAfter)
		{
			++$this->wsPos;
		}
	}
561
562
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
			{
				// Ignore any number of lines at the end if we're closing a paragraph
				$maxLines = -1;
			}
			else
			{
				// There are no paragraphs to close in this context
				$closeParagraph = false;
			}
		}

		// Output verbatim the whitespace that was previously identified, if we're not already
		// where we need to be
		if ($this->pos < $catchupPos && $this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;
		}

		// If there is nothing left to output, close the paragraph if applicable and return
		if ($this->pos >= $catchupPos)
		{
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// In a context that ignores text, the text is output as-is if it's only whitespace or
		// inside of ignore tags otherwise
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$ignoredLen  = $catchupPos - $this->pos;
			$ignoredText = substr($this->text, $this->pos, $ignoredLen);

			if (strspn($ignoredText, self::WHITESPACE) < $ignoredLen)
			{
				$ignoredText = '<i>' . htmlspecialchars($ignoredText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $ignoredText;
			$this->pos     = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Walk backwards from the end of the text and count the whitespace characters that are
		// set aside, stopping after as many newlines as specified or at the first character that
		// is not whitespace
		$trailingLen = 0;
		for ($i = $catchupPos - 1; $maxLines && $i >= $this->pos; --$i)
		{
			$char = $this->text[$i];
			if (strpos(self::WHITESPACE, $char) === false)
			{
				break;
			}

			if ($char === "\n")
			{
				--$maxLines;
			}

			++$trailingLen;
		}

		// Adjust $catchupPos to leave out the whitespace at the end
		$catchupPos -= $trailingLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Every pair of newlines found before $catchupPos ends the current paragraph and
			// starts a new one
			for (
				$breakPos = strpos($this->text, "\n\n", $this->pos);
				$breakPos !== false && $breakPos < $catchupPos;
				$breakPos = strpos($this->text, "\n\n", $this->pos)
			)
			{
				$this->outputText($breakPos, 0, true);
				$this->outputParagraphStart($catchupPos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$rawText = substr($this->text, $this->pos, $catchupPos - $this->pos);
			$escaped = htmlspecialchars($rawText, ENT_NOQUOTES, 'UTF-8');

			// Format line breaks only if automatic line breaks are enabled and are neither
			// disabled nor suspended
			$linebreakRules = $this->context['flags'] & self::RULES_AUTO_LINEBREAKS;
			if ($linebreakRules === self::RULE_ENABLE_AUTO_BR)
			{
				$escaped = str_replace("\n", "<br/>\n", $escaped);
			}

			$this->output .= $escaped;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the whitespace that was set aside, after the paragraph's end
		if ($trailingLen)
		{
			$this->output .= substr($this->text, $catchupPos, $trailingLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $trailingLen;
	}
721
722
	/**
	* Output a linebreak tag
	*
	* Outputs the text up to the tag's position, then appends a <br/> element
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();

		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
733
734
	/**
	* Output an ignore tag
	*
	* The text consumed by the tag is escaped and output inside of an <i> element, and the output
	* is marked as rich
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$start = $tag->getPos();
		$len   = $tag->getLen();

		// Text consumed by the tag
		$ignored = substr($this->text, $start, $len);

		// Output the text that precedes the tag, then the ignored text
		$this->outputText($start, 0, false);
		$this->output .= '<i>' . htmlspecialchars($ignored, ENT_NOQUOTES, 'UTF-8') . '</i>';
		$this->isRich  = true;

		// Move the cursor to the end of the tag
		$this->pos = $start + $len;
	}
756
757
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		// A paragraph can only be opened if we're not in one and the context creates paragraphs
		$canOpen = (!$this->context['inParagraph']
		         && ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS));
		if (!$canOpen)
		{
			return;
		}

		// Output the whitespace between $this->pos and $maxPos if applicable
		$this->outputWhitespace($maxPos);

		// Don't open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
782
783
	/**
	* Close current paragraph at current position if applicable
	*
	* Does nothing if no paragraph is open
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		if ($this->context['inParagraph'])
		{
			$this->output .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
799
800
	/**
	* Output the content of a verbatim tag
	*
	* Outputs the text up to the end of the current tag while the context's flags are temporarily
	* replaced with the flags of the given tag
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		// Swap the context's flags for the tag's
		$savedFlags = $this->context['flags'];
		$this->context['flags'] = $tag->getFlags();

		// Catch up to the end of the tag currently being processed
		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
		$this->outputText($endPos, 0, false);

		// Restore the original flags
		$this->context['flags'] = $savedFlags;
	}
813
814
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace skipped is appended verbatim to the output and the cursor is moved past it
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		// Number of characters that can be inspected
		$maxLen = $maxPos - $this->pos;
		if ($maxLen <= 0)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxLen);
		if (!$wsLen)
		{
			return;
		}

		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
833
834
	//==========================================================================
835
	// Plugins handling
836
	//==========================================================================
837
838
	/**
	* Disable a plugin
	*
	* Sets the plugin's "isDisabled" option to true. Unknown plugins are ignored
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy and drop the original entry to remove the reference
		$config = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Flag the copy as disabled and store it back under the same name
		$config['isDisabled'] = true;
		$this->pluginsConfig[$pluginName] = $config;
	}
857
858
	/**
	* Enable a plugin
	*
	* Sets the plugin's "isDisabled" option to false. Unknown plugins are ignored
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		$this->pluginsConfig[$pluginName]['isDisabled'] = false;
	}
871
872
	/**
	* Execute given plugin
	*
	* The plugin's parser is not executed if the plugin has a "quickMatch" string that does not
	* appear in the text, or if it has a "regexp" that does not match anything
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap test first: skip the plugin if its quickMatch string is not in the text
		if (isset($config['quickMatch']) && strpos($this->text, $config['quickMatch']) === false)
		{
			return;
		}

		// Plugins without a regexp receive an empty list of matches
		$matches = [];
		if (isset($config['regexp']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (!$matches)
			{
				return;
			}
		}

		// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
		$parser = $this->getPluginParser($pluginName);
		call_user_func($parser, $this->text, $matches);
	}
899
900
	/**
	* Execute all the plugins
	*
	* Plugins whose config has a non-empty "isDisabled" value are skipped
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
915
916
	/**
	* Execute given regexp and returns as many matches as given limit
	*
	* Matches are collected from the current text in set order, with their offsets
	*
	* @param  string  $regexp
	* @param  integer $limit
	* @return array
	*/
	protected function getMatches($regexp, $limit)
	{
		$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
		$total = preg_match_all($regexp, $this->text, $matches, $flags);

		// Only keep the first matches if there are more than allowed
		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
933
934
	/**
	* Get the cached callback for given plugin's parser
	*
	* If there is no callback cached for this plugin, a parser is instantiated using the class
	* name from the plugin's "className" option if set, or the plugin's default Parser class
	* otherwise, and its parse() method is cached as the callback
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$config = $this->pluginsConfig[$pluginName];
		if (isset($config['className']))
		{
			$className = $config['className'];
		}
		else
		{
			$className = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		}

		// Register the new instance's parse() method as this plugin's callback
		$callback = [new $className($this, $config), 'parse'];
		$this->pluginParsers[$pluginName] = $callback;

		return $this->pluginParsers[$pluginName];
	}
956
957
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* @param  string      $pluginName Name of the plugin
	* @param  callable    $parser     Callback executed with the text and the list of matches
	* @param  string|null $regexp     Regexp used to collect the matches passed to the parser, or
	*                                 NULL to leave the plugin's config untouched
	* @param  integer     $limit      Maximum number of matches, only used if $regexp is set
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not a valid callback
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		if (!is_callable($parser))
		{
			// $parser is the second argument of this method
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// Record the regexp and its limit together, they are both read when the plugin is executed
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		// Cache the callback so that no parser gets instantiated for this plugin
		$this->pluginParsers[$pluginName] = $parser;
	}
985
986
	//==========================================================================
987
	// Rules handling
988
	//==========================================================================
989
990
	/**
	* Apply closeAncestor rules associated with given tag
	*
	* Open tags are tested from the most recent to the oldest. If one is targeted by this tag's
	* closeAncestor rules, the tag is put back on the stack along with an end tag for that
	* ancestor
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		$targets = $tagConfig['rules']['closeAncestor'];
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor = $this->openTags[$i];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			++$this->currentFixingCost;

			// We have to close this ancestor. First we reinsert this tag...
			$this->tagStack[] = $tag;

			// ...then we add a new end tag for it with a better priority
			$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

			return true;
		}

		return false;
	}
1030
1031
	/**
	* Apply closeParent rules associated with given tag
	*
	* Only the most recent open tag is tested. If it is targeted by this tag's closeParent rules,
	* the tag is put back on the stack along with an end tag for that parent
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['closeParent'][$parentName]))
		{
			return false;
		}

		++$this->currentFixingCost;

		// We have to close that parent. First we reinsert the tag...
		$this->tagStack[] = $tag;

		// ...then we add a new end tag for it with a better priority
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		return true;
	}
1066
1067
	/**
	* Apply the createChild rules associated with given tag
	*
	* A start tag is added for each tag name listed in the rule, positioned after the whitespace
	* that follows the cursor, with priorities that increase from -999
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Children are created at the first position after the cursor that is not whitespace
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);

		$priority = -999;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			$this->addStartTag($childName, $childPos, 0, $priority);
			++$priority;
		}
	}
1086
1087
	/**
	* Apply fosterParent rules associated with given tag
	*
	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
	*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
	*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
	*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
	*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
	*       loop from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		// Only the most recent open tag can be fostered
		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		// Add the foster tag, unless the tag would foster itself or we've run out of budget
		$isSelf    = ($parentName === $tagName);
		$hasBudget = ($this->currentFixingCost < $this->maxFixingCost);
		if (!$isSelf && $hasBudget)
		{
			$this->addFosterTag($tag, $parent);
		}

		// Reinsert current tag
		$this->tagStack[] = $tag;

		// And finally close its parent with a priority that ensures it is processed before this
		// tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Adjust the fixing cost to account for the additional tags/processing
		$this->currentFixingCost += 4;

		return true;
	}
1136
1137
	/**
	* Check given tag's requireAncestor rule against the tags that are currently open
	*
	* The requirement is fulfilled as soon as one of the listed tag names has a non-zero open
	* count. An unfulfilled requirement is logged as an error.
	*
	* @param  Tag  $tag Tag being processed
	* @return bool      TRUE if the tag has a requireAncestor rule and none of the ancestors is open
	*/
	protected function requireAncestor(Tag $tag)
	{
		$config = $this->tagsConfig[$tag->getName()];
		if (!isset($config['rules']['requireAncestor']))
		{
			return false;
		}

		$ancestorNames = $config['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			if (!empty($this->cntOpen[$ancestorName]))
			{
				// One open ancestor is enough
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1168
1169
	//==========================================================================
1170
	// Tag processing
1171
	//==========================================================================
1172
1173
	/**
	* Queue a 0-width copy of a tag right after another tag
	*
	* The copy is positioned using getMagicStartCoords(), starting from the end of the current
	* tag. Invalidating the current tag also invalidates the copy.
	*
	* @param  Tag  $tag       Current tag
	* @param  Tag  $fosterTag Tag to copy
	* @return void
	*/
	protected function addFosterTag(Tag $tag, Tag $fosterTag)
	{
		$tagEnd = $tag->getPos() + $tag->getLen();
		$coords = $this->getMagicStartCoords($tagEnd);

		// $coords holds [position, priority]
		$copy = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);
		$tag->cascadeInvalidationTo($copy);
	}
1188
1189
	/**
	* Queue a 0-width end tag paired with given start tag
	*
	* If either the current tag or the start tag has the RULE_IGNORE_WHITESPACE flag, the end
	* tag is moved back before the whitespace that precedes given position.
	*
	* @param  Tag     $startTag Start tag to be closed
	* @param  integer $tagPos   Rightmost position for the end tag
	* @param  integer $prio     End tag's priority
	* @return Tag               The new end tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		$combinedFlags = $this->currentTag->getFlags() | $startTag->getFlags();
		if ($combinedFlags & self::RULE_IGNORE_WHITESPACE)
		{
			$tagPos = $this->getMagicEndPos($tagPos);
		}

		$endTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1213
1214
	/**
	* Move given position back over the whitespace that precedes it
	*
	* The position is never moved further back than the cursor's position. Whitespace is any
	* character from self::WHITESPACE.
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer         Adjusted position
	*/
	protected function getMagicEndPos($tagPos)
	{
		while ($tagPos > $this->pos)
		{
			$prevChar = $this->text[$tagPos - 1];
			if (strpos(self::WHITESPACE, $prevChar) === false)
			{
				break;
			}
			--$tagPos;
		}

		return $tagPos;
	}
1231
1232
	/**
	* Compute the position and priority of a magic start tag, adjusted for whitespace
	*
	* Starting from given position, whitespace (any character from self::WHITESPACE) is skipped
	* until the position of the next tag on the stack or the end of the text is reached,
	* whichever comes first. If the resulting position is the same as the next tag's, the
	* priority is set to one less than the next tag's priority, otherwise it is 0.
	*
	* @param  integer   $tagPos Leftmost possible position for the tag
	* @return integer[]         [Tag pos, priority]
	*/
	protected function getMagicStartCoords($tagPos)
	{
		if (empty($this->tagStack))
		{
			// Set the next position outside the text boundaries
			$nextPos  = $this->textLen + 1;
			$nextPrio = 0;
		}
		else
		{
			$nextTag  = end($this->tagStack);
			$nextPos  = $nextTag->getPos();
			$nextPrio = $nextTag->getSortPriority();
		}

		// Never read past the last character of the text. Without this bound, an empty stack
		// lets the scan reach $this->text[$this->textLen], which is an out-of-range offset
		// (assumes textLen is the length of $this->text)
		$scanLimit = min($nextPos, $this->textLen);

		// Find the first non-whitespace position before next tag or the end of text
		while ($tagPos < $scanLimit && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
		{
			++$tagPos;
		}

		// Set a priority that ensures this tag appears before the next tag
		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

		return [$tagPos, $prio];
	}
1264
1265
	/**
	* Test whether the next tag on the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool      FALSE if the stack is empty or its last tag cannot close given tag
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		return end($this->tagStack)->canClose($tag);
	}
1275
1276
	/**
	* Process every tag in the stack
	*
	* Does nothing if the stack is empty. Otherwise the per-tag counters are reset, then the
	* stack is drained one tag at a time, sorting it first whenever it is flagged as unsorted.
	* Once drained, an end tag is queued at the end of the text for each tag still open and the
	* cycle repeats until no tag remains.
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Reset the counters of every configured tag
		foreach ($this->tagsConfig as $tagName => $unusedConfig)
		{
			$this->cntOpen[$tagName]  = 0;
			$this->cntTotal[$tagName] = 0;
		}

		while (true)
		{
			// Drain the stack
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);
				$this->processCurrentTag();
			}

			// Queue an end tag for every tag left open. They are added from ancestors to
			// descendants, and since the stack is processed in LIFO order the descendants end
			// up being closed first
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}

			if (empty($this->tagStack))
			{
				break;
			}
		}
	}
1320
1321
	/**
	* Process the tag held in $this->currentTag
	*
	* The tag is first invalidated if tags are ignored in the current context, unless it can
	* close the last open tag or is a system tag. A tag that starts before the cursor is either
	* replaced by a shorter equivalent (end tags paired with a tag that is still open, ignore
	* tags that extend past the cursor) or invalidated. A tag that is still valid is then
	* dispatched to the method that matches its kind.
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;

		// While tags are ignored, only system tags and tags that can close the last open tag
		// are kept
		$ignoringTags = (bool) ($this->context['flags'] & self::RULE_IGNORE_TAGS);
		if ($ignoringTags && !$tag->canClose(end($this->openTags)) && !$tag->isSystemTag())
		{
			$tag->invalidate();
		}

		$tagPos = $tag->getPos();
		$tagLen = $tag->getLen();

		// Handle tags that the cursor has already passed
		if ($this->pos > $tagPos && !$tag->isInvalid())
		{
			// Length of the part of this tag that lies after the cursor, can be 0 or negative
			$remainingLen = $tagPos + $tagLen - $this->pos;

			// If this tag is paired with a start tag that is still open, replace it with an end
			// tag at the cursor that consumes what is left of its text. The current tag is not
			// invalidated, it's merely replaced
			$startTag = $tag->getStartTag();
			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				$replacement = $this->addEndTag($startTag->getName(), $this->pos, max(0, $remainingLen));
				$replacement->pairWith($startTag);

				return;
			}

			// An ignore tag that extends past the cursor is replaced with a shorter ignore tag
			if ($tag->isIgnoreTag() && $remainingLen > 0)
			{
				$this->addIgnoreTag($this->pos, $remainingLen);

				return;
			}

			// Anything else that was skipped is invalidated
			$tag->invalidate();
		}

		if ($tag->isInvalid())
		{
			return;
		}

		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);

			return;
		}

		if ($tag->isBrTag())
		{
			// Line breaks are silently dropped where they are prevented
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}

			return;
		}

		if ($tag->isParagraphBreak())
		{
			$this->outputText($tag->getPos(), 0, true);

			return;
		}

		if ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);

			return;
		}

		if ($tag->isStartTag())
		{
			$this->processStartTag($tag);

			return;
		}

		$this->processEndTag($tag);
	}
1412
1413
	/**
	* Process given start tag (including self-closing tags) at current position
	*
	* Checks are performed in this order:
	*
	*  1. the tag's global tagLimit
	*  2. the tag's filterChain, which filters/validates its attributes
	*  3. the fosterParent, closeParent and closeAncestor rules, if the fixing budget allows it
	*  4. the tag's nestingLimit
	*  5. whether the tag is allowed in the current context
	*  6. the requireAncestor rule
	*
	* The tag has to be known as valid and within its limits before any parent or ancestor is
	* closed for it. Ancestors have to be closed before the remaining checks because closing
	* them changes the open counts, the current context and the set of open ancestors.
	*
	* A tag that passes every check is output, becomes the current context (unless it is
	* self-closing) and has its createChild rule applied.
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		// 1. Global limit
		$tagLimit = $tagConfig['tagLimit'];
		if ($this->cntTotal[$tagName] >= $tagLimit)
		{
			$this->logger->err(
				'Tag limit exceeded',
				[
					'tag'      => $tag,
					'tagName'  => $tagName,
					'tagLimit' => $tagLimit
				]
			);
			$tag->invalidate();

			return;
		}

		// 2. Filter chain
		FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
		if ($tag->isInvalid())
		{
			return;
		}

		// 3. Rules that close a parent or an ancestor. When one of them applies the tag is
		//    still valid, it has been put back on the stack and will be processed again
		if ($this->currentFixingCost < $this->maxFixingCost
		 && ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)))
		{
			return;
		}

		// 4. Nesting limit
		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
		{
			$this->logger->err(
				'Nesting limit exceeded',
				[
					'tag'          => $tag,
					'tagName'      => $tagName,
					'nestingLimit' => $tagConfig['nestingLimit']
				]
			);
			$tag->invalidate();

			return;
		}

		// 5. Current context. Tags that consume text are reported as warnings, 0-width tags
		//    only as debug messages
		if (!$this->tagIsAllowed($tagName))
		{
			$msg     = 'Tag is not allowed in this context';
			$context = ['tag' => $tag, 'tagName' => $tagName];
			if ($tag->getLen() <= 0)
			{
				$this->logger->debug($msg, $context);
			}
			else
			{
				$this->logger->warn($msg, $context);
			}
			$tag->invalidate();

			return;
		}

		// 6. Required ancestors
		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// A tag with an autoClose rule that is not self-closing, not paired with an end tag and
		// not immediately followed by a closing tag is replaced with a self-closing tag that
		// has the same name, position, length, attributes and flags
		$autoClose = (bool) ($tag->getFlags() & self::RULE_AUTO_CLOSE);
		if ($autoClose
		 && !$tag->isSelfClosingTag()
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$replacement = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$replacement->setAttributes($tag->getAttributes());
			$replacement->setFlags($tag->getFlags());

			$tag = $replacement;
		}

		// Ignore the newline that immediately follows an unpaired tag that trims its first line
		$trimFirstLine = (bool) ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE);
		if ($trimFirstLine
		 && !$tag->getEndTag()
		 && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n")
		{
			$this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
		}

		// The tag is valid: output it, update the context, then create its children
		$this->outputTag($tag);
		$this->pushContext($tag);
		$this->createChild($tag);
	}
1533
1534
	/**
	* Process given end tag at current position
	*
	* The open tags are searched from last to first for a tag that given end tag can close.
	* Every tag opened after the match is closed first, and those flagged RULE_AUTO_REOPEN are
	* reopened after the end tag for as long as the fixing budget allows and no tag without that
	* flag has been met. Upcoming end tags located at the cursor that would close the same tags
	* again are ignored instead.
	*
	* @param  Tag  $tag End tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();
		if (empty($this->cntOpen[$tagName]))
		{
			// No tag of that name is open, this end tag has no start tag
			return;
		}

		/**
		* @var array List of tags that need to be closed before given tag
		*/
		$closeTags = [];

		// Walk the open tags from last to first until we find one that this tag can close
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$openTag = $this->openTags[$i];
			if ($tag->canClose($openTag))
			{
				break;
			}

			$closeTags[] = $openTag;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// Did not find a matching tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Whitespace is trimmed if this tag or any tag being closed asks for it
		$combinedFlags = $tag->getFlags();
		foreach ($closeTags as $openTag)
		{
			$combinedFlags |= $openTag->getFlags();
		}
		$ignoreWhitespace = (bool) ($combinedFlags & self::RULE_IGNORE_WHITESPACE);

		// Only reopen tags if we haven't exceeded our "fixing" budget
		$keepReopening = ($this->currentFixingCost < $this->maxFixingCost);

		// Close the tags one by one and collect the ones that have to be reopened
		$reopenTags = [];
		foreach ($closeTags as $openTag)
		{
			$openTagName = $openTag->getName();

			// Reopening stops at the first tag that is not flagged for it
			if ($keepReopening)
			{
				$keepReopening = (bool) ($openTag->getFlags() & self::RULE_AUTO_REOPEN);
				if ($keepReopening)
				{
					$reopenTags[] = $openTag;
				}
			}

			// Find the earliest position we can close this open tag
			$endPos = $tag->getPos();
			if ($ignoreWhitespace)
			{
				$endPos = $this->getMagicEndPos($endPos);
			}

			// Output an end tag to close this start tag, then update the context
			$magicEndTag = new Tag(Tag::END_TAG, $openTagName, $endPos, 0);
			$magicEndTag->setFlags($openTag->getFlags());
			$this->outputTag($magicEndTag);
			$this->popContext();
		}

		// Output our tag, moving the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
		// close tags that are already being closed now. Also, filter our list of tags being
		// reopened by removing those that would immediately be closed
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			// NOTE: each iteration of both loops is charged to the fixing budget by the
			//       increment in the loop's condition
			for ($i = count($this->tagStack) - 1; $i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost; --$i)
			{
				$upcomingTag = $this->tagStack[$i];

				// Stop at the first tag that is past the ignored text or is a start tag
				if ($upcomingTag->getPos() > $ignorePos || $upcomingTag->isStartTag())
				{
					break;
				}

				// Test whether this tag would close any of the tags we're about to reopen
				for ($j = count($closeTags) - 1; $j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost; --$j)
				{
					if (!$upcomingTag->canClose($closeTags[$j]))
					{
						continue;
					}

					// Remove the tag from the lists and reset the keys
					array_splice($closeTags, $j, 1);
					if (isset($reopenTags[$j]))
					{
						array_splice($reopenTags, $j, 1);
					}

					// Extend the ignored text to cover this tag
					$upcomingEnd = $upcomingTag->getPos() + $upcomingTag->getLen();
					$ignorePos   = max($ignorePos, $upcomingEnd);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$ignoreTag = new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos);
				$this->outputIgnoreTag($ignoreTag);
			}
		}

		// Re-add tags that need to be reopened, at current cursor position
		foreach ($reopenTags as $startTag)
		{
			$reopenedTag = $this->addCopyTag($startTag, $this->pos, 0);

			// Pair the copy with the original's end tag, if it has one
			$pairedEndTag = $startTag->getEndTag();
			if ($pairedEndTag)
			{
				$reopenedTag->pairWith($pairedEndTag);
			}
		}
	}
1698
1699
	/**
	* Leave the current context
	*
	* Removes the last open tag, decrements the open count for its name and restores the
	* parent context.
	*
	* @return void
	*/
	protected function popContext()
	{
		$closedTag  = array_pop($this->openTags);
		$closedName = $closedTag->getName();

		--$this->cntOpen[$closedName];
		$this->context = $this->context['parentContext'];
	}
1710
1711
	/**
	* Update counters and enter a new context based on given tag
	*
	* The tag's total count is always incremented. For a self-closing tag nothing else changes.
	* Otherwise the tag is added to the open tags, its open count is incremented and a new
	* context is created with the current context as its parent.
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagFlags  = $tag->getFlags();
		$tagConfig = $this->tagsConfig[$tagName];

		++$this->cntTotal[$tagName];

		// If this is a self-closing tag, the context remains the same
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags. A transparent tag is combined with the current context's
		// values as-is. For other tags, the high byte of each value is kept and also copied
		// over the low byte before being combined
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $v)
		{
			if (!$isTransparent)
			{
				$v = ($v & 0xFF00) | ($v >> 8);
			}
			$allowed[] = $tagConfig['allowed'][$k] & $v;
		}

		// Use this tag's flags as a base for this context and add inherited rules
		$inheritedFlags = $this->context['flags'] & self::RULES_INHERITANCE;
		$flags          = $tagFlags | $inheritedFlags;

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
1768
1769
	/**
	* Return whether given tag is allowed in current context
	*
	* The tag's bitNumber selects one bit in the context's "allowed" list: the upper bits
	* select the element and the lowest three bits select the bit within that element.
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];
		$index     = $bitNumber >> 3;
		$mask      = 1 << ($bitNumber & 7);

		return (bool) ($this->context['allowed'][$index] & $mask);
	}
1781
1782
	//==========================================================================
1783
	// Tag stack
1784
	//==========================================================================
1785
1786
	/**
	* Add a start tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$startTag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

		return $startTag;
	}
1799
1800
	/**
	* Add an end tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$endTag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

		return $endTag;
	}
1813
1814
	/**
	* Add a self-closing tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$selfClosingTag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

		return $selfClosingTag;
	}
1827
1828
	/**
	* Add a 0-width "br" tag to force a line break at given position
	*
	* The tag is a self-closing tag named "br".
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addBrTag($pos, $prio = 0)
	{
		$brTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

		return $brTag;
	}
1839
1840
	/**
	* Add an "ignore" tag
	*
	* The tag is a self-closing tag named "i". Its length is capped so that it does not extend
	* past the end of the text.
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		$maxLen    = $this->textLen - $pos;
		$ignoreLen = min($len, $maxLen);

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $ignoreLen, $prio);
	}
1852
1853
	/**
	* Add a paragraph break at given position
	*
	* The break is represented by a 0-width self-closing tag named "pb", which is not meant to
	* appear in the result.
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addParagraphBreak($pos, $prio = 0)
	{
		$breakTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

		return $breakTag;
	}
1866
1867
	/**
	* Add a copy of given tag at given position and length
	*
	* The copy has the same type, name and attributes as the original.
	*
	* @param  Tag     $tag  Original tag
	* @param  integer $pos  Copy's position
	* @param  integer $len  Copy's length
	* @param  integer $prio Copy's priority, or NULL to use the original's sort priority
	* @return Tag           Copy tag
	*/
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		$copyPrio = $prio ?? $tag->getSortPriority();

		$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);
		$copy->setAttributes($tag->getAttributes());

		return $copy;
	}
1887
1888
	/**
	* Create a tag and insert it in the tag stack if it is usable
	*
	* The tag is always created and returned. It is invalidated rather than inserted if its
	* name is not configured and it is not a system tag, if its text span is invalid, or if the
	* tag is configured as disabled (which is also logged as a warning).
	*
	* @param  integer $type Tag's type
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag = new Tag($type, $name, $pos, $len, $prio);

		// Configured tags get their rules bitfield
		$isConfigured = isset($this->tagsConfig[$name]);
		if ($isConfigured)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Unknown tags and tags with an invalid text span are silently invalidated
		$isUnknown = (!$isConfigured && !$tag->isSystemTag());
		if ($isUnknown || $this->isInvalidTextSpan($pos, $len))
		{
			$tag->invalidate();

			return $tag;
		}

		// Disabled tags are invalidated with a warning
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$this->logger->warn(
				'Tag is disabled',
				[
					'tag'     => $tag,
					'tagName' => $name
				]
			);
			$tag->invalidate();

			return $tag;
		}

		$this->insertTag($tag);

		return $tag;
	}
1934
1935
	/**
	* Test whether given text span is outside text boundaries or an invalid UTF sequence
	*
	* A span is invalid if its length or position is negative, if it ends past the end of the
	* text, or if either its first byte or the byte that immediately follows it is in the
	* 0x80-0xBF range.
	*
	* @param  integer $pos Start of text
	* @param  integer $len Length of text
	* @return bool
	*/
	protected function isInvalidTextSpan($pos, $len)
	{
		if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			return true;
		}

		// Collect the byte at the start of the span and the byte right after its end
		$boundaryBytes = substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1);

		return (bool) preg_match('([\\x80-\\xBF])', $boundaryBytes);
	}
1946
1947
	/**
	* Insert given tag in the tag stack
	*
	* While the stack is flagged as unsorted the tag is simply appended. Otherwise the tag is
	* inserted at the index that keeps the stack ordered by sort key, shifting tags with a
	* smaller key one slot towards the end.
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function insertTag(Tag $tag)
	{
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// Walk the stack from its end, moving each tag to the next slot until we reach a tag
		// whose key is not smaller than the new tag's
		$tagKey = $this->getSortKey($tag);
		for ($slot = count($this->tagStack); $slot > 0; --$slot)
		{
			$neighbour = $this->tagStack[$slot - 1];
			if (!($tagKey > $this->getSortKey($neighbour)))
			{
				break;
			}
			$this->tagStack[$slot] = $neighbour;
		}
		$this->tagStack[$slot] = $tag;
	}
1972
1973
	/**
	* Add a start tag and an end tag that are paired together
	*
	* @param  string  $name     Name of the tags
	* @param  integer $startPos Position of the start tag
	* @param  integer $startLen Length of the start tag
	* @param  integer $endPos   Position of the end tag
	* @param  integer $endLen   Length of the end tag
	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
	* @return Tag               Start tag
	*/
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// NOTE: the end tag is added first to try to keep the stack in the correct order
		$closingTag = $this->addEndTag($name, $endPos, $endLen, -$prio);
		$openingTag = $this->addStartTag($name, $startPos, $startLen, $prio);

		$openingTag->pairWith($closingTag);

		return $openingTag;
	}
1993
1994
	/**
	* Add a tag that represents a verbatim copy of the original text
	*
	* The tag is a self-closing tag named "v".
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, as returned by addTag()
	*/
	public function addVerbatim($pos, $len, $prio = 0)
	{
		$verbatimTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

		return $verbatimTag;
	}
2006
2007
	/**
	* Sort the tag stack by position and precedence
	*
	* Tags are indexed by their sort key, which includes their current index in the stack, then
	* sorted by key in reverse order. The stack is flagged as sorted afterwards.
	*
	* @return void
	*/
	protected function sortTags()
	{
		$tagsByKey = [];
		foreach ($this->tagStack as $tagIndex => $tag)
		{
			$tagsByKey[$this->getSortKey($tag, $tagIndex)] = $tag;
		}
		krsort($tagsByKey);

		$this->tagStack         = array_values($tagsByKey);
		$this->tagStackIsSorted = true;
	}
2025
2026
	/**
	* Generate a key for given tag that can be used to compare its position using lexical comparisons
	*
	* The key is made of six fields, in this order: the tag's position, a flag set for
	* non-negative priorities, the priority (offset by 1 << 30 if negative), a flag set for tags
	* that consume text, a length-based order and the tag's index.
	*
	* For tags that consume text, the length-based order is the text's length minus the tag's
	* length so that longest matches are processed first. For 0-width tags it depends on the
	* tag's type: end tags first, then self-closing tags, then start tags, which keeps
	* self-closing tags outside of tag pairs.
	*
	* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
	* are at the end of the array.
	*
	* @param  Tag     $tag
	* @param  integer $tagIndex
	* @return string
	*/
	protected function getSortKey(Tag $tag, int $tagIndex = 0): string
	{
		// Ensure that negative values are sorted correctly by flagging them and making them positive
		$prio          = $tag->getSortPriority();
		$isNonNegative = ($prio >= 0);
		if (!$isNonNegative)
		{
			$prio += (1 << 30);
		}

		// Sort 0-width tags separately from the rest
		$tagLen       = $tag->getLen();
		$consumesText = ($tagLen > 0);
		if ($consumesText)
		{
			$lenOrder = $this->textLen - $tagLen;
		}
		else
		{
			$typeOrder = [
				Tag::END_TAG          => 0,
				Tag::SELF_CLOSING_TAG => 1,
				Tag::START_TAG        => 2
			];
			$lenOrder = $typeOrder[$tag->getType()];
		}

		return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $isNonNegative, $prio, $consumesText, $lenOrder, $tagIndex);
	}
2070
}