Parser::outputTag()   F
last analyzed

Complexity

Conditions 16
Paths 432

Size

Total Lines 103
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 41
CRAP Score 16

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 44
c 1
b 0
f 0
dl 0
loc 103
ccs 41
cts 41
cp 1
rs 2.1888
cc 16
nc 432
nop 1
crap 16

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\FilterProcessing;
13
use s9e\TextFormatter\Parser\Logger;
14
use s9e\TextFormatter\Parser\Tag;
15
16
class Parser
17
{
18
	/**#@+
19
	* Boolean rules bitfield
20
	*/
21
	const RULE_AUTO_CLOSE        = 1 << 0;
22
	const RULE_AUTO_REOPEN       = 1 << 1;
23
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
24
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
25
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
26
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
27
	const RULE_IGNORE_TAGS       = 1 << 6;
28
	const RULE_IGNORE_TEXT       = 1 << 7;
29
	const RULE_IGNORE_WHITESPACE = 1 << 8;
30
	const RULE_IS_TRANSPARENT    = 1 << 9;
31
	const RULE_PREVENT_BR        = 1 << 10;
32
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
33
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
34
	/**#@-*/
35
36
	/**
37
	* Bitwise disjunction of rules related to automatic line breaks
38
	*/
39
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
40
41
	/**
42
	* Bitwise disjunction of rules that are inherited by subcontexts
43
	*/
44
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
45
46
	/**
47
	* All the characters that are considered whitespace
48
	*/
49
	const WHITESPACE = " \n\t";
50
51
	/**
52
	* @var array Number of open tags for each tag name
53
	*/
54
	protected $cntOpen;
55
56
	/**
57
	* @var array Number of times each tag has been used
58
	*/
59
	protected $cntTotal;
60
61
	/**
62
	* @var array Current context
63
	*/
64
	protected $context;
65
66
	/**
67
	* @var integer How hard the parser has worked on fixing bad markup so far
68
	*/
69
	protected $currentFixingCost;
70
71
	/**
72
	* @var Tag Current tag being processed
73
	*/
74
	protected $currentTag;
75
76
	/**
77
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
78
	*/
79
	protected $isRich;
80
81
	/**
82
	* @var Logger This parser's logger
83
	*/
84
	protected $logger;
85
86
	/**
87
	* @var integer How hard the parser should work on fixing bad markup
88
	*/
89
	public $maxFixingCost = 10000;
90
91
	/**
92
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
93
	*/
94
	protected $namespaces;
95
96
	/**
97
	* @var array Stack of open tags (instances of Tag)
98
	*/
99
	protected $openTags;
100
101
	/**
102
	* @var string This parser's output
103
	*/
104
	protected $output;
105
106
	/**
107
	* @var integer Position of the cursor in the original text
108
	*/
109
	protected $pos;
110
111
	/**
112
	* @var array Array of callbacks, using plugin names as keys
113
	*/
114
	protected $pluginParsers = [];
115
116
	/**
117
	* @var array Associative array of [pluginName => pluginConfig]
118
	*/
119
	protected $pluginsConfig;
120
121
	/**
122
	* @var array Variables registered for use in filters
123
	*/
124
	public $registeredVars = [];
125
126
	/**
127
	* @var array Root context, used at the root of the document
128
	*/
129
	protected $rootContext;
130
131
	/**
132
	* @var array Tags' config
133
	*/
134
	protected $tagsConfig;
135
136
	/**
137
	* @var array Tag storage
138
	*/
139
	protected $tagStack;
140
141
	/**
142
	* @var bool Whether the tags in the stack are sorted
143
	*/
144
	protected $tagStackIsSorted;
145
146
	/**
147
	* @var string Text being parsed
148
	*/
149
	protected $text;
150
151
	/**
152
	* @var integer Length of the text being parsed
153
	*/
154
	protected $textLen;
155
156
	/**
157
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
158
	*              whether the parser was reset during execution
159
	*/
160
	protected $uid = 0;
161
162
	/**
163
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
164
	*/
165
	protected $wsPos;
166
167
	/**
	* Constructor
	*
	* Copies the four configuration sections out of $config, then delegates to __wakeup() to
	* create the state that is not part of the configuration
	*
	* @param array $config Configuration, with 'plugins', 'registeredVars', 'rootContext' and
	*                      'tags' elements
	*/
	public function __construct(array $config)
	{
		$this->tagsConfig     = $config['tags'];
		$this->rootContext    = $config['rootContext'];
		$this->registeredVars = $config['registeredVars'];
		$this->pluginsConfig  = $config['plugins'];

		// The logger is created the same way as after unserialization
		$this->__wakeup();
	}
179
180
	/**
	* Serializer
	*
	* Lists the names of the properties that are kept when this object is serialized. They are
	* the same four properties that the constructor fills from the configuration.
	*
	* NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
	* of the serializer to the user (e.g. igbinary)
	*
	* @return array Names of the properties to serialize
	*/
	public function __sleep()
	{
		$persistentProperties = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $persistentProperties;
	}
194
195
	/**
	* Unserializer
	*
	* Creates a fresh Logger instance. Also called by the constructor.
	*
	* @return void
	*/
	public function __wakeup()
	{
		$this->logger = new Logger();
	}
204
205
	/**
	* Reset the parser for a new parsing
	*
	* Validates and normalizes the text, clears the logger, reinitializes all of the per-parse
	* state and increments the uid.
	*
	* @param  string $text Text to be parsed
	* @return void
	*
	* @throws InvalidArgumentException if the text is not valid UTF-8
	*/
	protected function reset($text)
	{
		// An empty pattern in UTF-8 mode fails to match if the subject is not valid UTF-8
		$isValidUtf8 = preg_match('//u', $text);
		if (!$isValidUtf8)
		{
			throw new InvalidArgumentException('Invalid UTF-8 input');
		}

		// CR and CRLF both become LF
		$eolRegexp = '/\\r\\n?/';
		$text      = preg_replace($eolRegexp, "\n", $text);

		// Strip control characters that aren't allowed in XML, as well as the UTF-8 encoded
		// form of U+FFFE and U+FFFF
		$illegalRegexp = '/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]|\\xEF\\xBF[\\xBE\\xBF]/';
		$text          = preg_replace($illegalRegexp, '', $text);

		// Start with empty logs
		$this->logger->clear();

		// Text and cursors
		$this->text     = $text;
		$this->textLen  = strlen($text);
		$this->pos      = 0;
		$this->wsPos    = 0;
		$this->output   = '';
		$this->isRich   = false;

		// Tags bookkeeping
		$this->tagStack          = [];
		$this->tagStackIsSorted  = false;
		$this->openTags          = [];
		$this->currentTag        = null;
		$this->cntOpen           = [];
		$this->cntTotal          = [];
		$this->namespaces        = [];
		$this->currentFixingCost = 0;

		// The current context is a copy of the root context, outside of any paragraph
		$this->context                = $this->rootContext;
		$this->context['inParagraph'] = false;

		// A new uid lets parse() detect a reset that happens during execution
		++$this->uid;
	}
249
250
	/**
	* Set a tag's option
	*
	* The tag's config is copied, removed from the tags config, modified, then stored again so
	* that the stored config is a value and not a reference. References contained *inside* the
	* tag's config are left untouched. Nothing happens if the tag has no config.
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Take a copy, then drop the original entry to destroy the reference
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Store the modified copy under the same name
		$config[$optionName]        = $optionValue;
		$this->tagsConfig[$tagName] = $config;
	}
274
275
	//==========================================================================
276
	// Public API
277
	//==========================================================================
278
279
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to true. Has no effect if the tag has no config.
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$optionName = 'isDisabled';
		$this->setTagOption($tagName, $optionName, true);
	}
289
290
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Has no effect if the tag has no
	* config.
	*
	* As in setTagOption(), the tag's config is copied and replaced rather than modified in
	* place. If the stored config happens to be a reference shared with other entries, modifying
	* it in place would enable those other entries as well.
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Copy the tag's config and remove it. That will destroy the reference
		$tagConfig = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Remove the option from the copy and replace the tag's config
		unset($tagConfig['isDisabled']);
		$this->tagsConfig[$tagName] = $tagConfig;
	}
303
304
	/**
	* Get this parser's Logger instance
	*
	* @return Logger The logger created in __wakeup()
	*/
	public function getLogger()
	{
		$logger = $this->logger;

		return $logger;
	}
313
314
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text stored by reset(): EOLs are normalized to LF and
	* some control codes are stripped, so it may differ slightly from the original text. This
	* method is meant to be used in support of processing log entries, which contain offsets
	* based on the normalized text
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		$normalizedText = $this->text;

		return $normalizedText;
	}
330
331
	/**
	* Parse a text
	*
	* Resets the parser, runs the plugin parsers, processes the tags and finalizes the output.
	* A warning is logged if the fixing cost ends up greater than $maxFixingCost.
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws RuntimeException if the parser has been reset while it was running
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which run this is
		$this->reset($text);
		$startUid = $this->uid;

		// Collect the tags, process them, then wrap up the document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means a plugin or a filter has reset the parser mid-execution
		if ($startUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Report when more fixing was needed than allowed
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
364
365
	/**
	* Change a tag's tagLimit
	*
	* Has no effect if the tag has no config.
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value of the tag's "tagLimit" option
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';
		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
378
379
	/**
	* Change a tag's nestingLimit
	*
	* Has no effect if the tag has no config.
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value of the tag's "nestingLimit" option
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';
		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
392
393
	//==========================================================================
394
	// Output handling
395
	//==========================================================================
396
397
	/**
	* Finalize the output by appending the rest of the unprocessed text and create the root node
	*
	* After the remaining text is output, the markup is cleaned up (empty pairs, consecutive
	* <i> tags, illegal characters, characters outside of the BMP) and wrapped in a root node
	* that carries the namespace declarations.
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Catch up with the end of the text, closing the last paragraph if there is one
		$this->outputText($this->textLen, 0, true);

		// Strip empty tag pairs and empty paragraphs. Removing a pair can leave its parent empty,
		// e.g. <I><U></U></I>, so we repeat until nothing is removed
		$emptyPairRegexp = '(<([^ />]++)[^>]*></\\1>)';
		do
		{
			$this->output = preg_replace($emptyPairRegexp, '', $this->output, -1, $removed);
		}
		while ($removed > 0);

		// Merge consecutive <i> tags by removing the boundary between them
		$boundary = '</i><i>';
		if (strpos($this->output, $boundary) !== false)
		{
			$this->output = str_replace($boundary, '', $this->output);
		}

		// Remove the characters that would make the output invalid XML
		$illegalRegexp = '([\\x00-\\x08\\x0B-\\x1F]|\\xEF\\xBF[\\xBE\\xBF])';
		$this->output  = preg_replace($illegalRegexp, '', $this->output);

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// The root is <r> if the text is rich, or <t> for plain text (including <p></p> and <br/>)
		$rootName = 't';
		if ($this->isRich)
		{
			$rootName = 'r';
		}

		// Build one namespace declaration per prefix in use
		$declarations = '';
		foreach ($this->namespaces as $prefix => $unused)
		{
			$declarations .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}

		$this->output = '<' . $rootName . $declarations . '>' . $this->output . '</' . $rootName . '>';
	}
438
439
	/**
	* Append a tag to the output
	*
	* Marks the output as rich, outputs the text that precedes the tag, outputs the tag's markup
	* then moves the cursor past the tag. If the tag has the RULE_IGNORE_WHITESPACE flag, up to
	* one line is ignored before the tag, and up to one newline (two for an end tag) is skipped
	* after it.
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$tagPos   = $tag->getPos();
		$tagLen   = $tag->getLen();
		$tagFlags = $tag->getFlags();

		// Number of lines to ignore before the tag, and number of newlines to skip after it
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));

		// Let the cursor catch up with this tag's position
		$this->outputText($tagPos, $skipBefore, $closeParagraph);

		// Capture the text consumed by the tag
		$tagText = ($tagLen)
		         ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
		         : '';

		// Output current tag
		if ($tag->isStartTag())
		{
			$this->outputStartTagMarkup($tag, $tagText);
		}
		else
		{
			$this->outputEndTagMarkup($tag, $tagText);
		}

		// Move the cursor past the tag
		$this->pos = $tagPos + $tagLen;

		// Skip newlines (no other whitespace) after this tag
		$this->skipNewlinesAfterTag($skipAfter);
	}

	/**
	* Output the markup of a start tag or self-closing tag, including its attributes
	*
	* @param  Tag    $tag     Start tag or self-closing tag
	* @param  string $tagText Escaped text consumed by the tag, empty if the tag's length is 0
	* @return void
	*/
	private function outputStartTagMarkup(Tag $tag, $tagText)
	{
		$tagName = $tag->getName();
		$tagPos  = $tag->getPos();
		$tagLen  = $tag->getLen();

		// Handle paragraphs before opening the tag
		if (!($tag->getFlags() & self::RULE_BREAK_PARAGRAPH))
		{
			$this->outputParagraphStart($tagPos);
		}

		// Record this tag's namespace, if applicable. A colon at position 0 means no prefix
		$colonPos = strpos($tagName, ':');
		if ($colonPos)
		{
			$this->namespaces[substr($tagName, 0, $colonPos)] = 0;
		}

		// Open the start tag and add its attributes, but don't close the tag
		$this->output .= '<' . $tagName;

		// We output the attributes in lexical order. Helps canonicalizing the output and could
		// prove useful someday
		$attributes = $tag->getAttributes();
		ksort($attributes);

		foreach ($attributes as $attrName => $attrValue)
		{
			// Newlines are output as a character reference after the value is escaped
			$this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
		}

		if ($tag->isSelfClosingTag())
		{
			// A self-closing tag that consumes text is output as a pair around that text
			$this->output .= ($tagLen)
			               ? '>' . $tagText . '</' . $tagName . '>'
			               : '/>';
		}
		elseif ($tagLen)
		{
			// The text consumed by a start tag goes in a <s> element
			$this->output .= '><s>' . $tagText . '</s>';
		}
		else
		{
			$this->output .= '>';
		}
	}

	/**
	* Output the markup of an end tag
	*
	* @param  Tag    $tag     End tag
	* @param  string $tagText Escaped text consumed by the tag, empty if the tag's length is 0
	* @return void
	*/
	private function outputEndTagMarkup(Tag $tag, $tagText)
	{
		// The text consumed by an end tag goes in a <e> element, before the tag is closed
		if ($tag->getLen())
		{
			$this->output .= '<e>' . $tagText . '</e>';
		}

		$this->output .= '</' . $tag->getName() . '>';
	}

	/**
	* Set wsPos to the current position, then move it past the newlines that follow
	*
	* @param  integer $maxLines Maximum number of newlines to skip
	* @return void
	*/
	private function skipNewlinesAfterTag($maxLines)
	{
		$this->wsPos = $this->pos;
		while ($maxLines && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
		{
			// Decrement the number of lines to skip
			--$maxLines;

			// Move the cursor past the newline
			++$this->wsPos;
		}
	}
550
551
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* The text is processed in this order:
	*  - text located before wsPos is output verbatim
	*  - if the current context has the RULE_IGNORE_TEXT flag, the rest is output as-is if it is
	*    only made of whitespace, or escaped inside of a <i> element otherwise
	*  - otherwise, trailing whitespace is set aside (up to $maxLines lines), the text is broken
	*    into paragraphs if the context creates paragraphs, escaped, and its newlines are
	*    followed by <br/> if automatic linebreaks are enabled. The whitespace that was set aside
	*    is output last, after the paragraph is closed
	*
	* The paragraph is only closed if the current context has the RULE_CREATE_PARAGRAPHS flag,
	* in which case any number of lines are ignored at the end of the text.
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
			{
				// This context does not use paragraphs, there is nothing to close
				$closeParagraph = false;
			}
			else
			{
				// Ignore any number of lines at the end if we're closing a paragraph
				$maxLines = -1;
			}
		}

		if ($this->pos >= $catchupPos)
		{
			// We're already there, close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Skip over previously identified whitespace if applicable
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			if ($this->pos >= $catchupPos)
			{
				// Skipped everything. Close the paragraph if applicable and return
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// Test whether we're even supposed to output anything
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
			// tags
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Compute the amount of text to ignore at the end of the output
		$ignorePos = $catchupPos;
		$ignoreLen = 0;

		// Ignore as many lines (including whitespace) as specified. We walk backwards from the
		// end of the text and stop at the first character that is not whitespace
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to ignore the text at the end
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				// Leading whitespace stays outside of the paragraph
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Look for a paragraph break in this text
			$pbPos = strpos($this->text, "\n\n", $this->pos);

			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				// Output the text up to the break and close the paragraph, then start a new one
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks if applicable, which is when RULE_ENABLE_AUTO_BR is the only
			// one of the automatic linebreaks rules that is set
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the ignored text if applicable
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $ignoreLen;
	}
710
711
	/**
	* Output a linebreak tag
	*
	* Outputs the text that precedes the tag without closing the paragraph, then appends <br/>
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();

		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
722
723
	/**
	* Output an ignore tag
	*
	* Outputs the text that precedes the tag, then the text covered by the tag, escaped and
	* wrapped in a <i> element. Marks the output as rich and moves the cursor past the tag.
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$startPos = $tag->getPos();
		$endPos   = $startPos + $tag->getLen();

		// Let the cursor catch up with the tag's position
		$this->outputText($startPos, 0, false);

		// Escape the ignored text and wrap it
		$ignored       = substr($this->text, $startPos, $endPos - $startPos);
		$this->output .= '<i>' . htmlspecialchars($ignored, ENT_NOQUOTES, 'UTF-8') . '</i>';
		$this->isRich  = true;

		// The cursor now sits right after the tag
		$this->pos = $endPos;
	}
745
746
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* Nothing happens if a paragraph is already open or if the current context does not create
	* paragraphs. Otherwise, leading whitespace is output first and the paragraph is only opened
	* if the cursor has not reached the end of the text.
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		// Already in a paragraph
		if ($this->context['inParagraph'])
		{
			return;
		}

		// Paragraphs are not used in this context
		if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
		{
			return;
		}

		// Whitespace between $this->pos and $maxPos goes before the paragraph
		$this->outputWhitespace($maxPos);

		// Don't open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
771
772
	/**
	* Close current paragraph at current position if applicable
	*
	* Appends </p> and marks the context as being outside of a paragraph. Does nothing if no
	* paragraph is open.
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		if ($this->context['inParagraph'])
		{
			$this->output .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
788
789
	/**
	* Output the content of a verbatim tag
	*
	* Temporarily replaces the context's flags with the given tag's flags while the text is
	* output up to the end of the current tag ($this->currentTag), then restores the original
	* flags.
	*
	* @param  Tag  $tag Tag whose flags are used while the text is output
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		$savedFlags = $this->context['flags'];

		// Output the text using this tag's flags
		$this->context['flags'] = $tag->getFlags();
		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
		$this->outputText($endPos, 0, false);

		// Put the context's own flags back
		$this->context['flags'] = $savedFlags;
	}
802
803
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace found between the cursor and $maxPos is appended to the output as-is and
	* the cursor is moved past it.
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		$maxLen = $maxPos - $this->pos;
		if ($maxLen <= 0)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxLen);
		if (!$wsLen)
		{
			return;
		}

		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
822
823
	//==========================================================================
824
	// Plugins handling
825
	//==========================================================================
826
827
	/**
	* Disable a plugin
	*
	* Sets the "isDisabled" element of the plugin's config to true. The config is copied and
	* replaced rather than modified in place, which removes the reference if there is one.
	* Has no effect if the plugin has no config.
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Take a copy, then drop the original entry to remove the reference
		$config = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Store the modified copy under the same name
		$config['isDisabled']             = true;
		$this->pluginsConfig[$pluginName] = $config;
	}
846
847
	/**
	* Enable a plugin
	*
	* Sets the "isDisabled" element of the plugin's config to false. Has no effect if the plugin
	* has no config.
	*
	* As in disablePlugin(), the plugin's config is copied and replaced rather than modified in
	* place. If the stored config happens to be a reference shared with other entries, modifying
	* it in place would enable those other entries as well.
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Copy the plugin's config to remove the reference
		$pluginConfig = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Update the value and replace the plugin's config
		$pluginConfig['isDisabled'] = false;
		$this->pluginsConfig[$pluginName] = $pluginConfig;
	}
860
861
	/**
	* Execute given plugin
	*
	* The plugin's parser is not executed if its config has a "quickMatch" string that does not
	* appear in the text, or if it has a "regexp" and a "regexpLimit" and the regexp does not
	* match. Otherwise its parser is called with the text and the matches, which is an empty
	* array if the plugin does not use a regexp.
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap test first: the text must contain the quickMatch string
		if (isset($config['quickMatch']))
		{
			if (strpos($this->text, $config['quickMatch']) === false)
			{
				return;
			}
		}

		$matches = [];
		if (isset($config['regexp'], $config['regexpLimit']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (empty($matches))
			{
				// The plugin uses a regexp and it matched nothing
				return;
			}
		}

		// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
		$callback = $this->getPluginParser($pluginName);
		call_user_func($callback, $this->text, $matches);
	}
888
889
	/**
	* Execute all the plugins
	*
	* Plugins whose config has a non-empty "isDisabled" element are skipped.
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
904
905
	/**
	* Execute given regexp and returns as many matches as given limit
	*
	* The regexp is executed against the text being parsed. Matches are in set order and each
	* captured string comes with its offset.
	*
	* @param  string  $regexp Regexp to execute
	* @param  integer $limit  Maximum number of matches returned
	* @return array           Matches, in the format produced by preg_match_all()
	*/
	protected function getMatches($regexp, $limit)
	{
		$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
		$total = preg_match_all($regexp, $this->text, $matches, $flags);

		// Drop the matches that exceed the limit
		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
922
923
	/**
	* Get the cached callback for given plugin's parser
	*
	* If no callback is cached for this plugin, a parser is instantiated and its parse() method
	* is cached as the callback. The parser's class is the "className" element of the plugin's
	* config if it is set, or s9e\TextFormatter\Plugins\<pluginName>\Parser otherwise.
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$pluginConfig = $this->pluginsConfig[$pluginName];

		// Default class name, unless the plugin's config specifies one
		$className = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		if (isset($pluginConfig['className']))
		{
			$className = $pluginConfig['className'];
		}

		// Register the parser as a callback
		$callback = [new $className($this, $pluginConfig), 'parse'];
		$this->pluginParsers[$pluginName] = $callback;

		return $callback;
	}
945
946
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* If the plugin has no config, an empty config is created so that the parser gets executed.
	* If a regexp is given, the regexp and the limit are stored in the plugin's config.
	*
	* @param  string   $pluginName Name of the plugin
	* @param  callable $parser     Callback executed as the plugin's parser
	* @param  string   $regexp     Regexp stored as the plugin's "regexp", if given
	* @param  integer  $limit      Value stored as the plugin's "regexpLimit" if a regexp is given
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not callable
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		if (!is_callable($parser))
		{
			// $parser is the second argument of this method
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		$this->pluginParsers[$pluginName] = $parser;
	}
976
977
	//==========================================================================
978
	// Rules handling
979
	//==========================================================================
980
981
	/**
	* Apply closeAncestor rules associated with given tag
	*
	* Open tags are tested from the most recently opened to the oldest. At the first one whose
	* name is in this tag's closeAncestor rules, the fixing cost is incremented, this tag is put
	* back on the tag stack and an end tag is added for the ancestor at this tag's position.
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		$targets = $tagConfig['rules']['closeAncestor'];
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor = $this->openTags[$i];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			++$this->currentFixingCost;

			// We have to close this ancestor. First we reinsert this tag...
			$this->tagStack[] = $tag;

			// ...then we add a new end tag for it with a better priority
			$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

			return true;
		}

		return false;
	}
1021
1022
	/**
	* Apply closeParent rules associated with given tag
	*
	* Only the last open tag is tested. If its name is in this tag's closeParent rules, the
	* fixing cost is incremented, this tag is put back on the tag stack and an end tag is added
	* for the parent at this tag's position.
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		$parent = end($this->openTags);
		if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
		{
			return false;
		}

		++$this->currentFixingCost;

		// We have to close that parent. First we reinsert the tag...
		$this->tagStack[] = $tag;

		// ...then we add a new end tag for it with a better priority
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		return true;
	}
1057
1058
	/**
	* Apply the createChild rules associated with given tag
	*
	* One start tag of length 0 is added for each tag name in the rule. They are all positioned
	* after the whitespace that follows the cursor, with priorities that start at -999 and
	* increase by one for each tag.
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Children are created after the whitespace that follows the cursor
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);

		$priority = -1000;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			++$priority;
			$this->addStartTag($childName, $childPos, 0, $priority);
		}
	}
1077
1078
	/**
	* Apply fosterParent rules associated with given tag
	*
	* Only the last open tag is tested. If its name is in this tag's fosterParent rules, the
	* parent is closed at this tag's position and, unless the parent has the same name as this
	* tag or the fixing cost limit has been reached, addFosterTag() is called with this tag and
	* the parent.
	*
	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
	*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
	*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
	*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
	*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
	*       loop from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		// A tag never fosters itself, and nothing is fostered once the cost limit is reached
		$isSelf      = ($parentName === $tagName);
		$hasBudget   = ($this->currentFixingCost < $this->maxFixingCost);
		if (!$isSelf && $hasBudget)
		{
			$this->addFosterTag($tag, $parent);
		}

		// Reinsert current tag
		$this->tagStack[] = $tag;

		// And finally close its parent with a priority that ensures it is processed
		// before this tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Adjust the fixing cost to account for the additional tags/processing
		$this->currentFixingCost += 4;

		return true;
	}
1127
1128
	/**
	* Check given tag's requireAncestor rules against the tags that are currently open
	*
	* The requirement is met as soon as one of the listed ancestors has a non-zero open count.
	* An error is logged if none of them is open
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
	*/
	protected function requireAncestor(Tag $tag)
	{
		$config = $this->tagsConfig[$tag->getName()];
		if (!isset($config['rules']['requireAncestor']))
		{
			return false;
		}

		$ancestorNames = $config['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			if (!empty($this->cntOpen[$ancestorName]))
			{
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1159
1160
	//==========================================================================
1161
	// Tag processing
1162
	//==========================================================================
1163
1164
	/**
	* Queue a zero-width copy of a tag right after another tag
	*
	* The copy's validity is tied to the current tag: invalidating the current tag cascades to
	* the copy
	*
	* @param  Tag  $tag       Current tag
	* @param  Tag  $fosterTag Tag to foster
	* @return void
	*/
	protected function addFosterTag(Tag $tag, Tag $fosterTag)
	{
		// Position and priority for a tag that starts where the current tag ends
		$coords = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

		$copy = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);
		$tag->cascadeInvalidationTo($copy);
	}
1179
1180
	/**
	* Queue a zero-width end tag paired with given start tag
	*
	* @param  Tag     $startTag Start tag
	* @param  integer $tagPos   End tag's position (moved back over whitespace if either the
	*                           current tag or the start tag has RULE_IGNORE_WHITESPACE)
	* @param  integer $prio     End tag's priority
	* @return Tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		$combinedFlags = $this->currentTag->getFlags() | $startTag->getFlags();
		if ($combinedFlags & self::RULE_IGNORE_WHITESPACE)
		{
			$tagPos = $this->getMagicEndPos($tagPos);
		}

		$endTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1204
1205
	/**
	* Move given position back over the whitespace that precedes it
	*
	* The position never moves back past the cursor
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer
	*/
	protected function getMagicEndPos($tagPos)
	{
		while ($tagPos > $this->pos)
		{
			$prevChar = $this->text[$tagPos - 1];
			if (strpos(self::WHITESPACE, $prevChar) === false)
			{
				// The previous character is not whitespace, stop here
				break;
			}
			--$tagPos;
		}

		return $tagPos;
	}
1222
1223
	/**
	* Compute the position and priority of a magic start tag, adjusted for whitespace
	*
	* Starting at given position, whitespace is skipped until either a non-whitespace character,
	* the position of the next tag in the stack, or the end of the text is reached. If the
	* resulting position is the same as the next tag's, the returned priority is one lower than
	* that tag's sort priority. Otherwise the priority is 0
	*
	* @param  integer   $tagPos Leftmost possible position for the tag
	* @return integer[]         [Tag pos, priority]
	*/
	protected function getMagicStartCoords($tagPos)
	{
		if (empty($this->tagStack))
		{
			// Set the next position outside the text boundaries so that it can never be equal
			// to the computed position
			$nextPos  = $this->textLen + 1;
			$nextPrio = 0;
		}
		else
		{
			$nextTag  = end($this->tagStack);
			$nextPos  = $nextTag->getPos();
			$nextPrio = $nextTag->getSortPriority();
		}

		// Never scan past the last character. Reading $this->text[$this->textLen] is an
		// out-of-range offset: it triggers a warning and yields an empty string, which strpos()
		// treats as found on PHP 8, pushing the position outside of the text
		$scanLimit = min($nextPos, $this->textLen);

		// Find the first non-whitespace position before next tag or the end of text
		while ($tagPos < $scanLimit && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
		{
			++$tagPos;
		}

		// Set a priority that ensures this tag appears before the next tag
		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

		return [$tagPos, $prio];
	}
1255
1256
	/**
	* Test whether the next tag in the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		// The next tag to be processed is the last element of the stack
		$nextTag = end($this->tagStack);

		return $nextTag->canClose($tag);
	}
1266
1267
	/**
	* Process every tag in the stack, then close the tags that are still open
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Reset the counters of every configured tag
		foreach ($this->tagsConfig as $tagName => $unusedConfig)
		{
			$this->cntOpen[$tagName]  = 0;
			$this->cntTotal[$tagName] = 0;
		}

		// The stack is not empty at this point. Closing leftover tags below may refill it, in
		// which case we go for another round
		while (!empty($this->tagStack))
		{
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);
				$this->processCurrentTag();
			}

			// NOTE: end tags are queued from ancestors to descendants. The stack is processed
			//       in LIFO order, so the tags end up being closed from descendants to ancestors
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}
		}
	}
1311
1312
	/**
	* Process the tag stored in $this->currentTag
	*
	* Tags that the cursor has already passed are either replaced by a shorter equivalent (paired
	* end tags and ignore tags) or invalidated. Valid tags are dispatched to the output or
	* processing method that matches their kind
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;

		// While tags are ignored, only system tags and tags that close the last open tag survive
		if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
		 && !$tag->canClose(end($this->openTags))
		 && !$tag->isSystemTag())
		{
			$tag->invalidate();
		}

		$tagPos = $tag->getPos();
		$tagEnd = $tagPos + $tag->getLen();

		// Handle tags located before the cursor
		if ($this->pos > $tagPos && !$tag->isInvalid())
		{
			// If this tag is paired with a start tag that is still open, replace it with an end
			// tag for that start tag that begins at the cursor and covers whatever is left of
			// the text this tag consumes. The current tag itself is not invalidated
			$pairedStartTag = $tag->getStartTag();
			if ($pairedStartTag && in_array($pairedStartTag, $this->openTags, true))
			{
				$replacement = $this->addEndTag(
					$pairedStartTag->getName(),
					$this->pos,
					max(0, $tagEnd - $this->pos)
				);
				$replacement->pairWith($pairedStartTag);

				return;
			}

			// An ignore tag is replaced by a new one that covers the part beyond the cursor
			if ($tag->isIgnoreTag())
			{
				$remainingLen = $tagEnd - $this->pos;
				if ($remainingLen > 0)
				{
					$this->addIgnoreTag($this->pos, $remainingLen);

					return;
				}
			}

			// Any other skipped tag is invalidated
			$tag->invalidate();
		}

		if ($tag->isInvalid())
		{
			return;
		}

		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);

			return;
		}

		if ($tag->isBrTag())
		{
			// Line breaks are silently dropped where they are prevented
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}

			return;
		}

		if ($tag->isParagraphBreak())
		{
			$this->outputText($tagPos, 0, true);

			return;
		}

		if ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);

			return;
		}

		if ($tag->isStartTag())
		{
			$this->processStartTag($tag);

			return;
		}

		$this->processEndTag($tag);
	}
1403
1404
	/**
	* Process given start tag (including self-closing tags) at current position
	*
	* The checks run in this order:
	*
	*  1. the tag's global limit (tagLimit)
	*  2. the tag's filterChain, which filters/validates its attributes
	*  3. fosterParent, closeParent and closeAncestor rules
	*  4. the tag's nestingLimit and whether the tag is allowed in the current context
	*  5. requireAncestor rules
	*
	* The tag has to be valid and within its limits before any parent or ancestor is closed for
	* it. Ancestors have to be closed before the remaining checks because closing them changes
	* the context, the open counts and the set of ancestors that are still available
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
		{
			$logContext = [
				'tag'      => $tag,
				'tagName'  => $tagName,
				'tagLimit' => $tagConfig['tagLimit']
			];
			$this->logger->err('Tag limit exceeded', $logContext);
			$tag->invalidate();

			return;
		}

		FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
		if ($tag->isInvalid())
		{
			return;
		}

		// Rules that close a parent/ancestor are only applied while the fixing budget lasts. If
		// one of them applies we just return, the tag is still valid and has been requeued
		$canFix = ($this->currentFixingCost < $this->maxFixingCost);
		if ($canFix
		 && ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)))
		{
			return;
		}

		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
		{
			$logContext = [
				'tag'          => $tag,
				'tagName'      => $tagName,
				'nestingLimit' => $tagConfig['nestingLimit']
			];
			$this->logger->err('Nesting limit exceeded', $logContext);
			$tag->invalidate();

			return;
		}

		if (!$this->tagIsAllowed($tagName))
		{
			$msg        = 'Tag is not allowed in this context';
			$logContext = ['tag' => $tag, 'tagName' => $tagName];

			// Zero-width tags are only reported at the debug level
			if ($tag->getLen() > 0)
			{
				$this->logger->warn($msg, $logContext);
			}
			else
			{
				$this->logger->debug($msg, $logContext);
			}
			$tag->invalidate();

			return;
		}

		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// A tag with an autoClose rule that is not self-closed, not paired with an end tag and
		// not immediately followed by an end tag is replaced with a self-closing tag that has
		// the same name, position, length, attributes and flags
		if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
		 && !$tag->isSelfClosingTag()
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$selfClosingTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$selfClosingTag->setAttributes($tag->getAttributes());
			$selfClosingTag->setFlags($tag->getFlags());

			$tag = $selfClosingTag;
		}

		// Ignore the newline that immediately follows the tag if applicable
		$afterTagPos = $tag->getPos() + $tag->getLen();
		if (($tag->getFlags() & self::RULE_TRIM_FIRST_LINE)
		 && substr($this->text, $afterTagPos, 1) === "\n")
		{
			$this->addIgnoreTag($afterTagPos, 1);
		}

		// The tag is valid: output it, update the context, then apply createChild rules
		$this->outputTag($tag);
		$this->pushContext($tag);
		$this->createChild($tag);
	}
1523
1524
	/**
	* Process given end tag at current position
	*
	* Open tags that sit between the end tag and the start tag it closes are closed first. Those
	* flagged RULE_AUTO_REOPEN may be reopened after the end tag, fixing budget permitting
	*
	* @param  Tag  $tag end tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		if (empty($this->cntOpen[$tag->getName()]))
		{
			// No tag of that name is open, there is nothing to close
			return;
		}

		/**
		* @var array List of tags need to be closed before given tag
		*/
		$closeTags = [];

		// Walk the open tags from the most recent to the oldest, looking for the one that is
		// closed by our tag. Each tag we have to go through adds to the fixing cost
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$candidate = $this->openTags[$i];
			if ($tag->canClose($candidate))
			{
				break;
			}

			$closeTags[] = $candidate;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// None of the open tags matches
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Whitespace is trimmed if our tag or any of the tags being closed asks for it
		$combinedFlags = $tag->getFlags();
		foreach ($closeTags as $closedTag)
		{
			$combinedFlags |= $closedTag->getFlags();
		}
		$ignoreWhitespace = (bool) ($combinedFlags & self::RULE_IGNORE_WHITESPACE);

		// Tags are only reopened while the fixing budget lasts
		$keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost);

		// Output an end tag for each tag being closed and collect the tags to be reopened
		$reopenTags = [];
		foreach ($closeTags as $closedTag)
		{
			// Reopening stops at the first tag that is not automatically reopened
			$keepReopening = $keepReopening && ($closedTag->getFlags() & self::RULE_AUTO_REOPEN);
			if ($keepReopening)
			{
				$reopenTags[] = $closedTag;
			}

			// Find the earliest position at which this open tag can be closed
			$endPos = $tag->getPos();
			if ($ignoreWhitespace)
			{
				$endPos = $this->getMagicEndPos($endPos);
			}

			// Output a zero-width end tag that carries the open tag's flags, then update the
			// context
			$closingTag = new Tag(Tag::END_TAG, $closedTag->getName(), $endPos, 0);
			$closingTag->setFlags($closedTag->getFlags());
			$this->outputTag($closingTag);
			$this->popContext();
		}

		// Output our own tag, which moves the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// Budget permitting, look ahead in the stack for end tags that would close the tags we
		// have just closed. Those end tags get covered by an ignore tag, and the tags they would
		// have closed are removed from the list of tags to be reopened
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			// Every upcoming tag inspected adds to the fixing cost
			for ($i = count($this->tagStack) - 1;
			     $i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost;
			     --$i)
			{
				$upcomingTag = $this->tagStack[$i];

				// Stop at the first tag that lies beyond the ignored text or that is not
				// strictly an end tag (i.e. a start tag or a self-closing tag)
				if ($upcomingTag->getPos() > $ignorePos || $upcomingTag->isStartTag())
				{
					break;
				}

				// Look for a closed tag that the upcoming tag would close. Every comparison
				// adds to the fixing cost
				for ($j = count($closeTags) - 1;
				     $j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost;
				     --$j)
				{
					if (!$upcomingTag->canClose($closeTags[$j]))
					{
						continue;
					}

					// Remove the tag from both lists, which also resets their keys
					array_splice($closeTags, $j, 1);
					if (isset($reopenTags[$j]))
					{
						array_splice($reopenTags, $j, 1);
					}

					// Extend the ignored text to cover the upcoming tag
					$upcomingEnd = $upcomingTag->getPos() + $upcomingTag->getLen();
					$ignorePos   = max($ignorePos, $upcomingEnd);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$ignoreTag = new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos);
				$this->outputIgnoreTag($ignoreTag);
			}
		}

		// Queue zero-width copies of the tags to be reopened at the cursor's position
		foreach ($reopenTags as $reopenedTag)
		{
			$copy = $this->addCopyTag($reopenedTag, $this->pos, 0);

			// Pair the copy with the original's end tag, if there is one
			$pairedEndTag = $reopenedTag->getEndTag();
			if ($pairedEndTag)
			{
				$copy->pairWith($pairedEndTag);
			}
		}
	}
1688
1689
	/**
	* Remove the last open tag, decrement its open count and restore the parent context
	*
	* @return void
	*/
	protected function popContext()
	{
		$closedTag  = array_pop($this->openTags);
		$closedName = $closedTag->getName();

		--$this->cntOpen[$closedName];
		$this->context = $this->context['parentContext'];
	}
1700
1701
	/**
	* Update counters and replace current context with a new context based on given tag
	*
	* The tag's total count is always incremented. Self-closing tags stop there: they are not
	* added to the open tags and the context remains the same
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagFlags  = $tag->getFlags();
		$tagConfig = $this->tagsConfig[$tagName];

		++$this->cntTotal[$tagName];

		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $inherited)
		{
			if (!$isTransparent)
			{
				// Override the low bits (allowed children) of current context with its high
				// bits (allowed descendants)
				$inherited = ($inherited & 0xFF00) | ($inherited >> 8);
			}
			$allowed[] = $tagConfig['allowed'][$k] & $inherited;
		}

		// Start from the tag's own flags and add the rules inherited from current context
		$inheritedFlags = $this->context['flags'] & self::RULES_INHERITANCE;
		$flags          = $tagFlags | $inheritedFlags;

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		// The current context becomes the parent of the new one
		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
1754
1755
	/**
	* Return whether given tag is allowed in current context
	*
	* The tag's bitNumber is split into an index in the context's "allowed" array (bitNumber
	* divided by 8) and a bit within that element (bitNumber modulo 8)
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];
		$index     = $bitNumber >> 3;
		$mask      = 1 << ($bitNumber & 7);

		return (bool) ($this->context['allowed'][$index] & $mask);
	}
1767
1768
	//==========================================================================
1769
	// Tag stack
1770
	//==========================================================================
1771
1772
	/**
	* Create a start tag and add it to the stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$type = Tag::START_TAG;

		return $this->addTag($type, $name, $pos, $len, $prio);
	}
1785
1786
	/**
	* Create an end tag and add it to the stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$type = Tag::END_TAG;

		return $this->addTag($type, $name, $pos, $len, $prio);
	}
1799
1800
	/**
	* Create a self-closing tag and add it to the stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$type = Tag::SELF_CLOSING_TAG;

		return $this->addTag($type, $name, $pos, $len, $prio);
	}
1813
1814
	/**
	* Force a line break at given position using a zero-width, self-closing "br" tag
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addBrTag($pos, $prio = 0)
	{
		$type = Tag::SELF_CLOSING_TAG;

		return $this->addTag($type, 'br', $pos, 0, $prio);
	}
1825
1826
	/**
	* Add an "ignore" tag, a self-closing tag named "i"
	*
	* The length is capped so that the tag does not extend past the end of the text
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		$cappedLen = min($len, $this->textLen - $pos);

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $cappedLen, $prio);
	}
1838
1839
	/**
	* Add a paragraph break at given position
	*
	* The break is represented by a zero-width, self-closing "pb" tag that is never actually
	* output in the result
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addParagraphBreak($pos, $prio = 0)
	{
		$type = Tag::SELF_CLOSING_TAG;

		return $this->addTag($type, 'pb', $pos, 0, $prio);
	}
1852
1853
	/**
	* Add a tag that has the same type, name and attributes as given tag
	*
	* @param  Tag     $tag  Original tag
	* @param  integer $pos  Copy's position
	* @param  integer $len  Copy's length
	* @param  integer $prio Copy's priority (same as original by default)
	* @return Tag           Copy tag
	*/
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		if ($prio === null)
		{
			// Default to the original's priority
			$prio = $tag->getSortPriority();
		}

		$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $prio);
		$copy->setAttributes($tag->getAttributes());

		return $copy;
	}
1873
1874
	/**
	* Create a tag and insert it in the stack if it is usable
	*
	* The tag is invalidated, and left out of the stack, if its name is neither configured nor
	* a system tag, if its text span is invalid, or if the tag is disabled. It is returned in
	* every case
	*
	* @param  integer $type Tag's type
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag     = new Tag($type, $name, $pos, $len, $prio);
		$isKnown = isset($this->tagsConfig[$name]);

		// Configured tags receive their rules bitfield
		if ($isKnown)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Reject unknown tags that are not system tags, and tags whose span is invalid
		if ((!$isKnown && !$tag->isSystemTag()) || $this->isInvalidTextSpan($pos, $len))
		{
			$tag->invalidate();

			return $tag;
		}

		// Reject disabled tags, with a warning
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$logContext = [
				'tag'     => $tag,
				'tagName' => $name
			];
			$this->logger->warn('Tag is disabled', $logContext);
			$tag->invalidate();

			return $tag;
		}

		$this->insertTag($tag);

		return $tag;
	}
1920
1921
	/**
	* Test whether given text span is outside text boundaries or an invalid UTF sequence
	*
	* A span is considered invalid if its position or length is negative, if it ends past the
	* end of the text, or if the byte at its start or the byte right after its end is in the
	* range 0x80..0xBF
	*
	* @param  integer $pos Start of text
	* @param  integer $len Length of text
	* @return bool
	*/
	protected function isInvalidTextSpan($pos, $len)
	{
		if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			return true;
		}

		// Collect the first byte of the span and the first byte that follows the span
		$boundaryBytes = substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1);

		return (bool) preg_match('([\\x80-\\xBF])', $boundaryBytes);
	}
1932
1933
	/**
	* Insert given tag in the tag stack
	*
	* If the stack is not currently sorted, the tag is simply appended. Otherwise it is inserted
	* at the index that keeps the stack ordered by sort key
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function insertTag(Tag $tag)
	{
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// Starting from the end of the stack, move every tag whose key is lower than the new
		// tag's key to the next slot, then store the new tag in the slot that was freed
		$key = $this->getSortKey($tag);
		for ($i = count($this->tagStack); $i > 0; --$i)
		{
			$previousTag = $this->tagStack[$i - 1];
			if (!($key > $this->getSortKey($previousTag)))
			{
				break;
			}
			$this->tagStack[$i] = $previousTag;
		}
		$this->tagStack[$i] = $tag;
	}
1958
1959
	/**
	* Add a start tag and an end tag of the same name, paired together
	*
	* @param  string  $name     Name of the tags
	* @param  integer $startPos Position of the start tag
	* @param  integer $startLen Length of the start tag
	* @param  integer $endPos   Position of the end tag
	* @param  integer $endLen   Length of the end tag
	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
	* @return Tag               Start tag
	*/
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// NOTE: the end tag goes first to try to keep the stack in the correct order
		$endPrio    = -$prio;
		$closingTag = $this->addEndTag($name, $endPos, $endLen, $endPrio);
		$openingTag = $this->addStartTag($name, $startPos, $startLen, $prio);

		$openingTag->pairWith($closingTag);

		return $openingTag;
	}
1979
1980
	/**
	* Add a tag that represents a verbatim copy of the original text
	*
	* Uses a self-closing tag named "v"
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addVerbatim($pos, $len, $prio = 0)
	{
		$type = Tag::SELF_CLOSING_TAG;

		return $this->addTag($type, 'v', $pos, $len, $prio);
	}
1992
1993
	/**
	* Sort the tag stack by sort key, in reverse order, and flag it as sorted
	*
	* Each tag's current index in the stack is part of its key
	*
	* @return void
	*/
	protected function sortTags()
	{
		$tagsByKey = [];
		foreach ($this->tagStack as $tagIndex => $tag)
		{
			$tagsByKey[$this->getSortKey($tag, $tagIndex)] = $tag;
		}
		krsort($tagsByKey);

		$this->tagStack         = array_values($tagsByKey);
		$this->tagStackIsSorted = true;
	}
2011
2012
	/**
	* Generate a key for given tag that can be used to compare its position using lexical comparisons
	*
	* The key is made of, in order: the tag's position, its priority, whether it consumes any
	* text, a value based on its length (or on its type if it's a zero-width tag) and the index
	* passed as second argument.
	*
	* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
	* are at the end of the array.
	*
	* @param  Tag     $tag
	* @param  integer $tagIndex
	* @return string
	*/
	protected function getSortKey(Tag $tag, int $tagIndex = 0): string
	{
		// Negative priorities are flagged and shifted so that they sort correctly
		$prio       = $tag->getSortPriority();
		$isPositive = ($prio >= 0);
		if (!$isPositive)
		{
			$prio += (1 << 30);
		}

		// Zero-width tags are sorted separately from the rest
		$tagLen       = $tag->getLen();
		$consumesText = ($tagLen > 0);
		if ($consumesText)
		{
			// Inverse the length so that longest matches are processed first
			$lenOrder = $this->textLen - $tagLen;
		}
		else
		{
			// Self-closing tags go in-between end tags and start tags, which keeps them
			// outside of tag pairs
			$typeOrder = [
				Tag::END_TAG          => 0,
				Tag::SELF_CLOSING_TAG => 1,
				Tag::START_TAG        => 2
			];
			$lenOrder = $typeOrder[$tag->getType()];
		}

		return sprintf(
			'%8x%d%8x%d%8x%8x',
			$tag->getPos(),
			$isPositive,
			$prio,
			$consumesText,
			$lenOrder,
			$tagIndex
		);
	}
2056
}