Completed
Push — master ( d91fed...fd66aa )
by Josh
17:36
created

Parser::addVerbatim()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
ccs 2
cts 2
cp 1
cc 1
nc 1
nop 3
crap 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\FilterProcessing;
13
use s9e\TextFormatter\Parser\Logger;
14
use s9e\TextFormatter\Parser\Tag;
15
16
class Parser
17
{
18
	/**#@+
19
	* Boolean rules bitfield
20
	*/
21
	const RULE_AUTO_CLOSE        = 1 << 0;
22
	const RULE_AUTO_REOPEN       = 1 << 1;
23
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
24
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
25
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
26
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
27
	const RULE_IGNORE_TAGS       = 1 << 6;
28
	const RULE_IGNORE_TEXT       = 1 << 7;
29
	const RULE_IGNORE_WHITESPACE = 1 << 8;
30
	const RULE_IS_TRANSPARENT    = 1 << 9;
31
	const RULE_PREVENT_BR        = 1 << 10;
32
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
33
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
34
	/**#@-*/
35
36
	/**
37
	* Bitwise disjunction of rules related to automatic line breaks
38
	*/
39
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
40
41
	/**
42
	* Bitwise disjunction of rules that are inherited by subcontexts
43
	*/
44
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
45
46
	/**
47
	* All the characters that are considered whitespace
48
	*/
49
	const WHITESPACE = " \n\t";
50
51
	/**
52
	* @var array Number of open tags for each tag name
53
	*/
54
	protected $cntOpen;
55
56
	/**
57
	* @var array Number of times each tag has been used
58
	*/
59
	protected $cntTotal;
60
61
	/**
62
	* @var array Current context
63
	*/
64
	protected $context;
65
66
	/**
67
	* @var integer How hard the parser has worked on fixing bad markup so far
68
	*/
69
	protected $currentFixingCost;
70
71
	/**
72
	* @var Tag Current tag being processed
73
	*/
74
	protected $currentTag;
75
76
	/**
77
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
78
	*/
79
	protected $isRich;
80
81
	/**
82
	* @var Logger This parser's logger
83
	*/
84
	protected $logger;
85
86
	/**
87
	* @var integer How hard the parser should work on fixing bad markup
88
	*/
89
	public $maxFixingCost = 10000;
90
91
	/**
92
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
93
	*/
94
	protected $namespaces;
95
96
	/**
97
	* @var array Stack of open tags (instances of Tag)
98
	*/
99
	protected $openTags;
100
101
	/**
102
	* @var string This parser's output
103
	*/
104
	protected $output;
105
106
	/**
107
	* @var integer Position of the cursor in the original text
108
	*/
109
	protected $pos;
110
111
	/**
112
	* @var array Array of callbacks, using plugin names as keys
113
	*/
114
	protected $pluginParsers = [];
115
116
	/**
117
	* @var array Associative array of [pluginName => pluginConfig]
118
	*/
119
	protected $pluginsConfig;
120
121
	/**
122
	* @var array Variables registered for use in filters
123
	*/
124
	public $registeredVars = [];
125
126
	/**
127
	* @var array Root context, used at the root of the document
128
	*/
129
	protected $rootContext;
130
131
	/**
132
	* @var array Tags' config
133
	*/
134
	protected $tagsConfig;
135
136
	/**
137
	* @var array Tag storage
138
	*/
139
	protected $tagStack;
140
141
	/**
142
	* @var bool Whether the tags in the stack are sorted
143
	*/
144
	protected $tagStackIsSorted;
145
146
	/**
147
	* @var string Text being parsed
148
	*/
149
	protected $text;
150
151
	/**
152
	* @var integer Length of the text being parsed
153
	*/
154
	protected $textLen;
155
156
	/**
157
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
158
	*              whether the parser was reset during execution
159
	*/
160
	protected $uid = 0;
161
162
	/**
163
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
164
	*/
165
	protected $wsPos;
166
167
	/**
	* Constructor
	*
	* Copies the "plugins", "registeredVars", "rootContext" and "tags" entries of given config into
	* this parser's properties, then calls __wakeup() to finish the initialization
	*
	* @param array $config Parser configuration, expected to contain the four keys listed above
	*/
	public function __construct(array $config)
	{
		// Map each config key to the property that receives its value
		$propNames = [
			'plugins'        => 'pluginsConfig',
			'registeredVars' => 'registeredVars',
			'rootContext'    => 'rootContext',
			'tags'           => 'tagsConfig'
		];
		foreach ($propNames as $configKey => $propName)
		{
			$this->$propName = $config[$configKey];
		}

		// Reuse the initialization that also runs after unserialization
		$this->__wakeup();
	}
179
180
	/**
	* Serializer
	*
	* Lists the names of the properties that are kept when this parser is serialized. Every other
	* property is left out of the serialized representation.
	*
	* NOTE: __sleep() is used rather than the Serializable interface so that the choice of the
	* serializer (e.g. igbinary) remains with the user
	*
	* @return array Names of the properties to serialize
	*/
	public function __sleep()
	{
		$persistentProps = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $persistentProps;
	}
194
195
	/**
	* Unserializer
	*
	* Gives this parser a fresh Logger instance. Also called by the constructor.
	*
	* @return void
	*/
	public function __wakeup()
	{
		$this->logger = new Logger();
	}
204
205
	/**
	* Reset the parser for a new parsing
	*
	* Normalizes given text, clears the logger, reinitializes all of the per-run state and
	* increments the uid
	*
	* @param  string $text Text to be parsed
	* @return void
	*/
	protected function reset($text)
	{
		// Normalize CR/CRLF to LF (inner call), then remove the control characters that aren't
		// allowed in XML (outer call)
		$text = preg_replace(
			'/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S',
			'',
			preg_replace('/\\r\\n?/', "\n", $text)
		);

		// Start with empty logs
		$this->logger->clear();

		// Counters, tag storage and namespaces all start empty
		$this->cntOpen    = [];
		$this->cntTotal   = [];
		$this->namespaces = [];
		$this->openTags   = [];
		$this->tagStack   = [];

		// Nothing has been fixed, processed or output yet
		$this->currentFixingCost = 0;
		$this->currentTag        = null;
		$this->isRich            = false;
		$this->output            = '';
		$this->tagStackIsSorted  = false;

		// Store the normalized text and move both cursors back to the start
		$this->text    = $text;
		$this->textLen = strlen($text);
		$this->pos     = 0;
		$this->wsPos   = 0;

		// Start from a copy of the root context, outside of any paragraph
		$this->context = $this->rootContext;
		$this->context['inParagraph'] = false;

		// Bump the UID so that parse() can detect a reset that happens mid-execution
		++$this->uid;
	}
243
244
	/**
	* Set a tag's option
	*
	* The tag's config is copied, removed from the tags' config then stored again with the new
	* option, which ensures that it is a value and not a reference and prevents potential
	* side-effects. References contained *inside* the tag's config are left untouched. Nothing
	* happens if the tag does not exist
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Work on a copy of the config that already holds the new value
		$newConfig              = $this->tagsConfig[$tagName];
		$newConfig[$optionName] = $optionValue;

		// Removing the original entry destroys the reference before the copy is stored
		unset($this->tagsConfig[$tagName]);
		$this->tagsConfig[$tagName] = $newConfig;
	}
268
269
	//==========================================================================
270
	// Public API
271
	//==========================================================================
272
273
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to true. Has no effect on a tag that does not exist
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$isDisabled = true;
		$this->setTagOption($tagName, 'isDisabled', $isDisabled);
	}
283
284
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Has no effect on a tag that does not
	* exist
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		unset($this->tagsConfig[$tagName]['isDisabled']);
	}
297
298
	/**
	* Get this parser's Logger instance
	*
	* @return Logger The logger created in __wakeup()
	*/
	public function getLogger()
	{
		$logger = $this->logger;

		return $logger;
	}
307
308
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text stored by reset(): EOLs are normalized to LF and
	* other control codes are stripped, so it may differ slightly from the original input. It is
	* meant to be used when processing log entries, whose offsets are based on the normalized
	* text
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		$normalizedText = $this->text;

		return $normalizedText;
	}
324
325
	/**
	* Parse a text
	*
	* Resets the parser, executes the plugins' parsers, processes the tags and finalizes the
	* output. A warning is logged if the fixing cost ends up above maxFixingCost
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws RuntimeException if the parser has been reset while it was executing
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which run this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Do the heavy lifting, then finalize the document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means that a plugin or a filter reset the parser mid-execution
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Log a warning if the fixing cost limit was exceeded
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
358
359
	/**
	* Change a tag's tagLimit
	*
	* Stores given value as the tag's "tagLimit" option. Has no effect on a tag that does not
	* exist
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value for the tag's tagLimit
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';
		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
372
373
	/**
	* Change a tag's nestingLimit
	*
	* Stores given value as the tag's "nestingLimit" option. Has no effect on a tag that does not
	* exist
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value for the tag's nestingLimit
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';
		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
386
387
	//==========================================================================
388
	// Output handling
389
	//==========================================================================
390
391
	/**
	* Finalize the output by appending the rest of the unprocessed text and create the root node
	*
	* Also removes empty tag pairs, merges consecutive <i> tags and encodes the characters that
	* are outside of the BMP before wrapping the output in its root node
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Catch up with the end of the text and close the last paragraph
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs. Removing a pair
		// can leave its parent empty, so we repeat until a pass removes nothing
		do
		{
			$this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $removed);
		}
		while ($removed > 0);

		// Merge consecutive <i> tags. This leaves the output untouched if there are none
		$this->output = str_replace('</i><i>', '', $this->output);

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
		$rootName = ($this->isRich) ? 'r' : 't';

		// Build the root's start tag with one declaration per namespace prefix recorded
		$startTag = '<' . $rootName;
		foreach ($this->namespaces as $prefix => $unused)
		{
			$startTag .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}
		$startTag .= '>';

		$this->output = $startTag . $this->output . '</' . $rootName . '>';
	}
429
430
	/**
	* Append a tag to the output
	*
	* Outputs the text that precedes the tag, then the tag itself with the text it consumes, and
	* finally moves the cursor past the tag. The output is marked as rich
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$tagName  = $tag->getName();
		$tagPos   = $tag->getPos();
		$tagLen   = $tag->getLen();
		$tagFlags = $tag->getFlags();

		// Number of lines of whitespace to ignore before and after the tag. Whitespace is only
		// ignored if the tag's flags ask for it
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$closeParagraph = !$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH);

		// Let the cursor catch up with this tag's position
		$this->outputText($tagPos, $skipBefore, $closeParagraph);

		// Capture the text consumed by the tag
		$tagText = '';
		if ($tagLen)
		{
			$tagText = htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8');
		}

		if (!$tag->isStartTag())
		{
			// End tag: the text it consumes goes in an <e> element inside of the element
			if ($tagLen)
			{
				$this->output .= '<e>' . $tagText . '</e>';
			}

			$this->output .= '</' . $tagName . '>';
		}
		else
		{
			// Handle paragraphs before opening the tag
			if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
			{
				$this->outputParagraphStart($tagPos);
			}

			// Record this tag's namespace, if applicable
			$colonPos = strpos($tagName, ':');
			if ($colonPos)
			{
				$prefix = substr($tagName, 0, $colonPos);
				$this->namespaces[$prefix] = 0;
			}

			// Open the start tag and add its attributes, but don't close the tag
			$this->output .= '<' . $tagName;

			// We output the attributes in lexical order. Helps canonicalizing the output and could
			// prove useful someday
			$attributes = $tag->getAttributes();
			ksort($attributes);

			foreach ($attributes as $attrName => $attrValue)
			{
				$escapedValue  = str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8'));
				$this->output .= ' ' . $attrName . '="' . $escapedValue . '"';
			}

			if ($tag->isSelfClosingTag())
			{
				// A self-closing tag either wraps the text it consumes or is output as an empty
				// element
				$this->output .= ($tagLen) ? '>' . $tagText . '</' . $tagName . '>' : '/>';
			}
			else
			{
				// The text consumed by a start tag goes in a <s> element inside of the element
				$this->output .= ($tagLen) ? '><s>' . $tagText . '</s>' : '>';
			}
		}

		// Move the cursor past the tag
		$this->pos = $tagPos + $tagLen;

		// Skip newlines (no other whitespace) after this tag, up to $skipAfter of them
		$this->wsPos = $this->pos;
		for ($remaining = $skipAfter; $remaining > 0; --$remaining)
		{
			if ($this->wsPos >= $this->textLen || $this->text[$this->wsPos] !== "\n")
			{
				break;
			}

			// Move the whitespace cursor past the newline
			++$this->wsPos;
		}
	}
552
553
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* The text is escaped, split into paragraphs and/or formatted with line breaks depending on
	* the flags of the current context. Whitespace ignored at the end of the text is output
	* verbatim, after the paragraph is closed. The cursor is moved to given position, unless it
	* is already at or past it
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text.
	*                                 Replaced with -1 (no limit) if a paragraph is to be closed
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable.
	*                                 Ignored if the current context does not create paragraphs
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
			{
				// There is no paragraph to close if the context doesn't create any
				$closeParagraph = false;
			}
			else
			{
				// Ignore any number of lines at the end if we're closing a paragraph
				$maxLines = -1;
			}
		}

		if ($this->pos >= $catchupPos)
		{
			// We're already there, close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Skip over previously identified whitespace if applicable. It is output as-is
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			if ($this->pos >= $catchupPos)
			{
				// Skipped everything. Close the paragraph if applicable and return
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// Test whether we're even supposed to output anything
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
			// tags
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Compute the amount of text to ignore at the end of the output
		$ignorePos = $catchupPos;
		$ignoreLen = 0;

		// Ignore as many lines (including whitespace) as specified. We walk backwards from the end
		// of the text and stop at the first character that is not whitespace
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to ignore the text at the end
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Look for a paragraph break in this text
			$pbPos = strpos($this->text, "\n\n", $this->pos);

			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				// Output the text up to the break and close the paragraph, then open the next one.
				// The recursive call moves the cursor, which is where the next search starts
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks if applicable, which is when automatic line breaks are enabled
			// and neither disabled nor suspended
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the ignored text if applicable
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $ignoreLen;
	}
712
713
	/**
	* Output a linebreak tag
	*
	* Outputs the text that precedes the tag, without closing the current paragraph, then
	* appends a <br/> element to the output
	*
	* @param  Tag  $tag Tag whose position marks where the linebreak goes
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();

		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
724
725
	/**
	* Output an ignore tag
	*
	* Outputs the text that precedes the tag, then the text consumed by the tag, escaped and
	* wrapped in an <i> element. The output is marked as rich
	*
	* @param  Tag  $tag Tag that covers the text to ignore
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$startPos = $tag->getPos();
		$length   = $tag->getLen();

		// Catch up with the tag's position first
		$this->outputText($startPos, 0, false);

		// Then output the ignored text inside of its <i> element
		$ignored       = htmlspecialchars(substr($this->text, $startPos, $length), ENT_NOQUOTES, 'UTF-8');
		$this->output .= '<i>' . $ignored . '</i>';
		$this->isRich  = true;

		// Move the cursor past this tag
		$this->pos = $startPos + $length;
	}
747
748
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* Any whitespace found at the cursor's position is output before the paragraph is opened
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		// Do nothing if we're already in a paragraph...
		if ($this->context['inParagraph'])
		{
			return;
		}

		// ...or if we don't use paragraphs
		if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
		{
			return;
		}

		// Output the whitespace between $this->pos and $maxPos if applicable
		$this->outputWhitespace($maxPos);

		// Do not open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
773
774
	/**
	* Close current paragraph at current position if applicable
	*
	* Does nothing if the current context is not inside of a paragraph
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		if ($this->context['inParagraph'])
		{
			$this->output .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
790
791
	/**
	* Output the content of a verbatim tag
	*
	* Temporarily replaces the flags of the current context with the tag's flags while the text
	* up to the end of the current tag is output, then restores the original flags
	*
	* NOTE(review): the flags are read from $tag while the end position is read from
	*               $this->currentTag. Presumably both are the same tag; confirm against the caller
	*
	* @param  Tag  $tag Tag whose flags are applied to the text
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		$savedFlags = $this->context['flags'];
		$this->context['flags'] = $tag->getFlags();

		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
		$this->outputText($endPos, 0, false);

		$this->context['flags'] = $savedFlags;
	}
804
805
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace is appended to the output as-is and the cursor is moved past it
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		if ($maxPos <= $this->pos)
		{
			return;
		}

		// Measure the run of whitespace that starts at the cursor and ends before $maxPos
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
		if (!$wsLen)
		{
			return;
		}

		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
824
825
	//==========================================================================
826
	// Plugins handling
827
	//==========================================================================
828
829
	/**
	* Disable a plugin
	*
	* Sets "isDisabled" to true in the plugin's config. The config is copied, removed then stored
	* again so that it does not remain a reference. Has no effect on a plugin that does not exist
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy of the config that already holds the new value
		$newConfig               = $this->pluginsConfig[$pluginName];
		$newConfig['isDisabled'] = true;

		// Removing the original entry destroys the reference before the copy is stored
		unset($this->pluginsConfig[$pluginName]);
		$this->pluginsConfig[$pluginName] = $newConfig;
	}
848
849
	/**
	* Enable a plugin
	*
	* Sets "isDisabled" to false in the plugin's config. Has no effect on a plugin that does not
	* exist
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		$this->pluginsConfig[$pluginName]['isDisabled'] = false;
	}
862
863
	/**
	* Execute given plugin
	*
	* The plugin's parser is not executed if its config has a "quickMatch" string that is not
	* found in the text, or if it has a "regexp" that does not produce any match
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Bail if the plugin requires a substring that the text does not contain
		if (isset($config['quickMatch']))
		{
			if (strpos($this->text, $config['quickMatch']) === false)
			{
				return;
			}
		}

		// Collect the matches if the plugin uses a regexp, and bail if there are none
		if (!isset($config['regexp']))
		{
			$matches = [];
		}
		else
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (!$matches)
			{
				return;
			}
		}

		// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
		call_user_func($this->getPluginParser($pluginName), $this->text, $matches);
	}
890
891
	/**
	* Execute all the plugins
	*
	* Plugins whose config has a non-empty "isDisabled" value are skipped
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
906
907
	/**
	* Execute given regexp and returns as many matches as given limit
	*
	* The regexp is executed against the text being parsed. Matches are returned in set order,
	* with the offset of each capture
	*
	* @param  string  $regexp Regexp to execute
	* @param  integer $limit  Maximum number of matches to return
	* @return array           Matches, truncated to $limit if there are more
	*/
	protected function getMatches($regexp, $limit)
	{
		$total = preg_match_all($regexp, $this->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
924
925
	/**
	* Get the cached callback for given plugin's parser
	*
	* If no callback is cached for this plugin, a parser is instantiated and its parse() method
	* is cached as the callback. The class used is the "className" found in the plugin's config
	* if there is one, or s9e\TextFormatter\Plugins\<pluginName>\Parser otherwise
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		// Serve the callback from the cache whenever possible
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$config = $this->pluginsConfig[$pluginName];
		if (isset($config['className']))
		{
			$className = $config['className'];
		}
		else
		{
			$className = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		}

		// Register the new instance's parse() method as the callback
		$this->pluginParsers[$pluginName] = [new $className($this, $config), 'parse'];

		return $this->pluginParsers[$pluginName];
	}
947
948
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* @param  string   $pluginName Name of the plugin
	* @param  callback $parser     Callback executed with the text and the matches as arguments
	* @param  string   $regexp     If set, stored as the plugin's "regexp", which is used to
	*                              collect the matches passed to the parser
	* @param  integer  $limit      Stored as the plugin's "regexpLimit". Only used if $regexp is
	*                              set
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not a valid callback
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		// $parser is the second argument of this method, which is what the message reports
		if (!is_callable($parser))
		{
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// The regexp and its limit are always stored together because the limit is read
		// whenever a regexp is set
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		// Cache the callback, replacing the one that may already exist for this plugin
		$this->pluginParsers[$pluginName] = $parser;
	}
976
977
	//==========================================================================
978
	// Rules handling
979
	//==========================================================================
980
981
	/**
	* Apply closeAncestor rules associated with given tag
	*
	* Open tags are tested from the most recently opened to the oldest. The first one whose name
	* is targeted by a closeAncestor rule gets closed: given tag is put back in the tag stack and
	* an end tag is added for the ancestor at given tag's position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		$targets = $tagConfig['rules']['closeAncestor'];
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor = $this->openTags[$i];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			++$this->currentFixingCost;

			// We have to close this ancestor. First we reinsert this tag...
			$this->tagStack[] = $tag;

			// ...then we add a new end tag for it with a better priority
			$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

			return true;
		}

		return false;
	}
1021
1022
	/**
	* Apply closeParent rules associated with given tag
	*
	* Only the last open tag is tested. If its name is targeted by a closeParent rule, given tag
	* is put back in the tag stack and an end tag is added for the parent at given tag's position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		// The parent is the last tag that was opened
		$parent = end($this->openTags);
		if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
		{
			return false;
		}

		++$this->currentFixingCost;

		// We have to close that parent. First we reinsert the tag...
		$this->tagStack[] = $tag;

		// ...then we add a new end tag for it with a better priority
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		return true;
	}
1057
1058
	/**
	* Apply the createChild rules associated with given tag
	*
	* Adds one zero-length start tag per tag name listed in the createChild rules. The tags are
	* positioned after the whitespace that follows the cursor, with priorities that start at
	* -999 and increase by one for each tag
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Children are created at the first non-whitespace character found at or after the cursor
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
		$priority = -1000;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			++$priority;
			$this->addStartTag($childName, $childPos, 0, $priority);
		}
	}
1077
1078
	/**
	* Apply fosterParent rules associated with given tag
	*
	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
	*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
	*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
	*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
	*       does not run indefinitely. The default tagLimit and nestingLimit also serve to prevent
	*       the loop from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		// Only the last open tag can be fostered
		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		// Don't add a foster tag if the tag would foster itself or if the fixing budget is spent
		if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
		{
			$this->addFosterTag($tag, $parent);
		}

		// Reinsert current tag
		$this->tagStack[] = $tag;

		// And finally close its parent with a priority that ensures it is processed before this
		// tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Adjust the fixing cost to account for the additional tags/processing
		$this->currentFixingCost += 4;

		return true;
	}
1127
1128
	/**
	* Apply the requireAncestor rules associated with given tag
	*
	* The requirement is fulfilled as soon as one of the listed tag names has at least one open
	* tag. If none of them does, an error is logged.
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
	*/
	protected function requireAncestor(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['requireAncestor']))
		{
			// No requirement at all
			return false;
		}

		$ancestorNames = $tagConfig['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			if (!empty($this->cntOpen[$ancestorName]))
			{
				// One open ancestor is enough
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1159
1160
	//==========================================================================
1161
	// Tag processing
1162
	//==========================================================================
1163
1164
	/**
	* Queue a 0-width copy of a tag right after another tag
	*
	* The copy is positioned using getMagicStartCoords() and is registered with the current tag via
	* cascadeInvalidationTo().
	*
	* @param  Tag  $tag       Current tag
	* @param  Tag  $fosterTag Tag to foster
	* @return void
	*/
	protected function addFosterTag(Tag $tag, Tag $fosterTag)
	{
		// [position, priority] for a magic start tag placed after the current tag
		$coords = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

		$copyTag = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);
		$tag->cascadeInvalidationTo($copyTag);
	}
1179
1180
	/**
	* Queue a 0-width end tag paired with given start tag
	*
	* If either the current tag or the start tag carries RULE_IGNORE_WHITESPACE, the position is
	* first moved back over trailing whitespace via getMagicEndPos().
	*
	* @param  Tag     $startTag Start tag
	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
	* @param  integer $prio     End tag's priority
	* @return Tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		$combinedFlags = $this->currentTag->getFlags() | $startTag->getFlags();
		if ($combinedFlags & self::RULE_IGNORE_WHITESPACE)
		{
			// Whitespace is to be minimized: end the tag before any trailing whitespace
			$tagPos = $this->getMagicEndPos($tagPos);
		}

		$magicTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
		$magicTag->pairWith($startTag);

		return $magicTag;
	}
1204
1205
	/**
	* Compute the position of a magic end tag, adjusted for whitespace
	*
	* Starting from given position, moves left for as long as the preceding character is one of
	* self::WHITESPACE, without ever going further back than the cursor.
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer
	*/
	protected function getMagicEndPos($tagPos)
	{
		$leftmostPos = $this->pos;
		while ($tagPos > $leftmostPos)
		{
			if (strpos(self::WHITESPACE, $this->text[$tagPos - 1]) === false)
			{
				// The preceding character is not whitespace, stop here
				break;
			}

			--$tagPos;
		}

		return $tagPos;
	}
1222
1223
	/**
	* Compute the position and priority of a magic start tag, adjusted for whitespace
	*
	* Starting from given position, skips over the characters listed in self::WHITESPACE without
	* going past the next tag in the stack or past the end of the text. If the resulting position
	* is the same as the next tag's, the returned priority is one less than that tag's so that the
	* magic tag is processed first. Otherwise the priority is 0.
	*
	* @param  integer   $tagPos Leftmost possible position for the tag
	* @return integer[]         [Tag pos, priority]
	*/
	protected function getMagicStartCoords($tagPos)
	{
		if (empty($this->tagStack))
		{
			// Set the next position outside the text boundaries
			$nextPos  = $this->textLen + 1;
			$nextPrio = 0;
		}
		else
		{
			$nextTag  = end($this->tagStack);
			$nextPos  = $nextTag->getPos();
			$nextPrio = $nextTag->getSortPriority();
		}

		// Never read past the last character of the text. With an empty stack $nextPos is
		// textLen + 1, which would otherwise let the loop read $this->text[$this->textLen]. That
		// out-of-range offset evaluates to an empty string which strpos() reports as found on
		// PHP 8, pushing the position out of bounds
		$scanEnd = min($nextPos, $this->textLen);

		// Find the first non-whitespace position before next tag or the end of text
		while ($tagPos < $scanEnd && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
		{
			++$tagPos;
		}

		// Set a priority that ensures this tag appears before the next tag
		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

		return [$tagPos, $prio];
	}
1255
1256
	/**
	* Test whether the next tag in the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool      FALSE if the stack is empty, the result of canClose() otherwise
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		// The last element of the stack is the next tag to be processed
		$nextTag = end($this->tagStack);

		return $nextTag->canClose($tag);
	}
1266
1267
	/**
	* Process all tags in the stack
	*
	* Resets the per-name counters, then alternates between emptying the stack and queueing magic
	* end tags for whatever is still open, until a pass leaves the stack empty.
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Start with a count of 0 for every configured tag name
		foreach (array_keys($this->tagsConfig) as $tagName)
		{
			$this->cntOpen[$tagName]  = 0;
			$this->cntTotal[$tagName] = 0;
		}

		while (true)
		{
			// Consume the stack, re-sorting it whenever it has been flagged as unsorted
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);
				$this->processCurrentTag();
			}

			// Close tags that were left open
			//
			// NOTE: end tags are added in hierarchical order (ancestors to descendants) but since
			//       the stack is processed in LIFO order, tags get closed in the correct order,
			//       from descendants to ancestors
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}

			if (empty($this->tagStack))
			{
				// Nothing was queued during this pass, we are done
				break;
			}
		}
	}
1311
1312
	/**
	* Process current tag
	*
	* Decides whether $this->currentTag is invalidated, replaced by a new tag, or dispatched to the
	* method that handles its kind (ignore, br, paragraph break, verbatim, start or end tag).
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		// Invalidate current tag if tags are disabled and current tag would not close the last open
		// tag and is not a system tag
		if ($this->context['flags'] & self::RULE_IGNORE_TAGS)
		{
			// end() returns false if there is no open tag, in which case there is nothing that
			// current tag could close and canClose() must not be called
			$lastOpenTag   = end($this->openTags);
			$closesLastTag = ($lastOpenTag instanceof Tag) && $this->currentTag->canClose($lastOpenTag);

			if (!$closesLastTag && !$this->currentTag->isSystemTag())
			{
				$this->currentTag->invalidate();
			}
		}

		$tagPos = $this->currentTag->getPos();
		$tagLen = $this->currentTag->getLen();

		// Test whether the cursor passed this tag's position already
		if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
		{
			// Test whether this tag is paired with a start tag and this tag is still open
			$startTag = $this->currentTag->getStartTag();

			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				// Create an end tag that matches current tag's start tag, which consumes as much of
				// the same text as current tag and is paired with the same start tag
				$this->addEndTag(
					$startTag->getName(),
					$this->pos,
					max(0, $tagPos + $tagLen - $this->pos)
				)->pairWith($startTag);

				// Note that current tag is not invalidated, it's merely replaced
				return;
			}

			// If this is an ignore tag, try to ignore as much as the remaining text as possible
			if ($this->currentTag->isIgnoreTag())
			{
				$ignoreLen = $tagPos + $tagLen - $this->pos;

				if ($ignoreLen > 0)
				{
					// Create a new ignore tag and move on
					$this->addIgnoreTag($this->pos, $ignoreLen);

					return;
				}
			}

			// Skipped tags are invalidated
			$this->currentTag->invalidate();
		}

		if ($this->currentTag->isInvalid())
		{
			return;
		}

		if ($this->currentTag->isIgnoreTag())
		{
			$this->outputIgnoreTag($this->currentTag);
		}
		elseif ($this->currentTag->isBrTag())
		{
			// Output the tag if it's allowed, ignore it otherwise
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($this->currentTag);
			}
		}
		elseif ($this->currentTag->isParagraphBreak())
		{
			$this->outputText($this->currentTag->getPos(), 0, true);
		}
		elseif ($this->currentTag->isVerbatim())
		{
			$this->outputVerbatim($this->currentTag);
		}
		elseif ($this->currentTag->isStartTag())
		{
			$this->processStartTag($this->currentTag);
		}
		else
		{
			$this->processEndTag($this->currentTag);
		}
	}
1403
1404
	/**
	* Process given start tag (including self-closing tags) at current position
	*
	* The checks are applied in this order:
	*
	*  1. Check that this tag has not reached its global limit tagLimit
	*  2. Execute this tag's filterChain, which will filter/validate its attributes
	*  3. Apply fosterParent, closeParent and closeAncestor rules
	*  4. Check for nestingLimit
	*  5. Check that the tag is allowed in current context
	*  6. Apply requireAncestor rules
	*
	* This order ensures that the tag is valid and within the set limits before any attempt is made
	* to close parents or ancestors. Ancestors have to be closed before checking for nesting
	* limits, whether the tag is allowed within current context (the context may change as
	* ancestors are closed) or whether the required ancestors are still there (they might have
	* been closed by a rule.)
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		// 1. Global limit for this tag name
		if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
		{
			$logContext = [
				'tag'      => $tag,
				'tagName'  => $tagName,
				'tagLimit' => $tagConfig['tagLimit']
			];
			$this->logger->err('Tag limit exceeded', $logContext);
			$tag->invalidate();

			return;
		}

		// 2. Filter chain
		FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
		if ($tag->isInvalid())
		{
			return;
		}

		// 3. Rules that close a parent or an ancestor, only while the fixing budget allows it
		if ($this->currentFixingCost < $this->maxFixingCost
		 && ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)))
		{
			// This tag parent/ancestor needs to be closed, we just return (the tag is still valid)
			return;
		}

		// 4. Nesting limit
		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
		{
			$logContext = [
				'tag'          => $tag,
				'tagName'      => $tagName,
				'nestingLimit' => $tagConfig['nestingLimit']
			];
			$this->logger->err('Nesting limit exceeded', $logContext);
			$tag->invalidate();

			return;
		}

		// 5. Current context
		if (!$this->tagIsAllowed($tagName))
		{
			$msg        = 'Tag is not allowed in this context';
			$logContext = ['tag' => $tag, 'tagName' => $tagName];

			// Tags that consume no text are only reported at the debug level
			if ($tag->getLen() <= 0)
			{
				$this->logger->debug($msg, $logContext);
			}
			else
			{
				$this->logger->warn($msg, $logContext);
			}
			$tag->invalidate();

			return;
		}

		// 6. Required ancestors
		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// If this tag has an autoClose rule and it's not paired with an end tag or followed by an
		// end tag, it is replaced with a self-closing tag with the same properties
		if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$selfClosingTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$selfClosingTag->setAttributes($tag->getAttributes());
			$selfClosingTag->setFlags($tag->getFlags());

			$tag = $selfClosingTag;
		}

		// Ignore the newline that immediately follows the tag if it has a trimFirstLine rule and
		// no paired end tag
		if (($tag->getFlags() & self::RULE_TRIM_FIRST_LINE) && !$tag->getEndTag())
		{
			$afterTagPos = $tag->getPos() + $tag->getLen();
			if (substr($this->text, $afterTagPos, 1) === "\n")
			{
				$this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
			}
		}

		// This tag is valid, output it and update the context
		$this->outputTag($tag);
		$this->pushContext($tag);

		// Apply the createChild rules if applicable
		$this->createChild($tag);
	}
1522
1523
	/**
	* Process given end tag at current position
	*
	* Finds the innermost open tag that this end tag can close, outputs an end tag for every open
	* tag nested inside of it, outputs this end tag, then queues copies of the tags that should be
	* automatically reopened. While the fixing budget allows it, upcoming end tags that would
	* close tags already closed here are skipped and their text is ignored.
	*
	* @param  Tag  $tag end tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();
		if (empty($this->cntOpen[$tagName]))
		{
			// This is an end tag with no start tag
			return;
		}

		/**
		* @var array List of tags need to be closed before given tag
		*/
		$closeTags = [];

		// Go through the open tags from last to first until we find one that our tag can close
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$candidate = $this->openTags[$i];
			if ($tag->canClose($candidate))
			{
				break;
			}

			$closeTags[] = $candidate;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// Did not find a matching tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Accumulate flags to determine whether whitespace should be trimmed
		$flags = $tag->getFlags();
		foreach ($closeTags as $closedTag)
		{
			$flags |= $closedTag->getFlags();
		}
		$ignoreWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE);

		// Only reopen tags if we haven't exceeded our "fixing" budget
		$keepReopening = ($this->currentFixingCost < $this->maxFixingCost);

		// Output an end tag for each tag being closed and collect the tags to be reopened
		$reopenTags = [];
		foreach ($closeTags as $closedTag)
		{
			// Tags are reopened until we meet the first one that is not meant to be reopened
			if ($keepReopening)
			{
				$keepReopening = (bool) ($closedTag->getFlags() & self::RULE_AUTO_REOPEN);
				if ($keepReopening)
				{
					$reopenTags[] = $closedTag;
				}
			}

			// Find the earliest position we can close this open tag
			$endPos = $tag->getPos();
			if ($ignoreWhitespace)
			{
				$endPos = $this->getMagicEndPos($endPos);
			}

			// Output an end tag to close this start tag, then update the context
			$endTag = new Tag(Tag::END_TAG, $closedTag->getName(), $endPos, 0);
			$endTag->setFlags($closedTag->getFlags());
			$this->outputTag($endTag);
			$this->popContext();
		}

		// Output our tag, moving the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
		// close tags that are already being closed now. Also, filter our list of tags being
		// reopened by removing those that would immediately be closed
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			// NOTE: both loop conditions increase the fixing cost as a side effect
			$stackIdx = count($this->tagStack);
			while (--$stackIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
			{
				$upcomingTag = $this->tagStack[$stackIdx];

				// Stop unless the upcoming tag is positioned within the ignored text and it's
				// strictly an end tag (not a start tag or a self-closing tag)
				if ($upcomingTag->getPos() > $ignorePos || $upcomingTag->isStartTag())
				{
					break;
				}

				// Test whether this tag would close any of the tags we're about to reopen
				$closeIdx = count($closeTags);
				while (--$closeIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
				{
					if (!$upcomingTag->canClose($closeTags[$closeIdx]))
					{
						continue;
					}

					// Remove the tag from the lists and reset the keys
					array_splice($closeTags, $closeIdx, 1);
					if (isset($reopenTags[$closeIdx]))
					{
						array_splice($reopenTags, $closeIdx, 1);
					}

					// Extend the ignored text to cover this tag
					$upcomingEnd = $upcomingTag->getPos() + $upcomingTag->getLen();
					if ($upcomingEnd > $ignorePos)
					{
						$ignorePos = $upcomingEnd;
					}

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$ignoreTag = new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos);
				$this->outputIgnoreTag($ignoreTag);
			}
		}

		// Re-add tags that need to be reopened, at current cursor position
		foreach ($reopenTags as $reopenedTag)
		{
			$copyTag = $this->addCopyTag($reopenedTag, $this->pos, 0);

			// Re-pair the new tag with the original's end tag, if any
			$pairedEndTag = $reopenedTag->getEndTag();
			if ($pairedEndTag)
			{
				$copyTag->pairWith($pairedEndTag);
			}
		}
	}
1687
1688
	/**
	* Update counters and replace current context with its parent context
	*
	* Removes the last open tag, decrements the number of open tags for its name and restores the
	* context stored under 'parentContext'.
	*
	* @return void
	*/
	protected function popContext()
	{
		$closedTag  = array_pop($this->openTags);
		$closedName = $closedTag->getName();

		--$this->cntOpen[$closedName];
		$this->context = $this->context['parentContext'];
	}
1699
1700
	/**
	* Update counters and replace current context with a new context based on given tag
	*
	* The total count for the tag's name is always incremented. If given tag is a self-closing
	* tag, nothing else happens and the context won't change.
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$tagName = $tag->getName();
		++$this->cntTotal[$tagName];

		// If this is a self-closing tag, the context remains the same
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		$tagFlags      = $tag->getFlags();
		$tagConfig     = $this->tagsConfig[$tagName];
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);

		// Recompute the allowed tags
		$allowed = [];
		foreach ($this->context['allowed'] as $k => $parentBits)
		{
			if (!$isTransparent)
			{
				// Keep the bits selected by 0xFF00 and copy them 8 positions to the right
				$parentBits = ($parentBits & 0xFF00) | ($parentBits >> 8);
			}

			$allowed[] = $tagConfig['allowed'][$k] & $parentBits;
		}

		// Use this tag's flags as a base for this context and add inherited rules
		$flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		// The previous context is preserved so that popContext() can restore it
		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
1757
1758
	/**
	* Return whether given tag is allowed in current context
	*
	* The tag's bitNumber selects one bit of the context's 'allowed' list: the upper bits of the
	* number select the list element, the lowest three bits select the bit within that element.
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];

		$index = $bitNumber >> 3;
		$mask  = 1 << ($bitNumber & 7);

		return (bool) ($this->context['allowed'][$index] & $mask);
	}
1770
1771
	//==========================================================================
1772
	// Tag stack
1773
	//==========================================================================
1774
1775
	/**
	* Add a start tag
	*
	* Shortcut for addTag() with a type of Tag::START_TAG.
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$startTag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

		return $startTag;
	}
1788
1789
	/**
	* Add an end tag
	*
	* Shortcut for addTag() with a type of Tag::END_TAG.
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$endTag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

		return $endTag;
	}
1802
1803
	/**
	* Add a self-closing tag
	*
	* Shortcut for addTag() with a type of Tag::SELF_CLOSING_TAG.
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$selfClosingTag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

		return $selfClosingTag;
	}
1816
1817
	/**
	* Add a 0-width "br" tag to force a line break at given position
	*
	* The tag is a self-closing tag named "br".
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addBrTag($pos, $prio = 0)
	{
		$brTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

		return $brTag;
	}
1828
1829
	/**
	* Add an "ignore" tag
	*
	* The tag is a self-closing tag named "i". Its length is capped so that it does not extend
	* past the end of the text.
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		// Number of characters between given position and the end of the text
		$remainingLen = $this->textLen - $pos;
		$ignoreLen    = min($len, $remainingLen);

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $ignoreLen, $prio);
	}
1841
1842
	/**
	* Add a paragraph break at given position
	*
	* Uses a zero-width self-closing tag named "pb" that is actually never output in the result
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addParagraphBreak($pos, $prio = 0)
	{
		$breakTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

		return $breakTag;
	}
1855
1856
	/**
	* Add a copy of given tag at given position and length
	*
	* The copy has the same type, name and attributes as the original.
	*
	* @param  Tag     $tag  Original tag
	* @param  integer $pos  Copy's position
	* @param  integer $len  Copy's length
	* @param  integer $prio Copy's priority (same as original by default)
	* @return Tag           Copy tag
	*/
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		if ($prio === null)
		{
			// No priority given, reuse the original's
			$prio = $tag->getSortPriority();
		}

		$copyTag = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $prio);
		$copyTag->setAttributes($tag->getAttributes());

		return $copyTag;
	}
1876
1877
	/**
	* Add a tag
	*
	* The tag is always created and returned, but it is only inserted in the stack if it passes
	* all the checks. Otherwise, it is invalidated.
	*
	* @param  integer $type Tag's type
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag          = new Tag($type, $name, $pos, $len, $prio);
		$isConfigured = isset($this->tagsConfig[$name]);

		// Set this tag's rules bitfield
		if ($isConfigured)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		if (!$isConfigured && !$tag->isSystemTag())
		{
			// Unknown tag
			$tag->invalidate();
		}
		elseif (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			// Disabled tag
			$logContext = [
				'tag'     => $tag,
				'tagName' => $name
			];
			$this->logger->warn('Tag is disabled', $logContext);
			$tag->invalidate();
		}
		elseif ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			// Negative length or position, or tag that ends outside of the text
			$tag->invalidate();
		}
		else
		{
			$this->insertTag($tag);
		}

		return $tag;
	}
1926
1927
	/**
	* Insert given tag in the tag stack
	*
	* If the stack is not flagged as sorted, the tag is simply appended. Otherwise it is inserted
	* at the index that keeps the stack ordered according to compareTags().
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function insertTag(Tag $tag)
	{
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// Starting from the end of the stack, move every tag that compares greater than the new
		// tag to the next slot until we find the correct index
		for ($i = count($this->tagStack); $i > 0; --$i)
		{
			$prevTag = $this->tagStack[$i - 1];
			if (self::compareTags($prevTag, $tag) <= 0)
			{
				break;
			}

			$this->tagStack[$i] = $prevTag;
		}
		$this->tagStack[$i] = $tag;
	}
1951
1952
	/**
	* Add a pair of tags
	*
	* Adds an end tag and a start tag of the same name, then pairs the start tag with the end tag.
	*
	* @param  string  $name     Name of the tags
	* @param  integer $startPos Position of the start tag
	* @param  integer $startLen Length of the start tag
	* @param  integer $endPos   Position of the end tag
	* @param  integer $endLen   Length of the end tag
	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
	* @return Tag               Start tag
	*/
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// NOTE: the end tag is added first to try to keep the stack in the correct order
		$closingTag = $this->addEndTag($name, $endPos, $endLen, -$prio);
		$openingTag = $this->addStartTag($name, $startPos, $startLen, $prio);

		$openingTag->pairWith($closingTag);

		return $openingTag;
	}
1972
1973
	/**
	* Add a tag that represents a verbatim copy of the original text
	*
	* The tag is a self-closing tag named "v".
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addVerbatim($pos, $len, $prio = 0)
	{
		$verbatimTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

		return $verbatimTag;
	}
1985
1986
	/**
	* Sort tags by position and precedence
	*
	* Reorders the whole stack using compareTags() and flags it as sorted.
	*
	* @return void
	*/
	protected function sortTags()
	{
		$callback = [__CLASS__, 'compareTags'];

		usort($this->tagStack, $callback);
		$this->tagStackIsSorted = true;
	}
1996
1997
	/**
	* sortTags() callback
	*
	* Tags are stored as a stack, in LIFO order. Tags are sorted by position _descending_ so that
	* they are processed in the order they appear in the text. Ties are resolved using the tags'
	* sortPriority, then their length and type.
	*
	* @param  Tag     $a First tag to compare
	* @param  Tag     $b Second tag to compare
	* @return integer    Negative if $a goes before $b in the stack, positive if it goes after
	*/
	protected static function compareTags(Tag $a, Tag $b)
	{
		// First we order by pos descending
		$aPos = $a->getPos();
		$bPos = $b->getPos();
		if ($aPos !== $bPos)
		{
			return $bPos - $aPos;
		}

		// If the tags start at the same position, we'll use their sortPriority if applicable. Tags
		// with a lower value get sorted last, which means they'll be processed first. IOW, -10 is
		// processed before 10
		$aPrio = $a->getSortPriority();
		$bPrio = $b->getSortPriority();
		if ($aPrio !== $bPrio)
		{
			return $bPrio - $aPrio;
		}

		// Same position and same priority: sort by length, with special considerations for
		// zero-width tags
		$aLen = $a->getLen();
		$bLen = $b->getLen();

		if ($aLen && $bLen)
		{
			// Both tags consume text. We sort tags by length ascending, so that the longest
			// matches are processed first. If their length is identical, the order is undefined
			// as PHP's sort isn't stable
			return $aLen - $bLen;
		}

		// Zero-width tags are ordered after wider tags so that they have a chance to be processed
		// before the next character is consumed, which would force them to be skipped
		if ($aLen)
		{
			// Only $b is a zero-width tag
			return -1;
		}
		if ($bLen)
		{
			// Only $a is a zero-width tag
			return 1;
		}

		// Both are zero-width tags. End tags are ordered after start tags so that a pair that ends
		// with a zero-width tag has the opportunity to be closed before another pair starts with a
		// zero-width tag. For example, the pairs that would enclose each of the letters in the
		// string "XY". Self-closing tags are ordered between end tags and start tags in an
		// attempt to keep them out of tag pairs
		$typeOrder = [
			Tag::END_TAG          => 0,
			Tag::SELF_CLOSING_TAG => 1,
			Tag::START_TAG        => 2
		];

		return $typeOrder[$b->getType()] - $typeOrder[$a->getType()];
	}
2060
}