Passed
Push — master ( ffa238...91f977 )
by Josh
02:40
created

Parser::__wakeup()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\FilterProcessing;
13
use s9e\TextFormatter\Parser\Logger;
14
use s9e\TextFormatter\Parser\Tag;
15
16
class Parser
17
{
18
	/**#@+
19
	* Boolean rules bitfield
20
	*/
21
	const RULE_AUTO_CLOSE        = 1 << 0;
22
	const RULE_AUTO_REOPEN       = 1 << 1;
23
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
24
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
25
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
26
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
27
	const RULE_IGNORE_TAGS       = 1 << 6;
28
	const RULE_IGNORE_TEXT       = 1 << 7;
29
	const RULE_IGNORE_WHITESPACE = 1 << 8;
30
	const RULE_IS_TRANSPARENT    = 1 << 9;
31
	const RULE_PREVENT_BR        = 1 << 10;
32
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
33
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
34
	/**#@-*/
35
36
	/**
37
	* Bitwise disjunction of rules related to automatic line breaks
38
	*/
39
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
40
41
	/**
42
	* Bitwise disjunction of rules that are inherited by subcontexts
43
	*/
44
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
45
46
	/**
47
	* All the characters that are considered whitespace
48
	*/
49
	const WHITESPACE = " \n\t";
50
51
	/**
52
	* @var array Number of open tags for each tag name
53
	*/
54
	protected $cntOpen;
55
56
	/**
57
	* @var array Number of times each tag has been used
58
	*/
59
	protected $cntTotal;
60
61
	/**
62
	* @var array Current context
63
	*/
64
	protected $context;
65
66
	/**
67
	* @var integer How hard the parser has worked on fixing bad markup so far
68
	*/
69
	protected $currentFixingCost;
70
71
	/**
72
	* @var Tag Current tag being processed
73
	*/
74
	protected $currentTag;
75
76
	/**
77
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
78
	*/
79
	protected $isRich;
80
81
	/**
82
	* @var Logger This parser's logger
83
	*/
84
	protected $logger;
85
86
	/**
87
	* @var integer How hard the parser should work on fixing bad markup
88
	*/
89
	public $maxFixingCost = 10000;
90
91
	/**
92
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
93
	*/
94
	protected $namespaces;
95
96
	/**
97
	* @var array Stack of open tags (instances of Tag)
98
	*/
99
	protected $openTags;
100
101
	/**
102
	* @var string This parser's output
103
	*/
104
	protected $output;
105
106
	/**
107
	* @var integer Position of the cursor in the original text
108
	*/
109
	protected $pos;
110
111
	/**
112
	* @var array Array of callbacks, using plugin names as keys
113
	*/
114
	protected $pluginParsers = [];
115
116
	/**
117
	* @var array Associative array of [pluginName => pluginConfig]
118
	*/
119
	protected $pluginsConfig;
120
121
	/**
122
	* @var array Variables registered for use in filters
123
	*/
124
	public $registeredVars = [];
125
126
	/**
127
	* @var array Root context, used at the root of the document
128
	*/
129
	protected $rootContext;
130
131
	/**
132
	* @var array Tags' config
133
	*/
134
	protected $tagsConfig;
135
136
	/**
137
	* @var array Tag storage
138
	*/
139
	protected $tagStack;
140
141
	/**
142
	* @var bool Whether the tags in the stack are sorted
143
	*/
144
	protected $tagStackIsSorted;
145
146
	/**
147
	* @var string Text being parsed
148
	*/
149
	protected $text;
150
151
	/**
152
	* @var integer Length of the text being parsed
153
	*/
154
	protected $textLen;
155
156
	/**
157
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
158
	*              whether the parser was reset during execution
159
	*/
160
	protected $uid = 0;
161
162
	/**
163
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
164
	*/
165
	protected $wsPos;
166
167
	/**
	* Constructor
	*
	* Copies the parts of the configuration used by the parser into its own properties and
	* creates the logger
	*
	* @param array $config Parser configuration. The "plugins", "registeredVars", "rootContext"
	*                      and "tags" elements are read
	*/
	public function __construct(array $config)
	{
		// Map each config element to the property that holds it
		[
			'plugins'        => $this->pluginsConfig,
			'registeredVars' => $this->registeredVars,
			'rootContext'    => $this->rootContext,
			'tags'           => $this->tagsConfig
		] = $config;

		$this->logger = new Logger;
	}
178
179
	/**
	* Return the properties that are preserved when this parser is serialized
	*
	* Only the plugins' config, the registered vars, the root context and the tags' config are
	* exported, using the property names as keys
	*
	* @return array
	*/
	public function __serialize(): array
	{
		$state = [];
		foreach (['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'] as $propName)
		{
			$state[$propName] = $this->$propName;
		}

		return $state;
	}
188
189
	/**
	* Restore this parser's properties from serialized data
	*
	* @param  array $data Property values, using property names as keys
	* @return void
	*/
	public function __unserialize(array $data): void
	{
		foreach (array_keys($data) as $propName)
		{
			$this->$propName = $data[$propName];
		}

		// A new logger is always created last, replacing any value set by the loop above
		$this->logger = new Logger;
	}
197
198
	/**
	* Prepare the parser for a new text
	*
	* Validates and normalizes the text, clears the logs and reinitializes every property that
	* holds per-parsing state
	*
	* @param  string $text Text to be parsed
	* @return void
	*
	* @throws InvalidArgumentException if the text is not valid UTF-8
	*/
	protected function reset($text)
	{
		// An empty pattern in Unicode mode does not match if the subject is invalid UTF-8
		if (!preg_match('//u', $text))
		{
			throw new InvalidArgumentException('Invalid UTF-8 input');
		}

		// Turn CR and CRLF into LF
		$text = preg_replace('/\\r\\n?/', "\n", $text);

		// Remove the characters that aren't allowed in XML
		$text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]|\\xEF\\xBF[\\xBE\\xBF]/', '', $text);

		// Text and cursor
		$this->text    = $text;
		$this->textLen = strlen($text);
		$this->pos     = 0;
		$this->wsPos   = 0;

		// Output
		$this->output     = '';
		$this->isRich     = false;
		$this->namespaces = [];

		// Tags
		$this->currentTag       = null;
		$this->openTags         = [];
		$this->tagStack         = [];
		$this->tagStackIsSorted = false;

		// Counters
		$this->cntOpen           = [];
		$this->cntTotal          = [];
		$this->currentFixingCost = 0;

		// Start from the root context, outside of any paragraph
		$this->context = $this->rootContext;
		$this->context['inParagraph'] = false;

		// Discard the logs from the previous parsing
		$this->logger->clear();

		// Changing the UID lets parse() detect that a reset occurred
		$this->uid++;
	}
242
243 182
	/**
	* Set the value of an option in a tag's config
	*
	* The tag's config is copied then replaced rather than modified in place, which ensures that
	* it is a value and not a reference and prevents potential side-effects. References contained
	* *inside* the tag's config are left untouched. Nothing happens if the tag does not exist
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Work on a copy and remove the original element, which destroys the reference
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Put the modified copy back in place
		$config[$optionName] = $optionValue;
		$this->tagsConfig[$tagName] = $config;
	}
267 7
268
	//==========================================================================
269
	// Public API
270 7
	//==========================================================================
271 7
272
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to true. Has no effect if the tag does not exist
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$isDisabled = true;
		$this->setTagOption($tagName, 'isDisabled', $isDisabled);
	}
282
283
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Has no effect if the tag does not
	* exist.
	*
	* As in setTagOption(), the tag's config is copied and replaced rather than modified in place
	* so that if it is stored as a reference, the change does not leak to whatever else shares
	* that reference
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Copy the tag's config and remove it. That will destroy the reference
		$tagConfig = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Remove the option from the copy and replace the tag's config
		unset($tagConfig['isDisabled']);
		$this->tagsConfig[$tagName] = $tagConfig;
	}
296 1
297
	/**
	* Return the Logger instance used by this parser
	*
	* @return Logger
	*/
	public function getLogger()
	{
		$logger = $this->logger;

		return $logger;
	}
306
307
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text stored by reset(), which may be slightly different
	* from the original text: EOLs are normalized to LF and other control codes are stripped.
	* This method is meant to be used in support of processing log entries, which contain
	* offsets based on the normalized text
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		$normalizedText = $this->text;

		return $normalizedText;
	}
323
324
	/**
	* Parse a text
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws InvalidArgumentException if the text is not valid UTF-8 (thrown by reset())
	* @throws RuntimeException if the parser was reset while it was running
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which parsing this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Collect the tags from the plugins, process them, then build the final document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means that a plugin or a filter reset the parser mid-execution
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Going over the fixing cost limit is not an error but it is worth a warning
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
357 182
358
	/**
	* Set the "tagLimit" option of a tag
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value for the tag's tagLimit
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';
		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
371
372
	/**
	* Set the "nestingLimit" option of a tag
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value for the tag's nestingLimit
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';
		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
385
386
	//==========================================================================
387
	// Output handling
388 2
	//==========================================================================
389
390 2
	/**
	* Finalize the output
	*
	* Appends the rest of the unprocessed text, cleans up the markup then wraps the result in a
	* root node that carries the namespace declarations
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Catch up with the end of the text, closing the last paragraph
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs. Removing a
		// pair can leave its parent empty, so we repeat until a pass removes nothing
		do
		{
			$this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $removed);
		}
		while ($removed > 0);

		// Merge consecutive <i> tags
		if (strpos($this->output, '</i><i>') !== false)
		{
			$this->output = str_replace('</i><i>', '', $this->output);
		}

		// Remove illegal characters from the output to ensure it's valid XML
		$this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F]|\\xEF\\xBF[\\xBE\\xBF])', '', $this->output);

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// The root is <r> if the text is rich, or <t> for plain text (including <p></p> and <br/>)
		if ($this->isRich)
		{
			$rootName = 'r';
		}
		else
		{
			$rootName = 't';
		}

		// Declare every namespace prefix that was recorded, the prefixes are stored as keys
		$startTag = '<' . $rootName;
		foreach ($this->namespaces as $prefix => $unused)
		{
			$startTag .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}
		$startTag .= '>';

		$this->output = $startTag . $this->output . '</' . $rootName . '>';
	}
431 182
432
	/**
	* Append a tag to the output
	*
	* Outputs the text that precedes the tag, then the tag itself, then moves the cursor past
	* the tag. The text consumed by the tag is preserved in the output: inside of the element
	* for a self-closing tag, in a <s> element for a start tag or in a <e> element for an end tag
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$name  = $tag->getName();
		$pos   = $tag->getPos();
		$len   = $tag->getLen();
		$flags = $tag->getFlags();

		// Number of lines to skip before and after the tag. Only tags that ignore whitespace
		// skip any, and end tags skip one more line after them
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($flags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		$breaksParagraph = (bool) ($flags & self::RULE_BREAK_PARAGRAPH);

		// Current paragraph must end before the tag if:
		//  - the tag is an end tag (but not self-closing), or
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs
		$closeParagraph = (!$tag->isStartTag() || $breaksParagraph);

		// Let the cursor catch up with this tag's position
		$this->outputText($pos, $skipBefore, $closeParagraph);

		// Capture the text consumed by the tag
		$consumedText = '';
		if ($len)
		{
			$consumedText = htmlspecialchars(substr($this->text, $pos, $len), ENT_NOQUOTES, 'UTF-8');
		}

		if ($tag->isStartTag())
		{
			// Tags that don't break paragraphs are output inside of a paragraph, if applicable
			if (!$breaksParagraph)
			{
				$this->outputParagraphStart($pos);
			}

			// Record this tag's namespace prefix, if it has one
			$colonPos = strpos($name, ':');
			if ($colonPos)
			{
				$this->namespaces[substr($name, 0, $colonPos)] = 0;
			}

			$xml = '<' . $name;

			// The attributes are output in lexical order, which helps canonicalizing the output
			$attributes = $tag->getAttributes();
			ksort($attributes);
			foreach ($attributes as $attrName => $attrValue)
			{
				$escapedValue = htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8');
				$xml .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', $escapedValue) . '"';
			}

			if (!$tag->isSelfClosingTag())
			{
				// Start tag, it remains open
				$xml .= ($len) ? '><s>' . $consumedText . '</s>' : '>';
			}
			elseif ($len)
			{
				// Self-closing tag that consumes text, the text becomes its content
				$xml .= '>' . $consumedText . '</' . $name . '>';
			}
			else
			{
				$xml .= '/>';
			}
		}
		else
		{
			$xml  = ($len) ? '<e>' . $consumedText . '</e>' : '';
			$xml .= '</' . $name . '>';
		}
		$this->output .= $xml;

		// Move the cursor past the tag
		$this->pos = $pos + $len;

		// Skip newlines (no other whitespace) after this tag, up to $skipAfter of them
		$this->wsPos = $this->pos;
		for (; $skipAfter; --$skipAfter)
		{
			if ($this->wsPos >= $this->textLen || $this->text[$this->wsPos] !== "\n")
			{
				break;
			}

			// Move the whitespace marker past the newline
			++$this->wsPos;
		}
	}
543
544 9
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* The text is escaped before being output. Depending on the current context's flags it can
	* also be wrapped in ignore tags, broken down into paragraphs and have its newlines turned
	* into linebreaks. Whitespace at the end of the text can be kept out of the current paragraph
	* by being output verbatim after it. This method calls itself once per paragraph break found
	* in the text
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
			{
				// There is no paragraph to close if the context doesn't create any
				$closeParagraph = false;
			}
			else
			{
				// Ignore any number of lines at the end if we're closing a paragraph. A negative
				// value never reaches 0 when decremented so the limit never applies
				$maxLines = -1;
			}
		}

		if ($this->pos >= $catchupPos)
		{
			// We're already there, close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Skip over previously identified whitespace if applicable. It is output verbatim
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			if ($this->pos >= $catchupPos)
			{
				// Skipped everything. Close the paragraph if applicable and return
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// Test whether we're even supposed to output anything
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
			// tags
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Compute the amount of text to ignore at the end of the output
		$ignorePos = $catchupPos;
		$ignoreLen = 0;

		// Walk backwards from the end of the text and ignore as many lines (including
		// whitespace) as specified, stopping at the first character that is not whitespace
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to ignore the text at the end
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				// Leading whitespace stays outside of the paragraph
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Look for a paragraph break in this text
			$pbPos = strpos($this->text, "\n\n", $this->pos);

			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				// Output the text up to the break and close the paragraph, then open a new one
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks if applicable, which is only if automatic linebreaks are enabled
			// and neither disabled nor suspended
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the ignored text if applicable. It is output verbatim, after the paragraph's end
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $ignoreLen;
	}
703
704 12
	/**
	* Output a linebreak tag
	*
	* Outputs the text that precedes the tag without closing the current paragraph, then
	* appends a <br/> element
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();
		$this->outputText($brPos, 0, false);

		$this->output .= '<br/>';
	}
715
716
	/**
	* Output an ignore tag
	*
	* The text covered by the tag is escaped and output inside of a <i> element, and the
	* output is marked as rich
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$this->isRich = true;

		$startPos = $tag->getPos();
		$length   = $tag->getLen();

		// Output the text that precedes the tag, without closing the paragraph
		$this->outputText($startPos, 0, false);

		// Output the ignored text inside of its element
		$ignoredText   = substr($this->text, $startPos, $length);
		$this->output .= '<i>' . htmlspecialchars($ignoredText, ENT_NOQUOTES, 'UTF-8') . '</i>';

		// Move the cursor past this tag
		$this->pos = $startPos + $length;
	}
738 19
739 19
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* Whitespace found before given position is output first so that it ends up outside of the
	* paragraph
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		$createsParagraphs = (bool) ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS);

		// Do nothing if we don't use paragraphs, or if we're already in one
		if (!$createsParagraphs || $this->context['inParagraph'])
		{
			return;
		}

		// Output the whitespace between $this->pos and $maxPos if applicable
		$this->outputWhitespace($maxPos);

		// Don't open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
764
765 16
	/**
	* Close current paragraph at current position if applicable
	*
	* Does nothing if the current context is not inside of a paragraph
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		if ($this->context['inParagraph'])
		{
			$this->context['inParagraph'] = false;
			$this->output .= '</p>';
		}
	}
781
782 3
	/**
	* Output the content of a verbatim tag
	*
	* The text up to the end of the current tag is output using given tag's flags in place of
	* the context's flags, which are restored afterwards
	*
	* @param  Tag  $tag Tag whose flags are used while the text is output
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		$savedFlags = $this->context['flags'];

		// NOTE: the end position comes from the current tag, not from the tag passed as argument
		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();

		$this->context['flags'] = $tag->getFlags();
		$this->outputText($endPos, 0, false);
		$this->context['flags'] = $savedFlags;
	}
795 4
796
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace skipped is copied to the output as-is and the cursor is moved past it
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		$maxLen = $maxPos - $this->pos;
		if ($maxLen <= 0)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor, capped at $maxLen
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxLen);
		if ($wsLen > 0)
		{
			$this->output .= substr($this->text, $this->pos, $wsLen);
			$this->pos    += $wsLen;
		}
	}
815 13
816
	//==========================================================================
817 6
	// Plugins handling
818 6
	//==========================================================================
819
820
	/**
	* Disable a plugin
	*
	* Sets the plugin's "isDisabled" option to true. The plugin's config is copied then replaced
	* rather than modified in place, which removes the reference if it was stored as one. Has
	* no effect if the plugin has no config
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy and remove the original element, which removes the reference
		$config = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Put the modified copy back in place
		$config['isDisabled'] = true;
		$this->pluginsConfig[$pluginName] = $config;
	}
839 4
840
	/**
	* Enable a plugin
	*
	* Sets the plugin's "isDisabled" option to false. Has no effect if the plugin has no config.
	*
	* As in disablePlugin(), the plugin's config is copied and replaced rather than modified in
	* place so that if it is stored as a reference, the change does not leak to whatever else
	* shares that reference
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Copy the plugin's config to remove the reference
		$pluginConfig = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Update the value and replace the plugin's config
		$pluginConfig['isDisabled'] = false;
		$this->pluginsConfig[$pluginName] = $pluginConfig;
	}
853 2
854
	/**
	* Execute given plugin
	*
	* The plugin's parser is not executed if the plugin's config has a "quickMatch" string that
	* is not found in the text, or if it has both a "regexp" and a "regexpLimit" and the regexp
	* produces no matches
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap test first: skip the plugin if the text does not contain its quickMatch string
		$hasQuickMatch = isset($config['quickMatch']);
		if ($hasQuickMatch && strpos($this->text, $config['quickMatch']) === false)
		{
			return;
		}

		// Plugins that have no regexp are executed with an empty list of matches
		$matches = [];
		if (isset($config['regexp'], $config['regexpLimit']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (!$matches)
			{
				return;
			}
		}

		// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
		$parser = $this->getPluginParser($pluginName);
		call_user_func($parser, $this->text, $matches);
	}
881 1
882
	/**
	* Execute all the plugins
	*
	* Plugins whose config has a non-empty "isDisabled" value are skipped
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
897
898 184
	/**
	* Execute given regexp on the text and return as many matches as given limit
	*
	* Matches are captured in set order, along with their offset
	*
	* @param  string  $regexp Regexp to execute
	* @param  integer $limit  Maximum number of matches returned
	* @return array
	*/
	protected function getMatches($regexp, $limit)
	{
		$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
		$total = preg_match_all($regexp, $this->text, $matches, $flags);

		// Only the first $limit matches are kept if there are more
		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
915 8
916
	/**
	* Get the cached callback for given plugin's parser
	*
	* If no callback is cached for this plugin, an instance of the plugin's parser is created
	* and its parse() method is cached as the callback. The class is the one named by the
	* "className" element of the plugin's config if there is one, otherwise its name is derived
	* from the plugin's name
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$config    = $this->pluginsConfig[$pluginName];
		$className = $config['className'] ?? 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';

		// Instantiate the parser and cache its parse() method as a callback
		$callback = [new $className($this, $config), 'parse'];
		$this->pluginParsers[$pluginName] = $callback;

		return $this->pluginParsers[$pluginName];
	}
938
939
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin. An empty config is created for the plugin if it has none, which ensures
	* that the parser gets executed. If a regexp is given, the regexp and its limit are stored in
	* the plugin's config and replace any existing values
	*
	* @param  string      $pluginName Plugin's name
	* @param  callable    $parser     Callback that will be executed as the plugin's parser
	* @param  string|null $regexp     Regexp used to collect the matches passed to the parser,
	*                                 or NULL to leave the plugin's config as-is
	* @param  integer     $limit      Maximum number of matches, only used if a regexp is given
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not callable
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		if (!is_callable($parser))
		{
			// $parser is the second argument of this method
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// Both values must be set for the regexp to be used when the plugin is executed
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		// Cache the callback so that it is used in place of the plugin's default parser
		$this->pluginParsers[$pluginName] = $parser;
	}
969 175
970
	//==========================================================================
971 2
	// Rules handling
972 2
	//==========================================================================
973
974 175
	/**
	* Apply closeAncestor rules associated with given tag
	*
	* Open tags are tested from the most recently opened to the oldest. At the first one that
	* is targeted by a closeAncestor rule, given tag is put back in the stack and a magic end
	* tag is added for that ancestor, with a priority that ensures it is processed first
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		// Names of the tags that must be closed, stored as keys
		$targets = $tagConfig['rules']['closeAncestor'];

		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor = $this->openTags[$i];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			++$this->currentFixingCost;

			// We have to close this ancestor. First we reinsert this tag...
			$this->tagStack[] = $tag;

			// ...then we add a new end tag for it with a better priority
			$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

			return true;
		}

		return false;
	}
1014
1015
	/**
	* Apply closeParent rules associated with given tag
	*
	* Only the most recently opened tag is tested. If it is targeted by a closeParent rule,
	* given tag is put back in the stack and a magic end tag is added for the parent, with a
	* priority that ensures it is processed first
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		// The parent is the last tag that was opened
		$parent = end($this->openTags);
		if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
		{
			return false;
		}

		++$this->currentFixingCost;

		// We have to close that parent. First we reinsert the tag...
		$this->tagStack[] = $tag;

		// ...then we add a new end tag for it with a better priority
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		return true;
	}
1050 6
1051
	/**
	* Apply the createChild rules associated with given tag
	*
	* A zero-length start tag is added for every tag named in the rule. They are all positioned
	* after the whitespace that follows the cursor and their priorities increase from -999, in
	* the order the names are listed
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Position of the first character after the cursor that is not whitespace
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);

		$priority = -999;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			$this->addStartTag($childName, $childPos, 0, $priority++);
		}
	}
1070 3
1071 3
	/**
1072
	* Apply fosterParent rules associated with given tag
1073 3
	*
1074
	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
1075
	*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
1076
	*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
1077
	*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
1078
	*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
1079
	*       loop from running indefinitely
1080
	*
1081
	* @param  Tag  $tag Tag
1082
	* @return bool      Whether a new tag has been added
1083
	*/
1084
	protected function fosterParent(Tag $tag)
	{
		// Without an open tag there is no parent to foster
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		// A tag does not foster a copy of itself, and nothing gets fostered once the fixing
		// budget is spent. The parent is closed either way
		$canFoster = ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost);
		if ($canFoster)
		{
			$this->addFosterTag($tag, $parent);
		}

		// Put the current tag back on the stack
		$this->tagStack[] = $tag;

		// Close the parent, using a priority that gets it processed before the current tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Account for the additional tags/processing in the fixing cost
		$this->currentFixingCost += 4;

		return true;
	}
1120 12
1121
	/**
1122
	* Apply requireAncestor rules associated with given tag
1123
	*
1124
	* @param  Tag  $tag Tag
1125 138
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
1126
	*/
1127
	protected function requireAncestor(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['requireAncestor']))
		{
			// No requirement, nothing can be unfulfilled
			return false;
		}

		// One open ancestor from the list is enough to fulfil the requirement
		$ancestorNames = $tagConfig['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			if (!empty($this->cntOpen[$ancestorName]))
			{
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1152
1153
	//==========================================================================
1154 2
	// Tag processing
1155
	//==========================================================================
1156
1157 139
	/**
1158
	* Create and add a copy of a tag as a child of a given tag
1159
	*
1160
	* @param  Tag  $tag       Current tag
1161
	* @param  Tag  $fosterTag Tag to foster
1162
	* @return void
1163
	*/
1164
	protected function addFosterTag(Tag $tag, Tag $fosterTag)
	{
		// Coordinates of the first suitable position right after current tag
		$coords = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

		// The 0-width copy of the fostered tag is only valid as long as current tag is
		$copy = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);
		$tag->cascadeInvalidationTo($copy);
	}
1172
1173 11
	/**
1174
	* Create and add an end tag for given start tag at given position
1175
	*
1176 11
	* @param  Tag     $startTag Start tag
1177 11
	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
1178
	* @param  integer $prio     End tag's priority
1179
	* @return Tag
1180
	*/
1181
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		// Whitespace is trimmed if either the current tag or the start tag asks for it
		$flags = $this->currentTag->getFlags() | $startTag->getFlags();
		if ($flags & self::RULE_IGNORE_WHITESPACE)
		{
			$tagPos = $this->getMagicEndPos($tagPos);
		}

		// The end tag consumes no text and is explicitly paired with the start tag
		$endTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1197
1198
	/**
1199 35
	* Compute the position of a magic end tag, adjusted for whitespace
1200 35
	*
1201
	* @param  integer $tagPos Rightmost possible position for the tag
1202 35
	* @return integer
1203
	*/
1204
	protected function getMagicEndPos($tagPos)
	{
		// Walk backwards, never further than the cursor, for as long as the preceding
		// character is whitespace
		for (; $tagPos > $this->pos; --$tagPos)
		{
			if (strpos(self::WHITESPACE, $this->text[$tagPos - 1]) === false)
			{
				break;
			}
		}

		return $tagPos;
	}
1215 5
1216
	/**
1217 5
	* Compute the position and priority of a magic start tag, adjusted for whitespace
1218
	*
1219
	* @param  integer   $tagPos Leftmost possible position for the tag
1220 5
	* @return integer[]         [Tag pos, priority]
1221
	*/
1222
	protected function getMagicStartCoords($tagPos)
	{
		if (empty($this->tagStack))
		{
			// No upcoming tag: use a position outside the text boundaries so that the
			// computed position can never be equal to it
			$nextPos  = $this->textLen + 1;
			$nextPrio = 0;
		}
		else
		{
			$nextTag  = end($this->tagStack);
			$nextPos  = $nextTag->getPos();
			$nextPrio = $nextTag->getSortPriority();
		}

		// Skip whitespace until the next tag or the end of the text, whichever comes first.
		// The scan is bounded by textLen because reading $this->text[$this->textLen] is out of
		// bounds: it emits a warning and yields an empty string, which PHP 8's strpos() finds
		// at offset 0, pushing the position past the end of the text
		$maxPos = min($nextPos, $this->textLen);
		while ($tagPos < $maxPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
		{
			++$tagPos;
		}

		// If the tag ends up at the same position as the next tag, give it a priority that
		// ensures it is processed first
		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

		return [$tagPos, $prio];
	}
1248
1249
	/**
1250
	* Test whether given start tag is immediately followed by a closing tag
1251 11
	*
1252
	* @param  Tag  $tag Start tag
1253 11
	* @return bool
1254
	*/
1255
	protected function isFollowedByClosingTag(Tag $tag)
	{
		// With an empty stack, nothing follows at all
		if (empty($this->tagStack))
		{
			return false;
		}

		// The last element of the stack is the next tag to be processed
		$nextTag = end($this->tagStack);

		return $nextTag->canClose($tag);
	}
1259
1260
	/**
1261
	* Process all tags in the stack
1262 3
	*
1263
	* @return void
1264 3
	*/
1265
	protected function processTags()
	{
		// Nothing to process
		if (empty($this->tagStack))
		{
			return;
		}

		// Reset the counters of every configured tag
		foreach (array_keys($this->tagsConfig) as $name)
		{
			$this->cntOpen[$name]  = 0;
			$this->cntTotal[$name] = 0;
		}

		// Alternate between draining the stack and closing leftover tags until closing
		// leftover tags does not queue anything new
		for (;;)
		{
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);
				$this->processCurrentTag();
			}

			// Tags are added from ancestors to descendants. Since the stack is processed in
			// LIFO order, they end up being closed from descendants to ancestors
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}

			if (empty($this->tagStack))
			{
				break;
			}
		}
	}
1304
1305
	/**
1306 19
	* Process current tag
1307
	*
1308
	* @return void
1309 158
	*/
1310
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;

		// While tags are ignored in current context, invalidate current tag unless it would
		// close the last open tag or it is a system tag
		if ($this->context['flags'] & self::RULE_IGNORE_TAGS)
		{
			// end() returns false when no tag is open. In that case there is nothing the
			// current tag could close, and false must not be passed to canClose()
			$lastOpenTag   = end($this->openTags);
			$closesLastTag = ($lastOpenTag !== false && $tag->canClose($lastOpenTag));
			if (!$closesLastTag && !$tag->isSystemTag())
			{
				$tag->invalidate();
			}
		}

		$tagPos = $tag->getPos();
		$tagLen = $tag->getLen();

		// Handle tags located at a position that the cursor has already passed
		if ($this->pos > $tagPos && !$tag->isInvalid())
		{
			// If this tag is paired with a start tag that is still open, replace it with an
			// end tag at the cursor that consumes as much of the same text as possible and is
			// paired with the same start tag
			$startTag = $tag->getStartTag();
			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				$this->addEndTag(
					$startTag->getName(),
					$this->pos,
					max(0, $tagPos + $tagLen - $this->pos)
				)->pairWith($startTag);

				// Current tag is not invalidated, it is merely replaced
				return;
			}

			// An ignore tag is replaced with one that covers whatever text remains to ignore
			if ($tag->isIgnoreTag())
			{
				$ignoreLen = $tagPos + $tagLen - $this->pos;
				if ($ignoreLen > 0)
				{
					$this->addIgnoreTag($this->pos, $ignoreLen);

					return;
				}
			}

			// Any other skipped tag is invalidated
			$tag->invalidate();
		}

		if ($tag->isInvalid())
		{
			return;
		}

		// Dispatch the tag to the relevant output/processing method
		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);
		}
		elseif ($tag->isBrTag())
		{
			// Line breaks are silently dropped where they are not allowed
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}
		}
		elseif ($tag->isParagraphBreak())
		{
			$this->outputText($tag->getPos(), 0, true);
		}
		elseif ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);
		}
		elseif ($tag->isStartTag())
		{
			$this->processStartTag($tag);
		}
		else
		{
			$this->processEndTag($tag);
		}
	}
1396 141
1397
	/**
1398
	* Process given start tag (including self-closing tags) at current position
1399
	*
1400 106
	* @param  Tag  $tag Start tag (including self-closing)
1401
	* @return void
1402
	*/
1403
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		// The checks below run in this order:
		//
		// 1. Global limit (tagLimit)
		// 2. filterChain, which filters/validates the tag's attributes
		// 3. closeParent, closeAncestor and fosterParent rules
		// 4. nestingLimit
		// 5. requireAncestor rules
		//
		// The tag must be valid and within limits before any parent or ancestor is closed.
		// Ancestors must be closed before checking the nesting limit, whether the tag is
		// allowed in current context (which changes as ancestors are closed) and whether the
		// required ancestors are still open (a rule may have closed them.)
		$tagLimit = $tagConfig['tagLimit'];
		if ($this->cntTotal[$tagName] >= $tagLimit)
		{
			$this->logger->err(
				'Tag limit exceeded',
				[
					'tag'      => $tag,
					'tagName'  => $tagName,
					'tagLimit' => $tagLimit
				]
			);
			$tag->invalidate();

			return;
		}

		FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
		if ($tag->isInvalid())
		{
			return;
		}

		// If a parent or ancestor has to be closed first, the tag has been put back on the
		// stack and remains valid, so there is nothing else to do for now
		if ($this->currentFixingCost < $this->maxFixingCost
		 && ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)))
		{
			return;
		}

		$nestingLimit = $tagConfig['nestingLimit'];
		if ($this->cntOpen[$tagName] >= $nestingLimit)
		{
			$this->logger->err(
				'Nesting limit exceeded',
				[
					'tag'          => $tag,
					'tagName'      => $tagName,
					'nestingLimit' => $nestingLimit
				]
			);
			$tag->invalidate();

			return;
		}

		if (!$this->tagIsAllowed($tagName))
		{
			$msg     = 'Tag is not allowed in this context';
			$context = ['tag' => $tag, 'tagName' => $tagName];

			// 0-width tags are logged at debug level, tags that consume text as warnings
			if ($tag->getLen() <= 0)
			{
				$this->logger->debug($msg, $context);
			}
			else
			{
				$this->logger->warn($msg, $context);
			}
			$tag->invalidate();

			return;
		}

		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// A tag with an autoClose rule that is not self-closed, paired with an end tag or
		// immediately followed by an end tag is replaced with a self-closing tag that has
		// the same properties
		$autoClose = ($tag->getFlags() & self::RULE_AUTO_CLOSE)
		          && !$tag->isSelfClosingTag()
		          && !$tag->getEndTag()
		          && !$this->isFollowedByClosingTag($tag);
		if ($autoClose)
		{
			$replacement = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$replacement->setAttributes($tag->getAttributes());
			$replacement->setFlags($tag->getFlags());

			$tag = $replacement;
		}

		// Ignore the newline that immediately follows the tag if applicable
		if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE)
		{
			$afterTag = $tag->getPos() + $tag->getLen();
			if (substr($this->text, $afterTag, 1) === "\n")
			{
				$this->addIgnoreTag($afterTag, 1);
			}
		}

		// The tag is valid: output it, update the context, then apply createChild rules
		$this->outputTag($tag);
		$this->pushContext($tag);
		$this->createChild($tag);
	}
1516
1517 139
	/**
1518 139
	* Process given end tag at current position
1519
	*
1520
	* @param  Tag  $tag end tag
1521 139
	* @return void
1522
	*/
1523
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();

		// An end tag whose name has no open tag cannot close anything
		if (empty($this->cntOpen[$tagName]))
		{
			return;
		}

		/**
		* @var array List of tags that need to be closed before given tag
		*/
		$closeTags = [];

		// Walk the open tags from last to first until one can be closed by our tag. Every
		// tag in between has to be closed first and adds to the fixing cost
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$openTag = $this->openTags[$i];
			if ($tag->canClose($openTag))
			{
				break;
			}

			$closeTags[] = $openTag;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// No open tag matches this end tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Whitespace is trimmed if this tag or any of the tags being closed asks for it
		$allFlags = $tag->getFlags();
		foreach ($closeTags as $openTag)
		{
			$allFlags |= $openTag->getFlags();
		}
		$trimWhitespace = (bool) ($allFlags & self::RULE_IGNORE_WHITESPACE);

		// Tags are only reopened while the "fixing" budget has not been exceeded
		$keepReopening = ($this->currentFixingCost < $this->maxFixingCost);

		// Output an end tag for each tag being closed and collect the tags to be reopened.
		// Reopening stops at the first tag that does not have the autoReopen rule
		$reopenTags = [];
		foreach ($closeTags as $openTag)
		{
			if ($keepReopening && ($openTag->getFlags() & self::RULE_AUTO_REOPEN))
			{
				$reopenTags[] = $openTag;
			}
			else
			{
				$keepReopening = false;
			}

			// Earliest position at which this open tag can be closed. It has to be computed
			// on each iteration because it depends on the cursor, which moves with the output
			$closePos = $tag->getPos();
			if ($trimWhitespace)
			{
				$closePos = $this->getMagicEndPos($closePos);
			}

			// Output the end tag that closes this start tag, then update the context
			$closer = new Tag(Tag::END_TAG, $openTag->getName(), $closePos, 0);
			$closer->setFlags($openTag->getFlags());
			$this->outputTag($closer);
			$this->popContext();
		}

		// Output our own tag, which moves the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If the fixing budget allows it, look at upcoming tags and skip end tags that would
		// close tags that have just been closed. Tags that would be closed immediately are
		// removed from the list of tags to reopen
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			$i = count($this->tagStack);
			while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
			{
				$upcomingTag = $this->tagStack[$i];

				// Stop at the first tag that is located after the ignored text or that is not
				// strictly an end tag (start tags and self-closing tags)
				if ($upcomingTag->getPos() > $ignorePos || $upcomingTag->isStartTag())
				{
					break;
				}

				// Look for a closed tag that this upcoming tag would close
				$j = count($closeTags);
				while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
				{
					if (!$upcomingTag->canClose($closeTags[$j]))
					{
						continue;
					}

					// Remove the tag from both lists, which also resets their keys
					array_splice($closeTags, $j, 1);
					if (isset($reopenTags[$j]))
					{
						array_splice($reopenTags, $j, 1);
					}

					// Extend the ignored text so that it covers the upcoming tag
					$ignorePos = max(
						$ignorePos,
						$upcomingTag->getPos() + $upcomingTag->getLen()
					);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
			}
		}

		// Add a 0-width copy of each tag to reopen at the cursor's position
		foreach ($reopenTags as $startTag)
		{
			$reopened = $this->addCopyTag($startTag, $this->pos, 0);

			// Pair the copy with the original's end tag, if it had one
			$pairedEndTag = $startTag->getEndTag();
			if ($pairedEndTag)
			{
				$reopened->pairWith($pairedEndTag);
			}
		}
	}
1681 8
1682 8
	/**
1683
	* Update counters and replace current context with its parent context
1684 1
	*
1685
	* @return void
1686
	*/
1687
	protected function popContext()
	{
		// Remove the last open tag and decrement the number of open tags of that name
		$closedTag = array_pop($this->openTags);
		$closedName = $closedTag->getName();
		--$this->cntOpen[$closedName];

		// Go back to the context that was active before the tag was opened
		$this->context = $this->context['parentContext'];
	}
1693
1694 105
	/**
1695
	* Update counters and replace current context with a new context based on given tag
1696 105
	*
1697 105
	* If given tag is a self-closing tag, the context won't change
1698 105
	*
1699
	* @param  Tag  $tag Start tag (including self-closing)
1700
	* @return void
1701
	*/
1702
	protected function pushContext(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagFlags  = $tag->getFlags();
		$tagConfig = $this->tagsConfig[$tagName];

		// Every tag counts towards the total, self-closing tags included
		++$this->cntTotal[$tagName];

		// A self-closing tag does not open a new context
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags. Unless the tag is transparent, the low bits (allowed
		// children) of current context are overridden with its high bits (allowed
		// descendants) before being combined with the tag's own bitfield
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $bits)
		{
			if (!$isTransparent)
			{
				$bits = ($bits & 0xFF00) | ($bits >> 8);
			}
			$allowed[] = $tagConfig['allowed'][$k] & $bits;
		}

		// The new context's flags are the tag's flags plus the rules inherited from current
		// context, except that RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		$inherited = $this->context['flags'] & self::RULES_INHERITANCE;
		$flags     = $tagFlags | $inherited;
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
1747 105
1748 105
	/**
1749 105
	* Return whether given tag is allowed in current context
1750
	*
1751 105
	* @param  string $tagName
1752
	* @return bool
1753
	*/
1754
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];

		// The bitfield is stored in chunks of 8 bits: the upper bits of the bit number select
		// the chunk, its 3 lower bits select the bit within the chunk
		$chunk = $this->context['allowed'][$bitNumber >> 3];
		$mask  = 1 << ($bitNumber & 7);

		return (bool) ($chunk & $mask);
	}
1760
1761 141
	//==========================================================================
1762
	// Tag stack
1763 141
	//==========================================================================
1764
1765 141
	/**
1766
	* Add a start tag
1767
	*
1768
	* @param  string  $name Name of the tag
1769
	* @param  integer $pos  Position of the tag in the text
1770
	* @param  integer $len  Length of text consumed by the tag
1771
	* @param  integer $prio Tag's priority
1772
	* @return Tag
1773
	*/
1774
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$startTag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

		return $startTag;
	}
1778
1779
	/**
1780
	* Add an end tag
1781 124
	*
1782
	* @param  string  $name Name of the tag
1783 124
	* @param  integer $pos  Position of the tag in the text
1784
	* @param  integer $len  Length of text consumed by the tag
1785
	* @param  integer $prio Tag's priority
1786
	* @return Tag
1787
	*/
1788
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$endTag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

		return $endTag;
	}
1792
1793
	/**
1794
	* Add a self-closing tag
1795 111
	*
1796
	* @param  string  $name Name of the tag
1797 111
	* @param  integer $pos  Position of the tag in the text
1798
	* @param  integer $len  Length of text consumed by the tag
1799
	* @param  integer $prio Tag's priority
1800
	* @return Tag
1801
	*/
1802
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$selfClosingTag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

		return $selfClosingTag;
	}
1806
1807
	/**
1808
	* Add a 0-width "br" tag to force a line break at given position
1809 68
	*
1810
	* @param  integer $pos  Position of the tag in the text
1811 68
	* @param  integer $prio Tag's priority
1812
	* @return Tag
1813
	*/
1814
	public function addBrTag($pos, $prio = 0)
	{
		// Line breaks are 0-width self-closing tags that use the reserved name "br"
		$brTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

		return $brTag;
	}
1818
1819
	/**
1820
	* Add an "ignore" tag
1821 9
	*
1822
	* @param  integer $pos  Position of the tag in the text
1823 9
	* @param  integer $len  Length of text consumed by the tag
1824
	* @param  integer $prio Tag's priority
1825
	* @return Tag
1826
	*/
1827
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		// Cap the length so that the tag never extends past the end of the text
		$ignoreLen = min($len, $this->textLen - $pos);
		$ignoreTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $ignoreLen, $prio);

		return $ignoreTag;
	}
1831
1832
	/**
1833
	* Add a paragraph break at given position
1834 13
	*
1835
	* Uses a zero-width tag that is actually never output in the result
1836 13
	*
1837
	* @param  integer $pos  Position of the tag in the text
1838
	* @param  integer $prio Tag's priority
1839
	* @return Tag
1840
	*/
1841
	public function addParagraphBreak($pos, $prio = 0)
	{
		// Paragraph breaks are 0-width self-closing tags that use the reserved name "pb"
		$breakTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

		return $breakTag;
	}
1845
1846
	/**
1847
	* Add a copy of given tag at given position and length
1848 5
	*
1849
	* @param  Tag     $tag  Original tag
1850 5
	* @param  integer $pos  Copy's position
1851
	* @param  integer $len  Copy's length
1852
	* @param  integer $prio Copy's priority (same as original by default)
1853
	* @return Tag           Copy tag
1854
	*/
1855
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		// Default to the original's priority if none was given
		$copyPrio = (isset($prio)) ? $prio : $tag->getSortPriority();

		// The copy shares the original's type, name and attributes
		$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);
		$copy->setAttributes($tag->getAttributes());

		return $copy;
	}
1866 11
1867
	/**
1868 22
	* Add a tag
1869 22
	*
1870
	* @param  integer $type Tag's type
1871 22
	* @param  string  $name Name of the tag
1872
	* @param  integer $pos  Position of the tag in the text
1873
	* @param  integer $len  Length of text consumed by the tag
1874
	* @param  integer $prio Tag's priority
1875
	* @return Tag
1876
	*/
1877
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag     = new Tag($type, $name, $pos, $len, $prio);
		$isKnown = isset($this->tagsConfig[$name]);

		// Known tags get their rules bitfield from the config
		if ($isKnown)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Unknown tags that are not system tags are invalid, and so are tags whose text span
		// does not pass isInvalidTextSpan()
		if ((!$isKnown && !$tag->isSystemTag()) || $this->isInvalidTextSpan($pos, $len))
		{
			$tag->invalidate();

			return $tag;
		}

		// Disabled tags are invalid too, but they are logged
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$this->logger->warn(
				'Tag is disabled',
				[
					'tag'     => $tag,
					'tagName' => $name
				]
			);
			$tag->invalidate();

			return $tag;
		}

		// Only valid tags are added to the stack
		$this->insertTag($tag);

		return $tag;
	}
1913
1914
	/**
1915 178
	* Test whether given text span is outside text boundaries or an invalid UTF sequence
1916
	*
1917
	* @param  integer $pos Start of text
1918 187
	* @param  integer $len Length of text
1919
	* @return bool
1920
	*/
1921
	protected function isInvalidTextSpan($pos, $len)
	{
		// Negative values and spans that extend past the end of the text are invalid
		if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			return true;
		}

		// The span is also invalid if the byte at either of its boundaries is in the range
		// 0x80..0xBF
		$boundaries = substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1);

		return (bool) preg_match('([\\x80-\\xBF])', $boundaries);
	}
1925
1926
	/**
1927
	* Insert given tag in the tag stack
1928 185
	*
1929
	* @param  Tag  $tag
1930 185
	* @return void
1931
	*/
1932
	protected function insertTag(Tag $tag)
	{
		// An unsorted stack gets sorted later, the tag can simply be appended
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// Starting from the end of the stack, move each tag whose sort key is lower than the
		// new tag's to the next slot. The new tag takes the slot that is left vacant
		$key = $this->getSortKey($tag);
		for ($i = count($this->tagStack); $i > 0; --$i)
		{
			$previous = $this->tagStack[$i - 1];
			if (!($key > $this->getSortKey($previous)))
			{
				break;
			}
			$this->tagStack[$i] = $previous;
		}
		$this->tagStack[$i] = $tag;
	}
1951
1952 3
	/**
1953 3
	* Add a pair of tags
1954
	*
1955 50
	* @param  string  $name     Name of the tags
1956
	* @param  integer $startPos Position of the start tag
1957
	* @param  integer $startLen Length of the start tag
1958
	* @param  integer $endPos   Position of the end tag
1959
	* @param  integer $endLen   Length of the end tag
1960
	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
1961
	* @return Tag               Start tag
1962
	*/
1963
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// The end tag is added before the start tag to try to keep the stack in the correct
		// order. Its priority is the start tag's priority, negated
		$closer = $this->addEndTag($name, $endPos, $endLen, -$prio);
		$opener = $this->addStartTag($name, $startPos, $startLen, $prio);
		$opener->pairWith($closer);

		return $opener;
	}
1972
1973 26
	/**
1974 26
	* Add a tag that represents a verbatim copy of the original text
1975 26
	*
1976
	* @param  integer $pos  Position of the tag in the text
1977 26
	* @param  integer $len  Length of text consumed by the tag
1978
	* @param  integer $prio Tag's priority
1979
	* @return Tag
1980
	*/
1981
	public function addVerbatim($pos, $len, $prio = 0)
	{
		// Verbatim tags are self-closing tags that use the reserved name "v"
		$verbatimTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

		return $verbatimTag;
	}
1985
1986
	/**
1987
	* Sort tags by position and precedence
1988 4
	*
1989
	* @return void
1990 4
	*/
1991
	protected function sortTags()
	{
		// Index each tag by its sort key. The tag's index in the stack is part of the key
		$keyed = [];
		foreach ($this->tagStack as $tagIndex => $stackedTag)
		{
			$keyed[$this->getSortKey($stackedTag, $tagIndex)] = $stackedTag;
		}

		// Sort by key in reverse order, then store the tags as a list
		krsort($keyed);

		$this->tagStack         = array_values($keyed);
		$this->tagStackIsSorted = true;
	}
2004 165
2005
	/**
2006 165
	* Generate a key for given tag that can be used to compare its position using lexical comparisons
2007
	*
2008 165
	* Tags are sorted by position first, then by priority, then by whether they consume any text,
2009 165
	* then by length, and finally in order of their creation.
2010
	*
2011
	* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
2012
	* are at the end of the array.
2013
	*
2014
	* @param  Tag     $tag
2015
	* @param  integer $tagIndex
2016
	* @return string
2017
	*/
2018
	protected function getSortKey(Tag $tag, int $tagIndex = 0): string
	{
		// Negative priorities are flagged and offset by 2^30 so that they are formatted as
		// positive values
		$prio     = $tag->getSortPriority();
		$prioFlag = ($prio >= 0);
		if (!$prioFlag)
		{
			$prio += (1 << 30);
		}

		// Tags that consume text and 0-width tags are sorted separately
		$tagLen  = $tag->getLen();
		$lenFlag = ($tagLen > 0);
		if (!$lenFlag)
		{
			// 0-width tags are ordered by type. Self-closing tags are sorted in-between start
			// tags and end tags to keep them outside of tag pairs
			$typeOrder = [
				Tag::END_TAG          => 0,
				Tag::SELF_CLOSING_TAG => 1,
				Tag::START_TAG        => 2
			];
			$lenOrder = $typeOrder[$tag->getType()];
		}
		else
		{
			// Inverse the length so that the longest matches are processed first
			$lenOrder = $this->textLen - $tagLen;
		}

		return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $prioFlag, $prio, $lenFlag, $lenOrder, $tagIndex);
	}
2049
}