Completed
Push — master ( 44baaa...9b2ed8 )
by Josh
18:12
created

Parser::outputVerbatim()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 1
dl 0
loc 7
ccs 6
cts 6
cp 1
crap 1
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\Logger;
13
use s9e\TextFormatter\Parser\Tag;
14
15
class Parser
16
{
17
	/**#@+
18
	* Boolean rules bitfield
19
	*/
20
	const RULE_AUTO_CLOSE        = 1 << 0;
21
	const RULE_AUTO_REOPEN       = 1 << 1;
22
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
23
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
24
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
25
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
26
	const RULE_IGNORE_TAGS       = 1 << 6;
27
	const RULE_IGNORE_TEXT       = 1 << 7;
28
	const RULE_IGNORE_WHITESPACE = 1 << 8;
29
	const RULE_IS_TRANSPARENT    = 1 << 9;
30
	const RULE_PREVENT_BR        = 1 << 10;
31
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
32
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
33
	/**#@-*/
34
35
	/**
36
	* Bitwise disjunction of rules related to automatic line breaks
37
	*/
38
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
39
40
	/**
41
	* Bitwise disjunction of rules that are inherited by subcontexts
42
	*/
43
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
44
45
	/**
46
	* All the characters that are considered whitespace
47
	*/
48
	const WHITESPACE = " \n\t";
49
50
	/**
51
	* @var array Number of open tags for each tag name
52
	*/
53
	protected $cntOpen;
54
55
	/**
56
	* @var array Number of times each tag has been used
57
	*/
58
	protected $cntTotal;
59
60
	/**
61
	* @var array Current context
62
	*/
63
	protected $context;
64
65
	/**
66
	* @var Tag[] Every tag created by this parser, used for garbage collection
67
	*/
68
	protected $createdTags;
69
70
	/**
71
	* @var integer How hard the parser has worked on fixing bad markup so far
72
	*/
73
	protected $currentFixingCost;
74
75
	/**
76
	* @var Tag Current tag being processed
77
	*/
78
	protected $currentTag;
79
80
	/**
81
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
82
	*/
83
	protected $isRich;
84
85
	/**
86
	* @var Logger This parser's logger
87
	*/
88
	protected $logger;
89
90
	/**
91
	* @var integer How hard the parser should work on fixing bad markup
92
	*/
93
	public $maxFixingCost = 1000;
94
95
	/**
96
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
97
	*/
98
	protected $namespaces;
99
100
	/**
101
	* @var array Stack of open tags (instances of Tag)
102
	*/
103
	protected $openTags;
104
105
	/**
106
	* @var string This parser's output
107
	*/
108
	protected $output;
109
110
	/**
111
	* @var integer Position of the cursor in the original text
112
	*/
113
	protected $pos;
114
115
	/**
116
	* @var array Array of callbacks, using plugin names as keys
117
	*/
118
	protected $pluginParsers = [];
119
120
	/**
121
	* @var array Associative array of [pluginName => pluginConfig]
122
	*/
123
	protected $pluginsConfig;
124
125
	/**
126
	* @var array Variables registered for use in filters
127
	*/
128
	public $registeredVars = [];
129
130
	/**
131
	* @var array Root context, used at the root of the document
132
	*/
133
	protected $rootContext;
134
135
	/**
136
	* @var array Tags' config
137
	*/
138
	protected $tagsConfig;
139
140
	/**
141
	* @var array Tag storage
142
	*/
143
	protected $tagStack;
144
145
	/**
146
	* @var bool Whether the tags in the stack are sorted
147
	*/
148
	protected $tagStackIsSorted;
149
150
	/**
151
	* @var string Text being parsed
152
	*/
153
	protected $text;
154
155
	/**
156
	* @var integer Length of the text being parsed
157
	*/
158
	protected $textLen;
159
160
	/**
161
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
162
	*              whether the parser was reset during execution
163
	*/
164
	protected $uid = 0;
165
166
	/**
167
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
168
	*/
169
	protected $wsPos;
170
171
	/**
	* Constructor
	*
	* Stores the four configuration sections found in given array, then calls __wakeup() to
	* create the properties that are not part of the configuration
	*
	* @param array $config Array with the keys "plugins", "registeredVars", "rootContext" and "tags"
	*/
	public function __construct(array $config)
	{
		$this->pluginsConfig = $config['plugins'];
		$this->registeredVars = $config['registeredVars'];
		$this->rootContext = $config['rootContext'];
		$this->tagsConfig = $config['tags'];

		// Same initialization as after unserialization
		$this->__wakeup();
	}
183
184
	/**
	* Serializer
	*
	* Lists the names of the properties that are kept when the parser is serialized. Those are
	* the same four properties that the constructor reads from the configuration.
	*
	* NOTE: __sleep() is used rather than the Serializable interface so that the user remains free
	* to pick the serializer (e.g. igbinary)
	*
	* @return array Names of the properties to serialize
	*/
	public function __sleep()
	{
		$persistentProperties = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $persistentProperties;
	}
198
199
	/**
	* Unserializer
	*
	* Creates a fresh Logger instance. Also called by the constructor.
	*
	* @return void
	*/
	public function __wakeup()
	{
		$this->logger = new Logger();
	}
208
209
	/**
	* Remove old references to tags
	*
	* Calls gc() on every tag recorded in $this->createdTags then empties the list
	*
	* @return void
	*/
	protected function gc()
	{
		foreach (array_keys($this->createdTags) as $idx)
		{
			$this->createdTags[$idx]->gc();
		}

		$this->createdTags = [];
	}
222
223
	/**
	* Reset the parser for a new parsing
	*
	* Normalizes given text, clears the logger, reinitializes every per-parsing property and
	* increments the parser's uid
	*
	* @param  string $text Text to be parsed
	* @return void
	*/
	protected function reset($text)
	{
		// CR and CRLF become LF, then the control characters that are not allowed in XML are removed
		$text = preg_replace(
			'/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S',
			'',
			preg_replace('/\\r\\n?/', "\n", $text)
		);

		// Start with an empty log
		$this->logger->clear();

		// Counters and bookkeeping
		$this->cntOpen           = [];
		$this->cntTotal          = [];
		$this->createdTags       = [];
		$this->currentFixingCost = 0;
		$this->currentTag        = null;
		$this->namespaces        = [];
		$this->openTags          = [];
		$this->tagStack          = [];
		$this->tagStackIsSorted  = true;

		// Output state
		$this->isRich = false;
		$this->output = '';

		// Input text and cursors
		$this->text    = $text;
		$this->textLen = strlen($text);
		$this->pos     = 0;
		$this->wsPos   = 0;

		// The current context starts as a copy of the root context, outside of any paragraph
		$this->context                = $this->rootContext;
		$this->context['inParagraph'] = false;

		// A new uid lets parse() detect a reset that happens in the middle of a parsing
		++$this->uid;
	}
262
263
	/**
	* Set a tag's option
	*
	* The tag's config is copied, removed from $this->tagsConfig, modified, then put back. This
	* guarantees that the stored config is a value and not a reference, which prevents potential
	* side-effects. References contained *inside* the tag's config are left untouched.
	*
	* Nothing happens if the tag does not exist.
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Work on a copy; unsetting the original entry destroys the reference
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		$config[$optionName] = $optionValue;

		// Store the updated copy under the same name
		$this->tagsConfig[$tagName] = $config;
	}
287
288
	//==========================================================================
289
	// Public API
290
	//==========================================================================
291
292
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to TRUE. Has no effect if the tag does not exist.
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$isDisabled = true;
		$this->setTagOption($tagName, 'isDisabled', $isDisabled);
	}
302
303
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Has no effect if the tag does not
	* exist.
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		unset($this->tagsConfig[$tagName]['isDisabled']);
	}
316
317
	/**
	* Get this parser's Logger instance
	*
	* @return Logger The instance created in __wakeup()
	*/
	public function getLogger()
	{
		return $this->logger;
	}
326
327
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text stored by reset(): EOLs are converted to LF and
	* some control characters are removed, so it may differ slightly from the original text. It
	* is meant to be used when processing log entries, whose offsets are based on the normalized
	* text.
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		return $this->text;
	}
343
344
	/**
	* Parse a text
	*
	* Resets the parser, runs the plugin parsers, processes the tags, releases the references to
	* the tags and finalizes the output.
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws RuntimeException if the parser was reset while it was running
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which run this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Collect the tags, process them, drop the references then build the final document
		$this->executePluginParsers();
		$this->processTags();
		$this->gc();
		$this->finalizeOutput();

		// A different uid means that a plugin or a filter has reset the parser mid-execution
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Let the user know if the limit for fixing bad markup was exceeded
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
380
381
	/**
	* Change a tag's tagLimit
	*
	* Sets the "tagLimit" option of given tag. Has no effect if the tag does not exist.
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value for the tag's tagLimit
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';
		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
394
395
	/**
	* Change a tag's nestingLimit
	*
	* Sets the "nestingLimit" option of given tag. Has no effect if the tag does not exist.
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value for the tag's nestingLimit
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';
		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
408
409
	//==========================================================================
410
	// Filter processing
411
	//==========================================================================
412
413
	/**
	* Execute all the attribute preprocessors of given tag
	*
	* Each preprocessor is a list of [attribute name, regexp, map]. Preprocessors whose attribute
	* is not set on the tag are skipped.
	*
	* @private
	*
	* @param  Tag   $tag       Source tag
	* @param  array $tagConfig Tag's config
	* @return bool             Unconditionally TRUE
	*/
	public static function executeAttributePreprocessors(Tag $tag, array $tagConfig)
	{
		if (empty($tagConfig['attributePreprocessors']))
		{
			return true;
		}

		foreach ($tagConfig['attributePreprocessors'] as $preprocessor)
		{
			list($attrName, $regexp, $map) = $preprocessor;

			if ($tag->hasAttribute($attrName))
			{
				self::executeAttributePreprocessor($tag, $attrName, $regexp, $map);
			}
		}

		return true;
	}
439
440
	/**
	* Execute an attribute preprocessor
	*
	* Matches the value of given attribute against the regexp and copies the mapped captures to
	* the tag's attributes. An existing attribute is never overwritten, except for the source
	* attribute itself.
	*
	* @param  Tag      $tag
	* @param  string   $attrName Name of the source attribute
	* @param  string   $regexp
	* @param  string[] $map      Capture index => attribute name
	* @return void
	*/
	protected static function executeAttributePreprocessor(Tag $tag, $attrName, $regexp, $map)
	{
		$captures = self::getNamedCaptures($tag->getAttribute($attrName), $regexp, $map);

		foreach ($captures as $targetName => $capturedValue)
		{
			// Leave other attributes alone if they are already set
			if ($targetName !== $attrName && $tag->hasAttribute($targetName))
			{
				continue;
			}

			$tag->setAttribute($targetName, $capturedValue);
		}
	}
463
464
	/**
	* Execute a regexp and return the values of the mapped captures
	*
	* Captures that are missing from the match or that are an empty string are left out.
	*
	* @param  string   $attrValue Value to match against
	* @param  string   $regexp
	* @param  string[] $map       Capture index => name used as key in the returned array
	* @return array               Captured values, or an empty array if the regexp does not match
	*/
	protected static function getNamedCaptures($attrValue, $regexp, $map)
	{
		$matches = [];
		if (!preg_match($regexp, $attrValue, $matches))
		{
			return [];
		}

		$captures = [];
		foreach ($map as $idx => $name)
		{
			if (!isset($matches[$idx]) || $matches[$idx] === '')
			{
				continue;
			}

			$captures[$name] = $matches[$idx];
		}

		return $captures;
	}
490
491
	/**
	* Execute a filter
	*
	* Builds the list of arguments from the filter's "params" then calls the filter's "callback".
	* A param with a numeric key is passed by value. Any other param is looked up by name, first
	* in $vars then in $vars['registeredVars'], and NULL is passed if it cannot be found.
	*
	* @see s9e\TextFormatter\Configurator\Items\ProgrammableCallback
	*
	* @param  array $filter Programmed callback
	* @param  array $vars   Variables to be used when executing the callback
	* @return mixed         Whatever the callback returns
	*/
	protected static function executeFilter(array $filter, array $vars)
	{
		$callback = $filter['callback'];

		$args = [];
		if (isset($filter['params']))
		{
			foreach ($filter['params'] as $name => $value)
			{
				if (is_numeric($name))
				{
					// Literal value stored in the filter itself
					$arg = $value;
				}
				elseif (isset($vars[$name]))
				{
					// Variable supplied by the caller
					$arg = $vars[$name];
				}
				elseif (isset($vars['registeredVars'][$name]))
				{
					// Variable registered on the parser
					$arg = $vars['registeredVars'][$name];
				}
				else
				{
					// Nothing goes by that name
					$arg = null;
				}

				$args[] = $arg;
			}
		}

		return call_user_func_array($callback, $args);
	}
532
533
	/**
	* Filter the attributes of given tag
	*
	* Works in three passes: attributes that have a generator get a generated value, then every
	* attribute of the tag is either removed (undefined attribute, or a filter returned FALSE) or
	* replaced with its filtered value, and finally missing attributes receive their default
	* value. If the tag's config defines no attributes, all of the tag's attributes are removed.
	*
	* @private
	*
	* @param  Tag    $tag            Tag being checked
	* @param  array  $tagConfig      Tag's config
	* @param  array  $registeredVars Array of registered vars for use in attribute filters
	* @param  Logger $logger         This parser's Logger instance
	* @return bool                   FALSE if a required attribute is missing and has no default
	*                                value, TRUE otherwise
	*/
	public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
	{
		if (empty($tagConfig['attributes']))
		{
			$tag->setAttributes([]);

			return true;
		}

		$attributesConfig = $tagConfig['attributes'];

		// First pass: generated values
		foreach ($attributesConfig as $attrName => $attrConfig)
		{
			if (!isset($attrConfig['generator']))
			{
				continue;
			}

			$generatorVars = [
				'attrName'       => $attrName,
				'logger'         => $logger,
				'registeredVars' => $registeredVars
			];
			$tag->setAttribute($attrName, self::executeFilter($attrConfig['generator'], $generatorVars));
		}

		// Second pass: remove undefined attributes and run the filter chains
		foreach ($tag->getAttributes() as $attrName => $attrValue)
		{
			if (!isset($attributesConfig[$attrName]))
			{
				// This attribute is not defined for this tag
				$tag->removeAttribute($attrName);
				continue;
			}

			$attrConfig = $attributesConfig[$attrName];
			if (!isset($attrConfig['filterChain']))
			{
				// Nothing to filter, the value is kept as-is
				continue;
			}

			// Messages logged by the filters are associated with this attribute
			$logger->setAttribute($attrName);

			foreach ($attrConfig['filterChain'] as $filter)
			{
				$filterVars = [
					'attrName'       => $attrName,
					'attrValue'      => $attrValue,
					'logger'         => $logger,
					'registeredVars' => $registeredVars
				];
				$attrValue = self::executeFilter($filter, $filterVars);

				if ($attrValue === false)
				{
					// The value was rejected, the rest of the chain is skipped
					$tag->removeAttribute($attrName);
					break;
				}
			}

			// Store the filtered value unless it was rejected
			if ($attrValue !== false)
			{
				$tag->setAttribute($attrName, $attrValue);
			}

			$logger->unsetAttribute();
		}

		// Third pass: default values and required attributes
		foreach ($attributesConfig as $attrName => $attrConfig)
		{
			if ($tag->hasAttribute($attrName))
			{
				continue;
			}

			if (isset($attrConfig['defaultValue']))
			{
				$tag->setAttribute($attrName, $attrConfig['defaultValue']);
			}
			elseif (!empty($attrConfig['required']))
			{
				// Missing, required and without a default value: the attribute set is invalid
				return false;
			}
		}

		return true;
	}
644
645
	/**
	* Execute given tag's filterChain
	*
	* Filters are executed in order until one of them returns a falsy value. A tag whose config
	* has no filterChain is considered valid.
	*
	* @param  Tag  $tag Tag to filter
	* @return bool      Whether the tag is valid
	*/
	protected function filterTag(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['filterChain']))
		{
			return true;
		}

		// Record the tag being processed into the logger so that it can be added to the context
		// of the messages logged during the execution
		$this->logger->setTag($tag);

		// Variables that filters can request by name
		$filterVars = [
			'logger'         => $this->logger,
			'openTags'       => $this->openTags,
			'parser'         => $this,
			'registeredVars' => $this->registeredVars,
			'tag'            => $tag,
			'tagConfig'      => $tagConfig,
			'text'           => $this->text
		];

		$isValid = true;
		foreach ($tagConfig['filterChain'] as $filter)
		{
			if (self::executeFilter($filter, $filterVars))
			{
				continue;
			}

			// This filter has rejected the tag, the rest of the chain is skipped
			$isValid = false;
			break;
		}

		// The tag is no longer being processed
		$this->logger->unsetTag();

		return $isValid;
	}
689
690
	//==========================================================================
691
	// Output handling
692
	//==========================================================================
693
694
	/**
	* Finalize the output by appending the rest of the unprocessed text and create the root node
	*
	* Also removes empty pairs of tags, merges consecutive <i> tags and encodes the characters
	* outside of the BMP. The root node is <r> if the text is rich, <t> otherwise, and it carries
	* one xmlns declaration per recorded namespace prefix.
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Catch up with the end of the text, closing the last paragraph if there's one
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs such as <I><U></U></I> and empty paragraphs. Removing a pair may
		// create a new empty pair, so we repeat until nothing is replaced
		$replacements = 0;
		do
		{
			$this->output = preg_replace('(<([^ />]+)></\\1>)', '', $this->output, -1, $replacements);
		}
		while ($replacements > 0);

		// Merge consecutive <i> tags
		if (strpos($this->output, '</i><i>') !== false)
		{
			$this->output = str_replace('</i><i>', '', $this->output);
		}

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// <r> is the root of rich text, <t> is for plain text (including <p></p> and <br/>)
		$rootName = 't';
		if ($this->isRich)
		{
			$rootName = 'r';
		}

		// Declare every namespace in the root node
		$rootStart = '<' . $rootName;
		foreach ($this->namespaces as $prefix => $unused)
		{
			$rootStart .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}
		$rootStart .= '>';

		$this->output = $rootStart . $this->output . '</' . $rootName . '>';
	}
732
733
	/**
	* Append a tag to the output
	*
	* Outputs the text that precedes the tag, then the tag itself, then moves the cursor past the
	* text consumed by the tag. The text consumed by a start tag is wrapped in <s>, the text
	* consumed by an end tag is wrapped in <e>, and the text consumed by a self-closing tag is
	* used as the content of the element. Marks the output as rich.
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$name  = $tag->getName();
		$pos   = $tag->getPos();
		$len   = $tag->getLen();
		$flags = $tag->getFlags();

		// Number of newlines that can be ignored on either side of the tag
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($flags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$breaksParagraph = (bool) ($flags & self::RULE_BREAK_PARAGRAPH);
		$closeParagraph  = ($tag->isStartTag()) ? $breaksParagraph : true;

		// Output everything between the cursor and this tag
		$this->outputText($pos, $skipBefore, $closeParagraph);

		// Escaped copy of the text consumed by the tag
		$consumedText = '';
		if ($len)
		{
			$consumedText = htmlspecialchars(substr($this->text, $pos, $len), ENT_NOQUOTES, 'UTF-8');
		}

		if ($tag->isStartTag())
		{
			// Tags that don't break paragraphs live inside of a paragraph
			if (!$breaksParagraph)
			{
				$this->outputParagraphStart($pos);
			}

			// Record the prefix of namespaced tags
			$colonPos = strpos($name, ':');
			if ($colonPos)
			{
				$this->namespaces[substr($name, 0, $colonPos)] = 0;
			}

			$xml = '<' . $name;

			// Attributes are output in lexical order, which helps canonicalizing the output
			$attributes = $tag->getAttributes();
			ksort($attributes);
			foreach ($attributes as $attrName => $attrValue)
			{
				$escapedValue = str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8'));
				$xml .= ' ' . $attrName . '="' . $escapedValue . '"';
			}

			if ($tag->isSelfClosingTag())
			{
				// The consumed text, if any, becomes the content of the element
				$xml .= ($len) ? '>' . $consumedText . '</' . $name . '>' : '/>';
			}
			elseif ($len)
			{
				$xml .= '><s>' . $consumedText . '</s>';
			}
			else
			{
				$xml .= '>';
			}

			$this->output .= $xml;
		}
		else
		{
			if ($len)
			{
				$this->output .= '<e>' . $consumedText . '</e>';
			}

			$this->output .= '</' . $name . '>';
		}

		// The cursor is now right after the tag
		$this->pos   = $pos + $len;
		$this->wsPos = $this->pos;

		// Mark up to $skipAfter newlines (no other whitespace) after the tag as skippable
		for (; $skipAfter > 0; --$skipAfter)
		{
			if ($this->wsPos >= $this->textLen || $this->text[$this->wsPos] !== "\n")
			{
				break;
			}

			++$this->wsPos;
		}
	}
855
856
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* The text is escaped with htmlspecialchars() before being appended to the output. Depending
	* on the flags of the current context, the text is split into paragraphs, line breaks are
	* followed by <br/>, or the whole text is wrapped in <i> tags (RULE_IGNORE_TEXT). Whitespace
	* ignored at the end of the text is output after the paragraph's end tag.
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
			{
				// There are no paragraphs in this context, so there's nothing to close
				$closeParagraph = false;
			}
			else
			{
				// Ignore any number of lines at the end if we're closing a paragraph. A negative
				// value never reaches zero in the loop below
				$maxLines = -1;
			}
		}

		if ($this->pos >= $catchupPos)
		{
			// We're already there, close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Skip over previously identified whitespace if applicable. It is output verbatim
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			if ($this->pos >= $catchupPos)
			{
				// Skipped everything. Close the paragraph if applicable and return
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// Test whether we're even supposed to output anything
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
			// tags. The text must be escaped like any other text, otherwise characters such as
			// "<" and "&" would end up unescaped in the XML output
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Compute the amount of text to ignore at the end of the output
		$ignorePos = $catchupPos;
		$ignoreLen = 0;

		// Ignore as many lines (including whitespace) as specified, walking backwards from the
		// end of the text until a non-whitespace character is found
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to ignore the text at the end
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				// Leading whitespace stays outside of the paragraph
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Look for a paragraph break in this text
			$pbPos = strpos($this->text, "\n\n", $this->pos);

			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				// Output the text up to the break and close the paragraph, then start a new one
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks if applicable, which is when RULE_ENABLE_AUTO_BR is the only
			// flag set among the rules related to automatic line breaks
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the ignored text if applicable. It is made of whitespace only
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $ignoreLen;
	}
1015
1016
	/**
	* Output a linebreak tag
	*
	* Outputs the text up to the tag's position then appends a <br/> to the output
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();

		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
1027
1028
	/**
	* Output an ignore tag
	*
	* Outputs the text up to the tag's position, then the escaped text consumed by the tag inside
	* of an <i> element, and moves the cursor past the tag. Marks the output as rich.
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$start  = $tag->getPos();
		$length = $tag->getLen();

		// Text covered by the tag, escaped for use in XML
		$ignored = htmlspecialchars(substr($this->text, $start, $length), ENT_NOQUOTES, 'UTF-8');

		// Output what precedes the tag before the tag itself
		$this->outputText($start, 0, false);
		$this->output .= '<i>' . $ignored . '</i>';
		$this->isRich  = true;

		// The cursor is now right after the tag
		$this->pos = $start + $length;
	}
1050
1051
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* Does nothing if a paragraph is already open or if the current context does not create
	* paragraphs. Whitespace found before $maxPos is output before the paragraph is opened.
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		if ($this->context['inParagraph'])
		{
			// Already in a paragraph
			return;
		}

		if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
		{
			// This context doesn't use paragraphs
			return;
		}

		// Whitespace between the cursor and $maxPos stays outside of the paragraph
		$this->outputWhitespace($maxPos);

		// Don't open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
1076
1077
	/**
	* Close current paragraph at current position if applicable
	*
	* Does nothing if no paragraph is currently open
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		if ($this->context['inParagraph'])
		{
			$this->output .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
1093
1094
	/**
	* Output the content of a verbatim tag
	*
	* Outputs the text up to the end of the current tag ($this->currentTag) while the flags of
	* the current context are temporarily replaced with the flags of given tag
	*
	* @param  Tag  $tag Tag whose flags are used while outputting the text
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		// Swap the context's flags for the tag's
		$savedFlags = $this->context['flags'];
		$this->context['flags'] = $tag->getFlags();

		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
		$this->outputText($endPos, 0, false);

		// Put the original flags back
		$this->context['flags'] = $savedFlags;
	}
1107
1108
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace is copied to the output as-is and the cursor is moved past it. Only the
	* characters located before $maxPos are considered.
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		$maxLen = $maxPos - $this->pos;
		if ($maxLen <= 0)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxLen);
		if (!$wsLen)
		{
			return;
		}

		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
1127
1128
	//==========================================================================
1129
	// Plugins handling
1130
	//==========================================================================
1131
1132
	/**
	* Disable a plugin
	*
	* Does nothing if there is no config for given plugin
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy of the plugin's config in order to remove the reference
		$config = $this->pluginsConfig[$pluginName];
		$config['isDisabled'] = true;

		// Drop the original entry, then store the updated copy in its place
		unset($this->pluginsConfig[$pluginName]);
		$this->pluginsConfig[$pluginName] = $config;
	}
1151
1152
	/**
	* Enable a plugin
	*
	* Does nothing if there is no config for given plugin
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		$this->pluginsConfig[$pluginName]['isDisabled'] = false;
	}
1165
1166
	/**
	* Execute given plugin's parser against the text
	*
	* The parser is skipped if the plugin's quickMatch string is not found in the text, or if
	* the plugin has a regexp that does not produce any match
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap test: skip the plugin if its quickMatch string does not appear in the text
		if (isset($config['quickMatch']))
		{
			if (strpos($this->text, $config['quickMatch']) === false)
			{
				return;
			}
		}

		// Collect the regexp's matches if applicable, and skip the plugin if there are none
		if (isset($config['regexp']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (empty($matches))
			{
				return;
			}
		}
		else
		{
			$matches = [];
		}

		// Run the plugin's parser, which will add tags via $this->addStartTag() and others
		$parser = $this->getPluginParser($pluginName);
		call_user_func($parser, $this->text, $matches);
	}
1193
1194
	/**
	* Execute the parser of every plugin that is not disabled
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			// Plugins flagged as disabled are skipped
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
1209
1210
	/**
	* Execute given regexp against the text and return at most $limit matches
	*
	* Matches are captured in set order, along with their offset
	*
	* @param  string  $regexp
	* @param  integer $limit
	* @return array
	*/
	protected function getMatches($regexp, $limit)
	{
		$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
		$total = preg_match_all($regexp, $this->text, $matches, $flags);

		// Discard the matches in excess of the limit
		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
1227
1228
	/**
	* Get the cached callback for given plugin's parser
	*
	* If no callback is cached for this plugin, a parser is instantiated from the class named in
	* the plugin's config ("className") or from the default s9e\TextFormatter\Plugins\*\Parser
	* class, and its parse() method is cached as the callback
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		// Reuse the cached callback if there's one
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$pluginConfig = $this->pluginsConfig[$pluginName];
		if (isset($pluginConfig['className']))
		{
			$className = $pluginConfig['className'];
		}
		else
		{
			$className = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		}

		// Instantiate the parser and register its parse() method as a callback
		$this->pluginParsers[$pluginName] = [new $className($this, $pluginConfig), 'parse'];

		return $this->pluginParsers[$pluginName];
	}
1250
1251
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* @param  string   $pluginName Name of the plugin this parser is registered for
	* @param  callback $parser     Callback used as this plugin's parser
	* @param  string   $regexp     Regexp stored in the plugin's config, if given
	* @param  integer  $limit      Max number of matches for the regexp, only stored if a
	*                              regexp is given
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not a valid callback
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		// The callback is the second argument of this method, the message must say so
		if (!is_callable($parser))
		{
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// Store the regexp and its limit together, they are read together when the plugin is
		// executed
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		$this->pluginParsers[$pluginName] = $parser;
	}
1279
1280
	//==========================================================================
1281
	// Rules handling
1282
	//==========================================================================
1283
1284
	/**
	* Apply closeAncestor rules associated with given tag
	*
	* Open tags are inspected from last to first. If one of them is targeted by this tag's
	* closeAncestor rules, the tag is put back on the stack and an end tag is added for that
	* ancestor at this tag's position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor     = $this->openTags[$i];
			$ancestorName = $ancestor->getName();
			if (!isset($tagConfig['rules']['closeAncestor'][$ancestorName]))
			{
				continue;
			}

			// This ancestor has to be closed. The tag is reinserted first, then a new end tag
			// is added for the ancestor
			$this->tagStack[] = $tag;
			$this->addMagicEndTag($ancestor, $tag->getPos());

			return true;
		}

		return false;
	}
1322
1323
	/**
	* Apply closeParent rules associated with given tag
	*
	* Only the last open tag is considered. If it is targeted by this tag's closeParent rules,
	* the tag is put back on the stack and an end tag is added for the parent at this tag's
	* position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['closeParent'][$parentName]))
		{
			return false;
		}

		// The parent has to be closed. The tag is reinserted first, then a new end tag is added
		// for the parent
		$this->tagStack[] = $tag;
		$this->addMagicEndTag($parent, $tag->getPos());

		return true;
	}
1356
1357
	/**
	* Apply the createChild rules associated with given tag
	*
	* A 0-width start tag is added for each tag name listed in the rule, positioned after the
	* spaces, tabs and newlines (CR or LF) that follow the cursor. Sort priorities start at
	* -999 and increase by one with each child
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		$tagPos   = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
		$priority = -1000;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			++$priority;
			$this->addStartTag($childName, $tagPos, 0)->setSortPriority($priority);
		}
	}
1376
1377
	/**
	* Apply fosterParent rules associated with given tag
	*
	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
	*       foster itself or if two or more tags try to foster each other in a loop. The risk is
	*       mitigated by preventing a tag from creating a child of itself (the parent still gets
	*       closed) and by checking and increasing the currentFixingCost so that a loop of
	*       multiple tags does not run indefinitely. The default tagLimit and nestingLimit also
	*       serve to prevent the loop from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		$canCreateChild = ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost);
		if ($canCreateChild)
		{
			// Add a 0-width copy of the parent tag right after this tag, with a worse priority
			// and make it depend on this tag
			$child = $this->addCopyTag($parent, $tag->getPos() + $tag->getLen(), 0);
			$tag->cascadeInvalidationTo($child);
			$child->setSortPriority($tag->getSortPriority() + 1);
		}

		// Reinsert current tag
		$this->tagStack[] = $tag;

		// Close the parent with a priority that ensures it is processed before this tag
		$endTag = $this->addMagicEndTag($parent, $tag->getPos());
		$endTag->setSortPriority($tag->getSortPriority() - 1);

		// Adjust the fixing cost commensurately with the size of the tag stack which has to be
		// sorted
		$this->currentFixingCost += count($this->tagStack);

		return true;
	}
1431
1432
	/**
	* Apply requireAncestor rules associated with given tag
	*
	* The requirement is fulfilled if at least one of the listed tag names has an open tag. An
	* error is logged if the requirement is not fulfilled
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
	*/
	protected function requireAncestor(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		// Tags with no requireAncestor rule have nothing to fulfill
		if (!isset($tagConfig['rules']['requireAncestor']))
		{
			return false;
		}

		$ancestorNames = $tagConfig['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			// One open ancestor is enough to satisfy the rule
			if (!empty($this->cntOpen[$ancestorName]))
			{
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1463
1464
	//==========================================================================
1465
	// Tag processing
1466
	//==========================================================================
1467
1468
	/**
	* Create and add a 0-width end tag for given start tag at given position
	*
	* The new end tag is paired with the start tag
	*
	* @param  Tag     $startTag Start tag
	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
	* @return Tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos)
	{
		$tagName = $startTag->getName();

		// Move the end tag before the trailing whitespace if the start tag ignores whitespace
		$ignoresWhitespace = (bool) ($startTag->getFlags() & self::RULE_IGNORE_WHITESPACE);
		if ($ignoresWhitespace)
		{
			$tagPos = $this->getMagicPos($tagPos);
		}

		$endTag = $this->addEndTag($tagName, $tagPos, 0);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1491
1492
	/**
	* Compute the position of a magic end tag, adjusted for whitespace
	*
	* The position is moved back over whitespace characters but never further back than the
	* cursor's position
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer
	*/
	protected function getMagicPos($tagPos)
	{
		while ($tagPos > $this->pos)
		{
			// Stop at the first character that is not whitespace
			if (strpos(self::WHITESPACE, $this->text[$tagPos - 1]) === false)
			{
				break;
			}

			--$tagPos;
		}

		return $tagPos;
	}
1509
1510
	/**
	* Test whether the tag at the top of the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool      Result of canClose() on the last tag of the stack, or FALSE if the stack
	*                   is empty
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		return end($this->tagStack)->canClose($tag);
	}
1520
1521
	/**
	* Process all tags in the stack
	*
	* Tags are popped off the stack one at a time and processed. Once the stack is empty, end
	* tags are added for the tags that are still open and the process repeats until the stack
	* stays empty
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Reset the counters of every configured tag
		foreach ($this->tagsConfig as $tagName => $tagConfig)
		{
			$this->cntOpen[$tagName]  = 0;
			$this->cntTotal[$tagName] = 0;
		}

		do
		{
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);

				// While tags are disabled, skip any tag that would not close the last open tag
				// and that is not a system tag
				$tagsAreIgnored = (bool) ($this->context['flags'] & self::RULE_IGNORE_TAGS);
				if ($tagsAreIgnored
				 && !$this->currentTag->canClose(end($this->openTags))
				 && !$this->currentTag->isSystemTag())
				{
					continue;
				}

				$this->processCurrentTag();
			}

			// Close the tags that were left open.
			// NOTE: end tags are added in hierarchical order (ancestors to descendants) but
			//       since the stack is processed in LIFO order, tags get closed in the correct
			//       order, from descendants to ancestors
			foreach ($this->openTags as $startTag)
			{
				$this->addMagicEndTag($startTag, $this->textLen);
			}
		}
		while (!empty($this->tagStack));
	}
1577
1578
	/**
	* Process current tag
	*
	* Invalid tags are ignored. Tags positioned before the cursor are either replaced with a
	* tag that starts at the cursor or invalidated. Other tags are dispatched to the method that
	* matches their nature
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;
		if ($tag->isInvalid())
		{
			return;
		}

		$tagPos = $tag->getPos();
		$tagLen = $tag->getLen();

		// Handle the case where the cursor has already passed this tag's position
		if ($this->pos > $tagPos)
		{
			// If this tag is paired with a start tag that is still open, replace it with an
			// end tag for the same start tag, which starts at the cursor and consumes whatever
			// text remains of this tag's span
			$startTag = $tag->getStartTag();
			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				$remainingLen = max(0, $tagPos + $tagLen - $this->pos);
				$endTag       = $this->addEndTag($startTag->getName(), $this->pos, $remainingLen);
				$endTag->pairWith($startTag);

				// Note that current tag is not invalidated, it's merely replaced
				return;
			}

			// An ignore tag that extends past the cursor is replaced with a new ignore tag
			// that covers the remainder of its span
			if ($tag->isIgnoreTag())
			{
				$ignoreLen = $tagPos + $tagLen - $this->pos;
				if ($ignoreLen > 0)
				{
					$this->addIgnoreTag($this->pos, $ignoreLen);

					return;
				}
			}

			// Skipped tags are invalidated
			$tag->invalidate();

			return;
		}

		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);

			return;
		}

		if ($tag->isBrTag())
		{
			// Output the tag if it's allowed, ignore it otherwise
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}

			return;
		}

		if ($tag->isParagraphBreak())
		{
			$this->outputText($tagPos, 0, true);

			return;
		}

		if ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);

			return;
		}

		if ($tag->isStartTag())
		{
			$this->processStartTag($tag);

			return;
		}

		$this->processEndTag($tag);
	}
1662
1663
	/**
	* Process given start tag (including self-closing tags) at current position
	*
	* The checks are performed in the following order:
	*
	*  1. Check that this tag has not reached its global limit tagLimit
	*  2. Execute this tag's filterChain, which will filter/validate its attributes
	*  3. Apply fosterParent, closeParent and closeAncestor rules
	*  4. Check for nestingLimit
	*  5. Check that the tag is allowed in current context
	*  6. Apply requireAncestor rules
	*
	* This order ensures that the tag is valid and within the set limits before we attempt to
	* close parents or ancestors. Ancestors need to be closed before we can check for nesting
	* limits, whether this tag is allowed within current context (the context may change as
	* ancestors are closed) or whether the required ancestors are still there (they might have
	* been closed by a rule.)
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		// Reject the tag if its global limit has been reached
		if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
		{
			$this->logger->err(
				'Tag limit exceeded',
				[
					'tag'      => $tag,
					'tagName'  => $tagName,
					'tagLimit' => $tagConfig['tagLimit']
				]
			);
			$tag->invalidate();

			return;
		}

		// Reject the tag if it doesn't pass its filters
		if (!$this->filterTag($tag))
		{
			$tag->invalidate();

			return;
		}

		// If a parent/ancestor needs to be closed first, just return (the tag is still valid)
		if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag))
		{
			return;
		}

		// Reject the tag if too many tags of the same name are already open
		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
		{
			$this->logger->err(
				'Nesting limit exceeded',
				[
					'tag'          => $tag,
					'tagName'      => $tagName,
					'nestingLimit' => $tagConfig['nestingLimit']
				]
			);
			$tag->invalidate();

			return;
		}

		// Reject the tag if the current context does not allow it. Tags that consume text are
		// logged as a warning, 0-width tags as a debug message
		if (!$this->tagIsAllowed($tagName))
		{
			$msg     = 'Tag is not allowed in this context';
			$context = ['tag' => $tag, 'tagName' => $tagName];
			if ($tag->getLen() > 0)
			{
				$this->logger->warn($msg, $context);
			}
			else
			{
				$this->logger->debug($msg, $context);
			}
			$tag->invalidate();

			return;
		}

		// Reject the tag if it requires an ancestor that is not open
		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// If this tag has an autoClose rule and it's not paired with an end tag or followed by
		// an end tag, replace it with a self-closing tag with the same properties
		if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$selfClosingTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$selfClosingTag->setAttributes($tag->getAttributes());
			$selfClosingTag->setFlags($tag->getFlags());

			$tag = $selfClosingTag;
		}

		// If applicable, ignore the newline that immediately follows the tag
		if (($tag->getFlags() & self::RULE_TRIM_FIRST_LINE)
		 && !$tag->getEndTag()
		 && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n")
		{
			$this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
		}

		// This tag is valid, output it and update the context
		$this->outputTag($tag);
		$this->pushContext($tag);

		// Apply the createChild rules if applicable
		$this->createChild($tag);
	}
1779
1780
	/**
	* Process given end tag at current position
	*
	* Open tags that stand between this end tag and the tag it closes are closed first, and
	* those that carry the RULE_AUTO_REOPEN flag are reopened afterwards if the fixing budget
	* allows it
	*
	* @param  Tag  $tag end tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();

		// Ignore end tags for which no tag of the same name is open
		if (empty($this->cntOpen[$tagName]))
		{
			return;
		}

		/**
		* @var array List of tags that need to be closed before given tag
		*/
		$closeTags = [];

		// Iterate through all open tags from last to first to find a match for our tag. Each
		// tag that has to be closed along the way increases the fixing cost
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$openTag = $this->openTags[$i];
			if ($tag->canClose($openTag))
			{
				break;
			}

			$closeTags[] = $openTag;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// Did not find a matching tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Only reopen tags if we haven't exceeded our "fixing" budget
		$keepReopening = ($this->currentFixingCost < $this->maxFixingCost);

		// Iterate over tags that are being closed, output their end tag and collect tags to be
		// reopened
		$reopenTags = [];
		foreach ($closeTags as $openTag)
		{
			$openTagName  = $openTag->getName();
			$openTagFlags = $openTag->getFlags();

			// Tags are collected for reopening until we encounter one that should not be
			// reopened automatically
			if ($keepReopening)
			{
				if ($openTagFlags & self::RULE_AUTO_REOPEN)
				{
					$reopenTags[] = $openTag;
				}
				else
				{
					$keepReopening = false;
				}
			}

			// Find the earliest position we can close this open tag
			$tagPos = $tag->getPos();
			if ($openTagFlags & self::RULE_IGNORE_WHITESPACE)
			{
				$tagPos = $this->getMagicPos($tagPos);
			}

			// Output an end tag to close this start tag, then update the context
			$endTag = new Tag(Tag::END_TAG, $openTagName, $tagPos, 0);
			$endTag->setFlags($openTagFlags);
			$this->outputTag($endTag);
			$this->popContext();
		}

		// Output our tag, moving the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
		// close tags that are already being closed now. Also, filter our list of tags being
		// reopened by removing those that would immediately be closed
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			// NOTE: the fixing cost is only increased if there is a tag left to inspect
			for (
				$i = count($this->tagStack) - 1;
				$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost;
				--$i
			)
			{
				$upcomingTag = $this->tagStack[$i];

				// Stop unless the upcoming tag is positioned at current "ignore" position and
				// it's strictly an end tag (not a start tag or a self-closing tag)
				if ($upcomingTag->getPos() > $ignorePos
				 || $upcomingTag->isStartTag())
				{
					break;
				}

				// Test whether this tag would close any of the tags we're about to reopen
				for (
					$j = count($closeTags) - 1;
					$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost;
					--$j
				)
				{
					if (!$upcomingTag->canClose($closeTags[$j]))
					{
						continue;
					}

					// Remove the tag from the lists and reset the keys
					array_splice($closeTags, $j, 1);
					if (isset($reopenTags[$j]))
					{
						array_splice($reopenTags, $j, 1);
					}

					// Extend the ignored text to cover this tag
					$ignorePos = max(
						$ignorePos,
						$upcomingTag->getPos() + $upcomingTag->getLen()
					);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
			}
		}

		// Re-add tags that need to be reopened, at current cursor position
		foreach ($reopenTags as $startTag)
		{
			$newTag = $this->addCopyTag($startTag, $this->pos, 0);

			// Re-pair the new tag with the end tag of the tag it replaces, if any
			$pairedEndTag = $startTag->getEndTag();
			if ($pairedEndTag)
			{
				$newTag->pairWith($pairedEndTag);
			}
		}
	}
1936
1937
	/**
	* Update counters and replace current context with its parent context
	*
	* The last open tag is removed from the list of open tags and the number of open tags of
	* the same name is decreased
	*
	* @return void
	*/
	protected function popContext()
	{
		$closedTag = array_pop($this->openTags);
		$tagName   = $closedTag->getName();

		--$this->cntOpen[$tagName];
		$this->context = $this->context['parentContext'];
	}
1948
1949
	/**
	* Update counters and replace current context with a new context based on given tag
	*
	* If given tag is a self-closing tag, only the total count is updated and the context
	* won't change
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagFlags  = $tag->getFlags();
		$tagConfig = $this->tagsConfig[$tagName];

		++$this->cntTotal[$tagName];

		// If this is a self-closing tag, the context remains the same
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags. Each value of the current context is used as-is if the
		// tag is transparent, otherwise its upper byte is kept and also copied over its lower
		// byte. The result is then combined with the tag's own bitfield
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $v)
		{
			$mask      = ($isTransparent) ? $v : (($v & 0xFF00) | ($v >> 8));
			$allowed[] = $tagConfig['allowed'][$k] & $mask;
		}

		// Use this tag's flags as a base for this context and add inherited rules
		$flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		// The current context becomes the parent of the new context
		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
2006
2007
	/**
	* Return whether given tag is allowed in current context
	*
	* The tag's bitNumber is used as an index into the context's "allowed" bitfield, which
	* is read 8 bits per array element
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];

		// The upper bits select the array element, the 3 lower bits select the bit within it
		$bitfield = $this->context['allowed'][$bitNumber >> 3];
		$bitMask  = 1 << ($bitNumber & 7);

		return (bool) ($bitfield & $bitMask);
	}
2019
2020
	//==========================================================================
2021
	// Tag stack
2022
	//==========================================================================
2023
2024
	/**
	* Add a start tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	public function addStartTag($name, $pos, $len)
	{
		$tag = $this->addTag(Tag::START_TAG, $name, $pos, $len);

		return $tag;
	}
2036
2037
	/**
	* Add an end tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	public function addEndTag($name, $pos, $len)
	{
		$tag = $this->addTag(Tag::END_TAG, $name, $pos, $len);

		return $tag;
	}
2049
2050
	/**
	* Add a self-closing tag
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	public function addSelfClosingTag($name, $pos, $len)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len);

		return $tag;
	}
2062
2063
	/**
	* Add a 0-width "br" tag to force a line break at given position
	*
	* The tag is added as a self-closing tag
	*
	* @param  integer $pos  Position of the tag in the text
	* @return Tag
	*/
	public function addBrTag($pos)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0);

		return $tag;
	}
2073
2074
	/**
	* Add an "ignore" tag
	*
	* The tag is added as a self-closing tag named "i". Its length is capped so that it does
	* not extend past the end of the text
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	public function addIgnoreTag($pos, $len)
	{
		// Number of characters between given position and the end of the text
		$maxLen = $this->textLen - $pos;

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, min($len, $maxLen));
	}
2085
2086
	/**
	* Add a paragraph break at given position
	*
	* Uses a zero-width self-closing tag named "pb" that is actually never output in the result
	*
	* @param  integer $pos  Position of the tag in the text
	* @return Tag
	*/
	public function addParagraphBreak($pos)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0);

		return $tag;
	}
2098
2099
	/**
	* Add a copy of given tag at given position and length
	*
	* The copy has the same type, name, attributes and sort priority as the original
	*
	* @param  Tag     $tag Original tag
	* @param  integer $pos Copy's position
	* @param  integer $len Copy's length
	* @return Tag          Copy tag
	*/
	public function addCopyTag(Tag $tag, $pos, $len)
	{
		$type = $tag->getType();
		$name = $tag->getName();

		$copy = $this->addTag($type, $name, $pos, $len);
		$copy->setAttributes($tag->getAttributes());
		$copy->setSortPriority($tag->getSortPriority());

		return $copy;
	}
2115
2116
	/**
	* Add a tag
	*
	* The tag is always created and returned, but it is invalidated rather than added to the
	* stack if it's an unknown tag that is not a system tag, if it's a disabled tag, if either
	* of its length or position is negative or if it's out of bounds
	*
	* @param  integer $type Tag's type
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	protected function addTag($type, $name, $pos, $len)
	{
		$tag = new Tag($type, $name, $pos, $len);

		// Keep a copy of this tag to destroy its references after processing
		$this->createdTags[] = $tag;

		// Set this tag's rules bitfield if the tag is configured
		$isKnown = isset($this->tagsConfig[$name]);
		if ($isKnown)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Unknown tags are invalidated silently, unless they are system tags
		if (!$isKnown && !$tag->isSystemTag())
		{
			$tag->invalidate();

			return $tag;
		}

		// Disabled tags are invalidated and logged
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$this->logger->warn(
				'Tag is disabled',
				[
					'tag'     => $tag,
					'tagName' => $name
				]
			);
			$tag->invalidate();

			return $tag;
		}

		// Tags with a negative length or position, or that are out of bounds, are invalidated
		if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			$tag->invalidate();

			return $tag;
		}

		// If the stack is sorted we check whether this tag should be stored at a lower offset
		// than the last tag which would mean we need to sort the stack. Note that we cannot use
		// compareTags() to break ties here because setSortPriority() can be called *after* tags
		// have been put on the stack, therefore we need to properly sort the stack if the
		// positions are the same
		if ($this->tagStackIsSorted && !empty($this->tagStack))
		{
			$lastTag = end($this->tagStack);
			if ($tag->getPos() >= $lastTag->getPos())
			{
				$this->tagStackIsSorted = false;
			}
		}

		$this->tagStack[] = $tag;

		return $tag;
	}
2179
2180
	/**
	* Add a start tag and an end tag of the same name, paired with each other
	*
	* @param  string  $name     Name of the tags
	* @param  integer $startPos Position of the start tag
	* @param  integer $startLen Length of the start tag
	* @param  integer $endPos   Position of the end tag
	* @param  integer $endLen   Length of the end tag
	* @return Tag               Start tag
	*/
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen)
	{
		// The start tag is created first, then the end tag, then the two are linked
		$startTag = $this->addStartTag($name, $startPos, $startLen);
		$endTag   = $this->addEndTag($name, $endPos, $endLen);
		$startTag->pairWith($endTag);

		return $startTag;
	}
2197
2198
	/**
	* Create a self-closing "v" tag, which represents a verbatim copy of the original text
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @return Tag
	*/
	public function addVerbatim($pos, $len)
	{
		$tagName = 'v';

		return $this->addTag(Tag::SELF_CLOSING_TAG, $tagName, $pos, $len);
	}
2209
2210
	/**
	* Reorder the tag stack using compareTags() and flag it as sorted
	*
	* @return void
	*/
	protected function sortTags()
	{
		// Array-style callable pointing at this class's static comparator
		$comparator = [__CLASS__, 'compareTags'];
		usort($this->tagStack, $comparator);

		$this->tagStackIsSorted = true;
	}
2220
2221
	/**
	* Comparison callback used by sortTags()
	*
	* The tag stack is consumed in LIFO order, so tags are sorted by position _descending_: the
	* tag that appears first in the text ends up last on the stack and is processed first. Ties
	* are broken by sort priority, then by length and tag type
	*
	* @param  Tag     $a First tag to compare
	* @param  Tag     $b Second tag to compare
	* @return integer    Negative if $a sorts before $b, positive if after, 0 if equivalent
	*/
	protected static function compareTags(Tag $a, Tag $b)
	{
		// 1. Position, descending
		$posA = $a->getPos();
		$posB = $b->getPos();
		if ($posA !== $posB)
		{
			return $posB - $posA;
		}

		// 2. Sort priority, descending. A lower value is sorted last and therefore processed
		//    first. IOW, -10 is processed before 10
		$priorityA = $a->getSortPriority();
		$priorityB = $b->getSortPriority();
		if ($priorityA !== $priorityB)
		{
			return $priorityB - $priorityA;
		}

		// 3. Same position and same priority: fall back to the tags' length
		$lenA = $a->getLen();
		$lenB = $b->getLen();

		// Both tags consume text: sort by length ascending so that the longest match is
		// processed first. Tags of identical length compare as equal and their relative order
		// is left to the sort implementation
		if ($lenA && $lenB)
		{
			return $lenA - $lenB;
		}

		// Exactly one tag is zero-width: it is sorted after the wider tag so that it has a chance
		// to be processed before the next character is consumed, which would force it to be
		// skipped
		if ($lenA || $lenB)
		{
			return ($lenA) ? -1 : 1;
		}

		// Both tags are zero-width. End tags are sorted after start tags so that a pair that ends
		// with a zero-width tag can be closed before another pair starts with a zero-width tag,
		// e.g. the pairs that would enclose each of the letters in the string "XY". Self-closing
		// tags sit between end tags and start tags in an attempt to keep them out of tag pairs
		$rank = [
			Tag::END_TAG          => 0,
			Tag::SELF_CLOSING_TAG => 1,
			Tag::START_TAG        => 2
		];

		return $rank[$b->getType()] - $rank[$a->getType()];
	}
2284
}