Completed
Push — master ( d016f1...8b2e03 )
by Josh
19:18
created

Parser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 6
nc 1
nop 1
dl 0
loc 9
ccs 7
cts 7
cp 1
crap 1
rs 9.6666
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\Logger;
13
use s9e\TextFormatter\Parser\Tag;
14
15
class Parser
16
{
17
	/**#@+
18
	* Boolean rules bitfield
19
	*/
20
	const RULE_AUTO_CLOSE        = 1 << 0;
21
	const RULE_AUTO_REOPEN       = 1 << 1;
22
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
23
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
24
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
25
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
26
	const RULE_IGNORE_TAGS       = 1 << 6;
27
	const RULE_IGNORE_TEXT       = 1 << 7;
28
	const RULE_IGNORE_WHITESPACE = 1 << 8;
29
	const RULE_IS_TRANSPARENT    = 1 << 9;
30
	const RULE_PREVENT_BR        = 1 << 10;
31
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
32
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
33
	/**#@-*/
34
35
	/**
36
	* Bitwise disjunction of rules related to automatic line breaks
37
	*/
38
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
39
40
	/**
41
	* Bitwise disjunction of rules that are inherited by subcontexts
42
	*/
43
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
44
45
	/**
46
	* All the characters that are considered whitespace
47
	*/
48
	const WHITESPACE = " \n\t";
49
50
	/**
51
	* @var array Number of open tags for each tag name
52
	*/
53
	protected $cntOpen;
54
55
	/**
56
	* @var array Number of times each tag has been used
57
	*/
58
	protected $cntTotal;
59
60
	/**
61
	* @var array Current context
62
	*/
63
	protected $context;
64
65
	/**
66
	* @var integer How hard the parser has worked on fixing bad markup so far
67
	*/
68
	protected $currentFixingCost;
69
70
	/**
71
	* @var Tag Current tag being processed
72
	*/
73
	protected $currentTag;
74
75
	/**
76
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
77
	*/
78
	protected $isRich;
79
80
	/**
81
	* @var Logger This parser's logger
82
	*/
83
	protected $logger;
84
85
	/**
86
	* @var integer How hard the parser should work on fixing bad markup
87
	*/
88
	public $maxFixingCost = 1000;
89
90
	/**
91
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
92
	*/
93
	protected $namespaces;
94
95
	/**
96
	* @var array Stack of open tags (instances of Tag)
97
	*/
98
	protected $openTags;
99
100
	/**
101
	* @var string This parser's output
102
	*/
103
	protected $output;
104
105
	/**
106
	* @var integer Position of the cursor in the original text
107
	*/
108
	protected $pos;
109
110
	/**
111
	* @var array Array of callbacks, using plugin names as keys
112
	*/
113
	protected $pluginParsers = [];
114
115
	/**
116
	* @var array Associative array of [pluginName => pluginConfig]
117
	*/
118
	protected $pluginsConfig;
119
120
	/**
121
	* @var array Variables registered for use in filters
122
	*/
123
	public $registeredVars = [];
124
125
	/**
126
	* @var array Root context, used at the root of the document
127
	*/
128
	protected $rootContext;
129
130
	/**
131
	* @var array Tags' config
132
	*/
133
	protected $tagsConfig;
134
135
	/**
136
	* @var array Tag storage
137
	*/
138
	protected $tagStack;
139
140
	/**
141
	* @var bool Whether the tags in the stack are sorted
142
	*/
143
	protected $tagStackIsSorted;
144
145
	/**
146
	* @var string Text being parsed
147
	*/
148
	protected $text;
149
150
	/**
151
	* @var integer Length of the text being parsed
152
	*/
153
	protected $textLen;
154
155
	/**
156
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
157
	*              whether the parser was reset during execution
158
	*/
159
	protected $uid = 0;
160
161
	/**
162
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
163
	*/
164
	protected $wsPos;
165
166
	/**
167
	* Constructor
168
	*/
169 176
	public function __construct(array $config)
	{
		// Copy each section of the configuration array into its matching property
		$this->tagsConfig     = $config['tags'];
		$this->rootContext    = $config['rootContext'];
		$this->registeredVars = $config['registeredVars'];
		$this->pluginsConfig  = $config['plugins'];

		// Finish the setup the same way an unserialized instance does
		$this->__wakeup();
	}
178
179
	/**
180
	* Serializer
181
	*
182
	* Returns the properties that need to persist through serialization.
183
	*
184
	* NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
185
	* of the serializer to the user (e.g. igbinary)
186
	*
187
	* @return array
188
	*/
189 2
	public function __sleep()
	{
		// Names of the properties that are kept when this object is serialized
		$persistentProps = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $persistentProps;
	}
193
194
	/**
195
	* Unserializer
196
	*
197
	* @return void
198
	*/
199 176
	public function __wakeup()
	{
		// The logger is not part of the serialized state (see __sleep()), so a fresh one is
		// created here. The constructor calls this method too
		$this->logger = new Logger();
	}
203
204
	/**
205
	* Reset the parser for a new parsing
206
	*
207
	* @param  string $text Text to be parsed
208
	* @return void
209
	*/
210 164
	protected function reset($text)
	{
		// Normalize the input: CR and CRLF become LF, then the control characters that aren't
		// allowed in XML are stripped
		$normalized = preg_replace('/\\r\\n?/', "\n", $text);
		$normalized = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $normalized);

		// Start from an empty log
		$this->logger->clear();

		// Text being parsed and cursor positions
		$this->text    = $normalized;
		$this->textLen = strlen($normalized);
		$this->pos     = 0;
		$this->wsPos   = 0;

		// Tag bookkeeping
		$this->tagStack         = [];
		$this->tagStackIsSorted = false;
		$this->openTags         = [];
		$this->cntOpen          = [];
		$this->cntTotal         = [];
		$this->currentTag       = null;

		// Output state
		$this->output            = '';
		$this->isRich            = false;
		$this->namespaces        = [];
		$this->currentFixingCost = 0;

		// The current context starts as a copy of the root context, outside of any paragraph
		$this->context                = $this->rootContext;
		$this->context['inParagraph'] = false;

		// A new uid lets parse() detect that the parser was reset during execution
		++$this->uid;
	}
242
243
	/**
244
	* Set a tag's option
245
	*
246
	* This method ensures that the tag's config is a value and not a reference, to prevent
247
	* potential side-effects. References contained *inside* the tag's config are left untouched
248
	*
249
	* @param  string $tagName     Tag's name
250
	* @param  string $optionName  Option's name
251
	* @param  mixed  $optionValue Option's value
252
	* @return void
253
	*/
254 7
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		// Unknown tags are silently ignored
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Take a copy of the config then drop the original entry. If the entry was a reference,
		// unsetting it destroys the reference so the write below cannot leak elsewhere
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Apply the option to the copy and store the copy back under the same name
		$config[$optionName]        = $optionValue;
		$this->tagsConfig[$tagName] = $config;
	}
267
268
	//==========================================================================
269
	// Public API
270
	//==========================================================================
271
272
	/**
273
	* Disable a tag
274
	*
275
	* @param  string $tagName Name of the tag
276
	* @return void
277
	*/
278 3
	public function disableTag($tagName)
	{
		// Delegates to setTagOption(), which does nothing if the tag is not configured
		$this->setTagOption($tagName, 'isDisabled', true);
	}
282
283
	/**
284
	* Enable a tag
285
	*
286
	* @param  string $tagName Name of the tag
287
	* @return void
288
	*/
289 1
	public function enableTag($tagName)
	{
		// Unknown tags are silently ignored
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Work on a copy and drop the original entry first, the same way setTagOption() does.
		// If the entry is a reference, removing the option in place would also remove it from
		// whatever else shares that reference
		$tagConfig = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// A tag is enabled by removing the isDisabled option altogether
		unset($tagConfig['isDisabled']);
		$this->tagsConfig[$tagName] = $tagConfig;
	}
296
297
	/**
298
	* Get this parser's Logger instance
299
	*
300
	* @return Logger
301
	*/
302 8
	public function getLogger()
	{
		// Same instance that reset() clears at the start of each parsing
		return $this->logger;
	}
306
307
	/**
308
	* Return the last text parsed
309
	*
310
	* This method returns the normalized text, which may be slightly different from the original
311
	* text in that EOLs are normalized to LF and other control codes are stripped. This method is
312
	* meant to be used in support of processing log entries, which contain offsets based on the
313
	* normalized text
314
	*
315
	* @see Parser::reset()
316
	*
317
	* @return string
318
	*/
319 2
	public function getText()
	{
		// This is the text as stored by reset(), i.e. after normalization
		return $this->text;
	}
323
324
	/**
325
	* Parse a text
326
	*
327
	* @param  string $text Text to parse
328
	* @return string       XML representation
329
	*/
330 164
	public function parse($text)
	{
		// Start from a clean state and remember which run this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Collect the tags from plugins, process them, then wrap up the document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means reset() was called again while this run was in progress, e.g.
		// by a plugin or a filter
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Going over the fixing cost limit is only reported, it does not fail the parsing
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
357
358
	/**
359
	* Change a tag's tagLimit
360
	*
361
	* NOTE: the default tagLimit should generally be set during configuration instead
362
	*
363
	* @param  string  $tagName  The tag's name, in UPPERCASE
364
	* @param  integer $tagLimit
365
	* @return void
366
	*/
367 2
	public function setTagLimit($tagName, $tagLimit)
	{
		// Delegates to setTagOption(), which does nothing if the tag is not configured
		$this->setTagOption($tagName, 'tagLimit', $tagLimit);
	}
371
372
	/**
373
	* Change a tag's nestingLimit
374
	*
375
	* NOTE: the default nestingLimit should generally be set during configuration instead
376
	*
377
	* @param  string  $tagName      The tag's name, in UPPERCASE
378
	* @param  integer $nestingLimit
379
	* @return void
380
	*/
381 2
	public function setNestingLimit($tagName, $nestingLimit)
	{
		// Delegates to setTagOption(), which does nothing if the tag is not configured
		$this->setTagOption($tagName, 'nestingLimit', $nestingLimit);
	}
385
386
	//==========================================================================
387
	// Filter processing
388
	//==========================================================================
389
390
	/**
391
	* Execute all the attribute preprocessors of given tag
392
	*
393
	* @private
394
	*
395
	* @param  Tag   $tag       Source tag
396
	* @param  array $tagConfig Tag's config
397
	* @return bool             Unconditionally TRUE
398
	*/
399 7
	public static function executeAttributePreprocessors(Tag $tag, array $tagConfig)
	{
		// Nothing to do if the tag has no preprocessors
		if (empty($tagConfig['attributePreprocessors']))
		{
			return true;
		}

		// Each entry is a triplet: [source attribute name, regexp, capture map]
		foreach ($tagConfig['attributePreprocessors'] as list($attrName, $regexp, $map))
		{
			// A preprocessor only runs if its source attribute is present on the tag
			if ($tag->hasAttribute($attrName))
			{
				self::executeAttributePreprocessor($tag, $attrName, $regexp, $map);
			}
		}

		return true;
	}
416
417
	/**
418
	* Execute an attribute preprocessor
419
	*
420
	* @param  Tag      $tag
421
	* @param  string   $attrName
422
	* @param  string   $regexp
423
	* @param  string[] $map
424
	* @return void
425
	*/
426 6
	protected static function executeAttributePreprocessor(Tag $tag, $attrName, $regexp, $map)
	{
		// Match the regexp against the source attribute and collect the mapped captures
		$captures = self::getNamedCaptures($tag->getAttribute($attrName), $regexp, $map);

		foreach ($captures as $targetName => $value)
		{
			// A capture may replace the source attribute itself, but it must not overwrite any
			// other attribute that already exists on the tag
			if ($targetName !== $attrName && $tag->hasAttribute($targetName))
			{
				continue;
			}

			$tag->setAttribute($targetName, $value);
		}
	}
440
441
	/**
442
	* Execute a regexp and return the values of the mapped captures
443
	*
444
	* @param  string   $attrValue
445
	* @param  string   $regexp
446
	* @param  string[] $map
447
	* @return array
448
	*/
449 6
	protected static function getNamedCaptures($attrValue, $regexp, $map)
	{
		$captures = [];

		// No match (or a failed match) yields an empty array
		if (preg_match($regexp, $attrValue, $m))
		{
			// $map associates a capture index with the name its value is returned under
			foreach ($map as $idx => $name)
			{
				// Captures that did not participate in the match or that are empty are left out
				if (!isset($m[$idx]) || $m[$idx] === '')
				{
					continue;
				}

				$captures[$name] = $m[$idx];
			}
		}

		return $captures;
	}
467
468
	/**
469
	* Execute a filter
470
	*
471
	* @see s9e\TextFormatter\Configurator\Items\ProgrammableCallback
472
	*
473
	* @param  array $filter Programmed callback
474
	* @param  array $vars   Variables to be used when executing the callback
475
	* @return mixed         Whatever the callback returns
476
	*/
477 139
	protected static function executeFilter(array $filter, array $vars)
	{
		// Build the positional argument list from the filter's params, if it has any
		$args = [];
		if (isset($filter['params']))
		{
			foreach ($filter['params'] as $name => $value)
			{
				if (is_numeric($name))
				{
					// Numeric key: the param is passed by value, as-is
					$args[] = $value;
					continue;
				}

				// Named param: look it up in the supplied vars first, then in the registered
				// vars. An unknown name is passed as NULL
				if (isset($vars[$name]))
				{
					$args[] = $vars[$name];
				}
				elseif (isset($vars['registeredVars'][$name]))
				{
					$args[] = $vars['registeredVars'][$name];
				}
				else
				{
					$args[] = null;
				}
			}
		}

		return call_user_func_array($filter['callback'], $args);
	}
509
510
	/**
511
	* Filter the attributes of given tag
512
	*
513
	* @private
514
	*
515
	* @param  Tag    $tag            Tag being checked
516
	* @param  array  $tagConfig      Tag's config
517
	* @param  array  $registeredVars Array of registered vars for use in attribute filters
518
	* @param  Logger $logger         This parser's Logger instance
519
	* @return bool                   Whether the whole attribute set is valid
520
	*/
521 138
	public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
	{
		// A tag with no attribute definitions gets all of its attributes removed and is valid
		if (empty($tagConfig['attributes']))
		{
			$tag->setAttributes([]);

			return true;
		}

		$definitions = $tagConfig['attributes'];

		// Step 1: attributes that have a generator get their value from it
		foreach ($definitions as $attrName => $attrConfig)
		{
			if (!isset($attrConfig['generator']))
			{
				continue;
			}

			$generated = self::executeFilter(
				$attrConfig['generator'],
				[
					'attrName'       => $attrName,
					'logger'         => $logger,
					'registeredVars' => $registeredVars
				]
			);
			$tag->setAttribute($attrName, $generated);
		}

		// Step 2: remove undefined attributes and run the others through their filterChain
		foreach ($tag->getAttributes() as $attrName => $attrValue)
		{
			// Attributes that are not defined in the tag's config are removed
			if (!isset($definitions[$attrName]))
			{
				$tag->removeAttribute($attrName);
				continue;
			}

			// Attributes with no filterChain are kept untouched
			$attrConfig = $definitions[$attrName];
			if (isset($attrConfig['filterChain']))
			{
				// Let the logger know which attribute is being filtered
				$logger->setAttribute($attrName);

				// Each filter receives the value returned by the previous one. A filter that
				// returns FALSE invalidates the attribute and stops the chain
				foreach ($attrConfig['filterChain'] as $filter)
				{
					$filterVars = [
						'attrName'       => $attrName,
						'attrValue'      => $attrValue,
						'logger'         => $logger,
						'registeredVars' => $registeredVars
					];
					$attrValue = self::executeFilter($filter, $filterVars);

					if ($attrValue === false)
					{
						$tag->removeAttribute($attrName);
						break;
					}
				}

				// Store the filtered value unless the attribute was invalidated
				if ($attrValue !== false)
				{
					$tag->setAttribute($attrName, $attrValue);
				}

				$logger->unsetAttribute();
			}
		}

		// Step 3: handle the attributes that are defined but missing from the tag
		foreach ($definitions as $attrName => $attrConfig)
		{
			if ($tag->hasAttribute($attrName))
			{
				continue;
			}

			if (isset($attrConfig['defaultValue']))
			{
				// Fall back to the default value
				$tag->setAttribute($attrName, $attrConfig['defaultValue']);
			}
			elseif (!empty($attrConfig['required']))
			{
				// Missing, no default value and required: the attribute set is invalid
				return false;
			}
		}

		return true;
	}
621
622
	/**
623
	* Execute given tag's filterChain
624
	*
625
	* @param  Tag  $tag Tag to filter
626
	* @return bool      Whether the tag is valid
627
	*/
628 130
	protected function filterTag(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];

		// A tag with no filterChain is always valid
		if (empty($tagConfig['filterChain']))
		{
			return true;
		}

		// Record the tag being processed into the logger so that it can be added to the context
		// of messages logged during the execution
		$this->logger->setTag($tag);

		// Variables that filters can request by name
		$filterVars = [
			'logger'         => $this->logger,
			'openTags'       => $this->openTags,
			'parser'         => $this,
			'registeredVars' => $this->registeredVars,
			'tag'            => $tag,
			'tagConfig'      => $tagConfig,
			'text'           => $this->text
		];

		// The first filter that returns a falsy value invalidates the tag and stops the chain
		$isValid = true;
		foreach ($tagConfig['filterChain'] as $filter)
		{
			if (!self::executeFilter($filter, $filterVars))
			{
				$isValid = false;
				break;
			}
		}

		// The tag is removed from the logger whether it is valid or not
		$this->logger->unsetTag();

		return $isValid;
	}
666
667
	//==========================================================================
668
	// Output handling
669
	//==========================================================================
670
671
	/**
672
	* Finalize the output by appending the rest of the unprocessed text and create the root node
673
	*
674
	* @return void
675
	*/
676 164
	protected function finalizeOutput()
	{
		// Flush whatever text is left and close the last paragraph
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs such as <I><U></U></I>, as well as empty paragraphs. Removing
		// a pair can leave its parent empty, so repeat until a pass removes nothing
		$removed = 1;
		while ($removed > 0)
		{
			$this->output = preg_replace('(<([^ />]+)></\\1>)', '', $this->output, -1, $removed);
		}

		// Merge consecutive <i> tags. This is a no-op if there are none
		$this->output = str_replace('</i><i>', '', $this->output);

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// The root is <r> for rich text, or <t> for plain text (including <p></p> and <br/>)
		$rootName = ($this->isRich) ? 'r' : 't';

		// Declare every namespace prefix recorded during output on the root node
		$xmlns = '';
		foreach (array_keys($this->namespaces) as $prefix)
		{
			$xmlns .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}

		$this->output = '<' . $rootName . $xmlns . '>' . $this->output . '</' . $rootName . '>';
	}
709
710
	/**
711
	* Append a tag to the output
712
	*
713
	* @param  Tag  $tag Tag to append
714
	* @return void
715
	*/
716 122
	protected function outputTag(Tag $tag)
	{
		// Any tag output through this method makes the document rich
		$this->isRich = true;

		$tagName  = $tag->getName();
		$tagPos   = $tag->getPos();
		$tagLen   = $tag->getLen();
		$tagFlags = $tag->getFlags();

		// Tags that ignore whitespace skip one line before them, and one line after them (two
		// after an end tag)
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));

		// Let the cursor catch up with this tag's position
		$this->outputText($tagPos, $skipBefore, $closeParagraph);

		// Escaped copy of the text consumed by the tag, if any
		$tagText = '';
		if ($tagLen)
		{
			$tagText = htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8');
		}

		if ($tag->isStartTag())
		{
			// Tags that don't break paragraphs may need one opened before them
			if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
			{
				$this->outputParagraphStart($tagPos);
			}

			// Record this tag's namespace prefix, if it has a non-empty one
			$colonPos = strpos($tagName, ':');
			if ($colonPos)
			{
				$this->namespaces[substr($tagName, 0, $colonPos)] = 0;
			}

			// Build the start tag with its attributes in lexical order, which helps
			// canonicalizing the output
			$xml        = '<' . $tagName;
			$attributes = $tag->getAttributes();
			ksort($attributes);
			foreach ($attributes as $attrName => $attrValue)
			{
				$escaped = str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8'));
				$xml    .= ' ' . $attrName . '="' . $escaped . '"';
			}

			if ($tag->isSelfClosingTag())
			{
				// The consumed text becomes the element's content, otherwise the element is empty
				$xml .= ($tagLen) ? '>' . $tagText . '</' . $tagName . '>' : '/>';
			}
			else
			{
				// The consumed text is kept inside of an <s> element
				$xml .= ($tagLen) ? '><s>' . $tagText . '</s>' : '>';
			}

			$this->output .= $xml;
		}
		else
		{
			// End tag: the consumed text is kept inside of an <e> element before the closing tag
			$xml           = ($tagLen) ? '<e>' . $tagText . '</e>' : '';
			$this->output .= $xml . '</' . $tagName . '>';
		}

		// Move the cursor past the tag
		$this->pos   = $tagPos + $tagLen;
		$this->wsPos = $this->pos;

		// Move wsPos past up to $skipAfter newlines (no other whitespace) that follow the tag
		for (; $skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n"; --$skipAfter)
		{
			++$this->wsPos;
		}
	}
832
833
	/**
834
	* Output the text between the cursor's position (included) and given position (not included)
835
	*
836
	* @param  integer $catchupPos     Position we're catching up to
837
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
838
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
839
	* @return void
840
	*/
841 164
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		// Closing a paragraph only makes sense in a context that creates paragraphs. When it
		// does apply, any number of lines can be ignored at the end of the text
		if ($closeParagraph)
		{
			if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
			{
				$maxLines = -1;
			}
			else
			{
				$closeParagraph = false;
			}
		}

		// We're already there, close the paragraph if applicable and return
		if ($this->pos >= $catchupPos)
		{
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Output verbatim the text that lies before wsPos. outputTag() only moves wsPos ahead
		// of the cursor over newlines, so nothing is escaped here
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			// Skipped everything. Close the paragraph if applicable and return
			if ($this->pos >= $catchupPos)
			{
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// In a context that ignores text, the text is not output as content
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// Text made only of whitespace is output as-is. Anything else goes inside of an <i>
			// element and must be escaped the same way outputIgnoreTag() does it, otherwise a
			// "<" or "&" in the original text would end up as markup in the output
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos     = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Walk backwards from the end of the text to measure the trailing whitespace that should
		// be kept out of the formatted text. A negative $maxLines never reaches 0, so it only
		// stops at the first non-whitespace character or at the cursor
		$ignorePos = $catchupPos;
		$ignoreLen = 0;
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to leave the ignored text out
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				// Leading whitespace stays outside of the paragraph
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Every blank line found before $catchupPos ends the current paragraph and starts a
			// new one. The recursive call moves the cursor, so the search resumes from there
			$pbPos = strpos($this->text, "\n\n", $this->pos);
			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks, but only if automatic line breaks are enabled and neither
			// disabled nor suspended
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// The ignored whitespace is output after the paragraph's end, unformatted
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text, including the ignored part
		$this->pos = $catchupPos + $ignoreLen;
	}
992
993
	/**
994
	* Output a linebreak tag
995
	*
996
	* @param  Tag  $tag
997
	* @return void
998
	*/
999 6
	protected function outputBrTag(Tag $tag)
	{
		// Catch up with the tag's position without closing the paragraph, then add the break.
		// Neither isRich nor the cursor is changed here
		$brPos = $tag->getPos();
		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
1004
1005
	/**
1006
	* Output an ignore tag
1007
	*
1008
	* @param  Tag  $tag
1009
	* @return void
1010
	*/
1011 17
	protected function outputIgnoreTag(Tag $tag)
	{
		$start  = $tag->getPos();
		$length = $tag->getLen();

		// Catch up with the tag's position without closing the paragraph
		$this->outputText($start, 0, false);

		// Output the escaped text covered by the tag inside of an <i> element
		$ignored       = htmlspecialchars(substr($this->text, $start, $length), ENT_NOQUOTES, 'UTF-8');
		$this->output .= '<i>' . $ignored . '</i>';
		$this->isRich  = true;

		// Move the cursor past this tag
		$this->pos = $start + $length;
	}
1027
1028
	/**
1029
	* Start a paragraph between current position and given position, if applicable
1030
	*
1031
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
1032
	* @return void
1033
	*/
1034 128
	protected function outputParagraphStart($maxPos)
	{
		// Only applies to contexts that create paragraphs, and only outside of a paragraph
		$createsParagraphs = ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS);
		if (!$createsParagraphs || $this->context['inParagraph'])
		{
			return;
		}

		// Whitespace between the cursor and $maxPos goes before the paragraph
		$this->outputWhitespace($maxPos);

		// Don't open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output                .= '<p>';
		$this->context['inParagraph'] = true;
	}
1053
1054
	/**
1055
	* Close current paragraph at current position if applicable
1056
	*
1057
	* @return void
1058
	*/
1059 18
	protected function outputParagraphEnd()
	{
		// There is nothing to close unless a paragraph is open in the current context
		if ($this->context['inParagraph'])
		{
			$this->output                .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
1070
1071
	/**
1072
	* Output the content of a verbatim tag
1073
	*
1074
	* @param  Tag  $tag
1075
	* @return void
1076
	*/
1077 4
	protected function outputVerbatim(Tag $tag)
	{
		// Temporarily replace the context's flags with the tag's own flags
		$savedFlags             = $this->context['flags'];
		$this->context['flags'] = $tag->getFlags();

		// Output the text up to the end of the current tag. Note that the end position is
		// taken from $this->currentTag whereas the flags come from $tag
		$endPos = $this->currentTag->getPos() + $this->currentTag->getLen();
		$this->outputText($endPos, 0, false);

		// Put the context's flags back
		$this->context['flags'] = $savedFlags;
	}
1084
1085
	/**
1086
	* Skip as much whitespace after current position as possible
1087
	*
1088
	* @param  integer $maxPos Rightmost character to be skipped
1089
	* @return void
1090
	*/
1091 18
	protected function outputWhitespace($maxPos)
	{
		// Nothing to output if the cursor is already at or past $maxPos
		if ($maxPos <= $this->pos)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor and ends before $maxPos
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
		if (!$wsLen)
		{
			return;
		}

		// Output the whitespace as-is and move the cursor past it
		$this->output .= substr($this->text, $this->pos, $wsLen);
		$this->pos    += $wsLen;
	}
1104
1105
	//==========================================================================
1106
	// Plugins handling
1107
	//==========================================================================
1108
1109
	/**
	* Disable a plugin
	*
	* Does nothing if there is no config for given plugin. Otherwise the plugin's config is
	* replaced with a copy that is marked as disabled
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy of the plugin's config so that the stored entry, which may be a
		// reference, is not modified in place
		$config = $this->pluginsConfig[$pluginName];
		$config['isDisabled'] = true;

		// Remove the original entry before storing the updated copy under the same name
		unset($this->pluginsConfig[$pluginName]);
		$this->pluginsConfig[$pluginName] = $config;
	}
1128
1129
	/**
	* Enable a plugin
	*
	* Clears the isDisabled marker of given plugin's config. Does nothing if there is no
	* config for given plugin
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		$this->pluginsConfig[$pluginName]['isDisabled'] = false;
	}
1142
1143
	/**
	* Execute given plugin
	*
	* The plugin is skipped if its config has a quickMatch string that does not appear in the
	* text, or if it has a regexp that does not produce any matches. Otherwise its parser is
	* called with the text and the list of matches (empty if the plugin has no regexp)
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap substring test that lets us skip the plugin altogether
		if (isset($config['quickMatch']))
		{
			if (strpos($this->text, $config['quickMatch']) === false)
			{
				return;
			}
		}

		if (isset($config['regexp']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (empty($matches))
			{
				return;
			}
		}
		else
		{
			$matches = [];
		}

		// The plugin's parser is expected to add tags via $this->addStartTag() and others
		$parser = $this->getPluginParser($pluginName);
		call_user_func($parser, $this->text, $matches);
	}
1170
1171
	/**
	* Execute every plugin that is not disabled
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			// Plugins whose config is marked as disabled are not executed
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
1186
1187
	/**
	* Match given regexp against the text and return at most $limit matches
	*
	* Matches are captured in PREG_SET_ORDER with their offsets (PREG_OFFSET_CAPTURE)
	*
	* @param  string  $regexp Regexp to execute
	* @param  integer $limit  Maximum number of matches to return
	* @return array           Matches, truncated to the first $limit entries if applicable
	*/
	protected function getMatches($regexp, $limit)
	{
		$total = preg_match_all($regexp, $this->text, $found, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

		return ($total > $limit) ? array_slice($found, 0, $limit) : $found;
	}
1204
1205
	/**
	* Return the callback used to run given plugin's parser, creating it on first use
	*
	* If no callback is cached for this plugin, a parser is instantiated from the class named
	* in the plugin's config (or from the plugin's default class name) and its parse() method
	* is cached as the plugin's callback
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		// Reuse the cached callback if there is one
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$config = $this->pluginsConfig[$pluginName];
		if (isset($config['className']))
		{
			$class = $config['className'];
		}
		else
		{
			$class = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		}

		// Instantiate the parser and cache its parse() method as the plugin's callback
		$callback = [new $class($this, $config), 'parse'];
		$this->pluginParsers[$pluginName] = $callback;

		return $callback;
	}
1227
1228
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* @param  string   $pluginName Name of the plugin the parser is registered for
	* @param  callback $parser     Callback that is called with the text and a list of matches
	* @param  string   $regexp     Regexp stored in the plugin's config, if given. Its matches
	*                              are what is passed to the parser when the plugin is executed
	* @param  integer  $limit      Maximum number of matches, stored in the plugin's config only
	*                              if a regexp is given
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not a valid callback
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		if (!is_callable($parser))
		{
			// The callback is the second argument of this method
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// The limit is only meaningful if there is a regexp to apply it to
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		// Cache the callback, replacing any parser that was previously cached for this plugin
		$this->pluginParsers[$pluginName] = $parser;
	}
1256
1257
	//==========================================================================
1258
	// Rules handling
1259
	//==========================================================================
1260
1261
	/**
	* Apply the closeAncestor rules of given tag
	*
	* Open tags are examined from the last opened to the first. If one of them is targeted by
	* this tag's closeAncestor rules, this tag is put back on the stack and an end tag is
	* added at this tag's position to close that ancestor
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		$targets = $tagConfig['rules']['closeAncestor'];
		for ($idx = count($this->openTags) - 1; $idx >= 0; --$idx)
		{
			$ancestor = $this->openTags[$idx];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			// This ancestor has to be closed. Put this tag back on the stack, then add an
			// end tag for the ancestor at this tag's position
			$this->tagStack[] = $tag;
			$this->addMagicEndTag($ancestor, $tag->getPos());

			return true;
		}

		return false;
	}
1299
1300
	/**
	* Apply the closeParent rules of given tag
	*
	* Only the last open tag is considered. If it is targeted by this tag's closeParent
	* rules, this tag is put back on the stack and an end tag is added at this tag's position
	* to close that parent
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		$parent = end($this->openTags);
		if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
		{
			return false;
		}

		// The parent has to be closed. Put this tag back on the stack, then add an end tag
		// for the parent at this tag's position
		$this->tagStack[] = $tag;
		$this->addMagicEndTag($parent, $tag->getPos());

		return true;
	}
1333
1334
	/**
	* Apply the createChild rules of given tag
	*
	* One 0-width start tag is added for each tag name listed in the rule. They are all
	* positioned after the whitespace that follows the cursor, with priorities that start at
	* -999 and increase by 1 for each tag
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Skip the whitespace that follows the cursor to find where the children go
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
		$priority = -1000;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			++$priority;
			$this->addStartTag($childName, $childPos, 0, $priority);
		}
	}
1353
1354
	/**
	* Apply the fosterParent rules of given tag
	*
	* If the last open tag is targeted by this tag's fosterParent rules, that parent is closed
	* right before this tag and, fixing budget permitting, a copy of the parent is scheduled
	* to be opened right after this tag
	*
	* NOTE: this rule could loop without end if a tag tried to foster itself, or if two or more
	*       tags tried to foster each other. This is mitigated by never creating a copy of the
	*       parent when it has the same name as the tag (the parent still gets closed) and by
	*       checking and increasing currentFixingCost so that a loop made of several tags does
	*       not run indefinitely. The default tagLimit and nestingLimit also serve to prevent
	*       the loop from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
		{
			// Schedule a 0-width copy of the parent right after this tag, with a worse
			// priority, and have it invalidated whenever this tag is invalidated
			$copyPos  = $tag->getPos() + $tag->getLen();
			$copyPrio = $tag->getSortPriority() + 1;
			$child    = $this->addCopyTag($parent, $copyPos, 0, $copyPrio);
			$tag->cascadeInvalidationTo($child);
		}

		// Put this tag back on the stack
		$this->tagStack[] = $tag;

		// Close the parent at this tag's position, with a priority that ensures the end tag is
		// processed before this tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Charge the additional tags/processing to the fixing budget
		$this->currentFixingCost += 4;

		return true;
	}
1406
1407
	/**
	* Apply the requireAncestor rules of given tag
	*
	* The requirement is fulfilled as soon as one of the listed tag names has at least one
	* open tag. If none of them does, an error is logged
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
	*/
	protected function requireAncestor(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['requireAncestor']))
		{
			return false;
		}

		$required = $tagConfig['rules']['requireAncestor'];
		foreach ($required as $ancestorName)
		{
			// One open ancestor is enough to satisfy the rule
			if (!empty($this->cntOpen[$ancestorName]))
			{
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $required),
			'tag'             => $tag
		]);

		return true;
	}
1438
1439
	//==========================================================================
1440
	// Tag processing
1441
	//==========================================================================
1442
1443
	/**
	* Create and add a 0-width end tag paired with given start tag
	*
	* @param  Tag     $startTag Start tag
	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
	* @param  integer $prio     End tag's priority
	* @return Tag               The new end tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		// Move the end tag before any trailing whitespace if the start tag ignores whitespace
		if ($startTag->getFlags() & self::RULE_IGNORE_WHITESPACE)
		{
			$tagPos = $this->getMagicPos($tagPos);
		}

		$endTag = $this->addEndTag($startTag->getName(), $tagPos, 0, $prio);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1467
1468
	/**
	* Compute the position of a magic end tag, adjusted for whitespace
	*
	* Starting from given position, moves left for as long as the preceding character is one
	* of self::WHITESPACE, without going further left than the cursor's position
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer         Adjusted position
	*/
	protected function getMagicPos($tagPos)
	{
		for (; $tagPos > $this->pos; --$tagPos)
		{
			// Stop at the first character that is not whitespace
			if (strpos(self::WHITESPACE, $this->text[$tagPos - 1]) === false)
			{
				break;
			}
		}

		return $tagPos;
	}
1485
1486
	/**
	* Test whether the next tag on the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool      FALSE if the tag stack is empty, otherwise whatever canClose() returns
	*                   for the tag at the end of the stack
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		return end($this->tagStack)->canClose($tag);
	}
1496
1497
	/**
	* Process all tags in the stack
	*
	* Tags are popped from the stack and processed one at a time. Once the stack is empty, an
	* end tag is added for every tag that was left open and the stack is processed again,
	* until there is nothing left to process
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Reset the open/total counters of every configured tag
		foreach (array_keys($this->tagsConfig) as $name)
		{
			$this->cntOpen[$name]  = 0;
			$this->cntTotal[$name] = 0;
		}

		do
		{
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);

				// While tags are being ignored, skip current tag unless it would close the
				// last open tag or it is a system tag
				if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
				 && !$this->currentTag->canClose(end($this->openTags))
				 && !$this->currentTag->isSystemTag())
				{
					continue;
				}

				$this->processCurrentTag();
			}

			// Add an end tag at the end of the text for every tag that was left open.
			//
			// NOTE: tags are added in hierarchical order (ancestors to descendants) but since
			//       the stack is processed in LIFO order, they get closed in the correct
			//       order, from descendants to ancestors
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}
		}
		while (!empty($this->tagStack));
	}
1553
1554
	/**
	* Process current tag
	*
	* Invalid tags are ignored. A tag that starts before the cursor is either replaced with an
	* equivalent tag that starts at the cursor (paired end tags whose start tag is still open,
	* and ignore tags that extend past the cursor) or invalidated. Any other tag is handed to
	* the method that matches its kind
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;
		if ($tag->isInvalid())
		{
			return;
		}

		$tagPos = $tag->getPos();
		$tagLen = $tag->getLen();

		// Handle tags that start at a position the cursor has already passed
		if ($this->pos > $tagPos)
		{
			// If this tag is paired with a start tag that is still open, replace it with an
			// end tag for the same start tag that starts at the cursor and consumes whatever
			// is left of the text this tag would have consumed
			$startTag = $tag->getStartTag();
			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				$remaining = max(0, $tagPos + $tagLen - $this->pos);
				$endTag    = $this->addEndTag($startTag->getName(), $this->pos, $remaining);
				$endTag->pairWith($startTag);

				// Note that current tag is not invalidated, it's merely replaced
				return;
			}

			// If this is an ignore tag that extends past the cursor, replace it with a new
			// ignore tag that covers the remaining text
			if ($tag->isIgnoreTag())
			{
				$ignoreLen = $tagPos + $tagLen - $this->pos;
				if ($ignoreLen > 0)
				{
					$this->addIgnoreTag($this->pos, $ignoreLen);

					return;
				}
			}

			// Skipped tags are invalidated
			$tag->invalidate();

			return;
		}

		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);

			return;
		}

		if ($tag->isBrTag())
		{
			// Output the tag if it's allowed, ignore it otherwise
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}

			return;
		}

		if ($tag->isParagraphBreak())
		{
			$this->outputText($tag->getPos(), 0, true);

			return;
		}

		if ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);

			return;
		}

		if ($tag->isStartTag())
		{
			$this->processStartTag($tag);

			return;
		}

		$this->processEndTag($tag);
	}
1638
1639
	/**
	* Process given start tag (including self-closing tags) at current position
	*
	* The tag goes through the following steps, in order:
	*
	*  1. Check that the tag's global limit (tagLimit) has not been reached
	*  2. Execute the tag's filterChain via filterTag()
	*  3. Apply fosterParent, closeParent and closeAncestor rules
	*  4. Check the tag's nestingLimit and whether it is allowed in current context
	*  5. Apply requireAncestor rules
	*
	* This order ensures that the tag is valid and within the set limits before any attempt is
	* made to close parents or ancestors. Ancestors have to be closed before checking for
	* nesting limits, whether this tag is allowed within current context (the context may
	* change as ancestors are closed) or whether the required ancestors are still there (they
	* might have been closed by a rule.)
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$name   = $tag->getName();
		$config = $this->tagsConfig[$name];

		if ($this->cntTotal[$name] >= $config['tagLimit'])
		{
			$this->logger->err('Tag limit exceeded', [
				'tag'      => $tag,
				'tagName'  => $name,
				'tagLimit' => $config['tagLimit']
			]);
			$tag->invalidate();

			return;
		}

		if (!$this->filterTag($tag))
		{
			$tag->invalidate();

			return;
		}

		// If a parent or an ancestor needs to be closed first, stop here. The tag is still
		// valid, the rule that was applied has put it back on the stack
		if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag))
		{
			return;
		}

		if ($this->cntOpen[$name] >= $config['nestingLimit'])
		{
			$this->logger->err('Nesting limit exceeded', [
				'tag'          => $tag,
				'tagName'      => $name,
				'nestingLimit' => $config['nestingLimit']
			]);
			$tag->invalidate();

			return;
		}

		if (!$this->tagIsAllowed($name))
		{
			// Tags that consume text are logged as warnings, 0-width tags as debug messages
			$level = ($tag->getLen() > 0) ? 'warn' : 'debug';
			$this->logger->$level(
				'Tag is not allowed in this context',
				['tag' => $tag, 'tagName' => $name]
			);
			$tag->invalidate();

			return;
		}

		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// A tag with an autoClose rule that is neither paired with an end tag nor followed by
		// one is replaced with a self-closing tag that has the same properties
		if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$selfClosing = new Tag(Tag::SELF_CLOSING_TAG, $name, $tag->getPos(), $tag->getLen());
			$selfClosing->setAttributes($tag->getAttributes());
			$selfClosing->setFlags($tag->getFlags());

			$tag = $selfClosing;
		}

		// A tag with a trimFirstLine rule and no paired end tag has the newline that
		// immediately follows it ignored
		if (($tag->getFlags() & self::RULE_TRIM_FIRST_LINE) && !$tag->getEndTag())
		{
			$afterTag = $tag->getPos() + $tag->getLen();
			if (substr($this->text, $afterTag, 1) === "\n")
			{
				$this->addIgnoreTag($afterTag, 1);
			}
		}

		// This tag is valid, output it and update the context
		$this->outputTag($tag);
		$this->pushContext($tag);

		// Apply the createChild rules if applicable
		$this->createChild($tag);
	}
1755
1756
	/**
	* Process given end tag at current position
	*
	* Looks for the innermost open tag that this end tag can close. Every tag that was opened
	* after it is closed first and, fixing budget permitting, those that have an autoReopen
	* rule are reopened after this end tag
	*
	* @param  Tag  $tag End tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();

		// An end tag whose name has no open tag has nothing to close
		if (empty($this->cntOpen[$tagName]))
		{
			return;
		}

		/**
		* @var array List of tags that need to be closed before given tag
		*/
		$closeTags = [];

		// Go through the open tags from last to first until we find one this tag can close.
		// Every tag in between has to be closed and is charged to the fixing budget
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$openTag = $this->openTags[$i];
			if ($tag->canClose($openTag))
			{
				break;
			}

			$closeTags[] = $openTag;
			++$this->currentFixingCost;
		}

		if ($i < 0)
		{
			// None of the open tags can be closed by this tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Tags are only reopened as long as the "fixing" budget is not exceeded
		$keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost);

		// Close the tags in between and collect those that should be reopened. Collection
		// stops at the first tag that does not have an autoReopen rule
		$reopenTags = [];
		foreach ($closeTags as $openTag)
		{
			$openFlags = $openTag->getFlags();

			if ($keepReopening)
			{
				$keepReopening = (bool) ($openFlags & self::RULE_AUTO_REOPEN);
				if ($keepReopening)
				{
					$reopenTags[] = $openTag;
				}
			}

			// Find the earliest position at which this open tag can be closed
			$closePos = $tag->getPos();
			if ($openFlags & self::RULE_IGNORE_WHITESPACE)
			{
				$closePos = $this->getMagicPos($closePos);
			}

			// Output an end tag that closes this open tag, then update the context
			$closer = new Tag(Tag::END_TAG, $openTag->getName(), $closePos, 0);
			$closer->setFlags($openFlags);
			$this->outputTag($closer);
			$this->popContext();
		}

		// Output the given end tag, which moves the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If the fixing budget allows it, peek at upcoming tags and swallow the end tags that
		// would close tags that have just been closed. Tags that would have been closed that
		// way are also removed from the list of tags to reopen
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			$stackIdx = count($this->tagStack);
			while (--$stackIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
			{
				$upcomingTag = $this->tagStack[$stackIdx];

				// Stop at the first upcoming tag that is positioned after the "ignore"
				// position or that is not strictly an end tag (start tag or self-closing tag)
				if ($upcomingTag->getPos() > $ignorePos
				 || $upcomingTag->isStartTag())
				{
					break;
				}

				// Test whether the upcoming tag would close one of the tags that were closed
				$closeIdx = count($closeTags);
				while (--$closeIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
				{
					if (!$upcomingTag->canClose($closeTags[$closeIdx]))
					{
						continue;
					}

					// Remove the tag from both lists, which also resets their keys
					array_splice($closeTags, $closeIdx, 1);
					if (isset($reopenTags[$closeIdx]))
					{
						array_splice($reopenTags, $closeIdx, 1);
					}

					// Extend the ignored text so that it covers the upcoming tag
					$ignorePos = max(
						$ignorePos,
						$upcomingTag->getPos() + $upcomingTag->getLen()
					);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
			}
		}

		// Add a copy of each tag that needs to be reopened, at current cursor position
		foreach ($reopenTags as $startTag)
		{
			$newTag = $this->addCopyTag($startTag, $this->pos, 0);

			// Pair the copy with the end tag of the original, if it has one
			$pairedEndTag = $startTag->getEndTag();
			if ($pairedEndTag)
			{
				$newTag->pairWith($pairedEndTag);
			}
		}
	}
1912
1913
	/**
	* Remove the last open tag, update its counter and restore the parent context
	*
	* @return void
	*/
	protected function popContext()
	{
		// Remove the last open tag and decrement the number of open tags of that name
		$closedTag = array_pop($this->openTags);
		$tagName   = $closedTag->getName();
		--$this->cntOpen[$tagName];

		// Go back to the context that was current before the tag was opened
		$this->context = $this->context['parentContext'];
	}
1924
1925
	/**
	* Update counters and replace current context with a new context based on given tag
	*
	* The tag's total counter is always incremented. If given tag is a self-closing tag,
	* nothing else happens and the context doesn't change. Otherwise the tag is added to the
	* list of open tags and a new context is created with current context as its parent
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$name     = $tag->getName();
		$tagFlags = $tag->getFlags();
		$config   = $this->tagsConfig[$name];

		++$this->cntTotal[$name];

		// Self-closing tags do not open a new context
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags by masking the tag's own bitfields with those of current
		// context. A transparent tag uses the context's values as-is. For any other tag, the
		// upper byte of each value is kept and also copied over the lower byte
		// NOTE(review): presumably the upper byte holds what is allowed in descendants and
		//               the lower byte what is allowed in children -- confirm against the
		//               code that generates the config
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $bits)
		{
			if (!$isTransparent)
			{
				$bits = ($bits & 0xFF00) | ($bits >> 8);
			}

			$allowed[] = $config['allowed'][$k] & $bits;
		}

		// Start from the tag's flags and add the rules that are inherited from the context
		$flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$name];
		$this->openTags[] = $tag;
		$this->context    = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $this->context
		];
	}
1982
1983
	/**
	* Return whether given tag is allowed in current context
	*
	* The tag's bitNumber is used to locate one bit in the context's list of allowed tags:
	* the upper bits select the entry in the list, the lowest 3 bits select the bit within it
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];
		$bitfield  = $this->context['allowed'][$bitNumber >> 3];
		$mask      = 1 << ($bitNumber & 7);

		return (bool) ($bitfield & $mask);
	}
1995
1996
	//==========================================================================
1997
	// Tag stack
1998
	//==========================================================================
1999
2000
	/**
	* Create a start tag and add it to the tag stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, which may have been invalidated
	*/
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2013
2014
	/**
	* Create an end tag and add it to the tag stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, which may have been invalidated
	*/
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2027
2028
	/**
	* Create a self-closing tag and add it to the tag stack
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag           The new tag, which may have been invalidated
	*/
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2041
2042
	/**
	* Add a 0-width "br" tag to force a line break at given position
	*
	* The tag is a self-closing tag named "br"
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addBrTag($pos, $prio = 0)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

		return $tag;
	}
2053
2054
	/**
	* Add an "ignore" tag
	*
	* The tag is a self-closing tag named "i". Its length is capped so that it does not extend
	* past the end of the text
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		// Number of characters between the tag's position and the end of the text
		$maxLen = $this->textLen - $pos;

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, min($len, $maxLen), $prio);
	}
2066
2067
	/**
	* Add a paragraph break at given position
	*
	* The break is represented by a 0-width self-closing tag named "pb" that is actually never
	* output in the result
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addParagraphBreak($pos, $prio = 0)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

		return $tag;
	}
2080
2081
	/**
	* Add a copy of given tag at given position and length
	*
	* The copy has the same type, name and attributes as the original
	*
	* @param  Tag     $tag  Original tag
	* @param  integer $pos  Copy's position
	* @param  integer $len  Copy's length
	* @param  integer $prio Copy's priority (same as original by default)
	* @return Tag           Copy tag
	*/
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		// Default to the original's priority
		$copyPrio = (isset($prio)) ? $prio : $tag->getSortPriority();

		$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);
		$copy->setAttributes($tag->getAttributes());

		return $copy;
	}
2101
2102
	/**
2103
	* Add a tag
2104
	*
2105
	* @param  integer $type Tag's type
2106
	* @param  string  $name Name of the tag
2107
	* @param  integer $pos  Position of the tag in the text
2108
	* @param  integer $len  Length of text consumed by the tag
2109
	* @param  integer $prio Tag's priority
2110
	* @return Tag
2111
	*/
2112 168
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag     = new Tag($type, $name, $pos, $len, $prio);
		$isKnown = isset($this->tagsConfig[$name]);

		// Known tags receive the rules bitfield from their config
		if ($isKnown)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Unknown tags are invalidated without a log entry, unless they are system tags
		if (!$isKnown && !$tag->isSystemTag())
		{
			$tag->invalidate();

			return $tag;
		}

		// Disabled tags are invalidated and reported through the logger
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$context = [
				'tag'     => $tag,
				'tagName' => $name
			];
			$this->logger->warn('Tag is disabled', $context);
			$tag->invalidate();

			return $tag;
		}

		// Tags with a negative length or position, or that run past the end of the text, are
		// invalidated as well
		$isOutOfBounds = ($len < 0 || $pos < 0 || $pos + $len > $this->textLen);
		if ($isOutOfBounds)
		{
			$tag->invalidate();

			return $tag;
		}

		// Only tags that passed every check make it into the tag stack
		$this->insertTag($tag);

		return $tag;
	}
2151
2152
	/**
2153
	* Insert given tag in the tag stack
2154
	*
2155
	* @param  Tag  $tag
2156
	* @return void
2157
	*/
2158 161
	protected function insertTag(Tag $tag)
	{
		// While the stack is not sorted the order does not matter, so the tag is simply appended
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// The stack is sorted: walk down from the top, moving up by one slot every tag that
		// compares greater than the new tag, then put the new tag in the slot that was freed
		for ($idx = count($this->tagStack); $idx > 0; --$idx)
		{
			$below = $this->tagStack[$idx - 1];
			if (self::compareTags($below, $tag) <= 0)
			{
				break;
			}
			$this->tagStack[$idx] = $below;
		}
		$this->tagStack[$idx] = $tag;
	}
2176
2177
	/**
2178
	* Add a pair of tags
2179
	*
2180
	* @param  string  $name     Name of the tags
2181
	* @param  integer $startPos Position of the start tag
2182
	* @param  integer $startLen Length of the start tag
2183
	* @param  integer $endPos   Position of the end tag
2184
	* @param  integer $endLen   Length of the end tag
2185
	* @param  integer $prio     Start tag's priority
2186
	* @return Tag               Start tag
2187
	*/
2188 17
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// The end tag is created before the start tag in an attempt to keep the stack in the
		// correct order. Only the start tag receives $prio
		$closer = $this->addEndTag($name, $endPos, $endLen);
		$opener = $this->addStartTag($name, $startPos, $startLen, $prio);

		// Link the two tags together; the start tag is what the caller gets back
		$opener->pairWith($closer);

		return $opener;
	}
2197
2198
	/**
2199
	* Add a tag that represents a verbatim copy of the original text
2200
	*
2201
	* @param  integer $pos  Position of the tag in the text
2202
	* @param  integer $len  Length of text consumed by the tag
2203
	* @param  integer $prio Tag's priority
2204
	* @return Tag
2205
	*/
2206 4
	public function addVerbatim($pos, $len, $prio = 0)
	{
		// Verbatim text is represented by a self-closing "v" tag that spans the given range
		$tagName = 'v';

		return $this->addTag(Tag::SELF_CLOSING_TAG, $tagName, $pos, $len, $prio);
	}
2210
2211
	/**
2212
	* Sort tags by position and precedence
2213
	*
2214
	* @return void
2215
	*/
2216 148
	protected function sortTags()
	{
		// Order the whole stack with compareTags(), then flag it as sorted so that insertTag()
		// keeps it ordered from now on
		$comparator = [__CLASS__, 'compareTags'];
		usort($this->tagStack, $comparator);

		$this->tagStackIsSorted = true;
	}
2221
2222
	/**
2223
	* sortTags() callback
2224
	*
2225
	* Tags are stored as a stack, in LIFO order. We sort tags by position _descending_ so that they
2226
	* are processed in the order they appear in the text.
2227
	*
2228
	* @param  Tag     $a First tag to compare
2229
	* @param  Tag     $b Second tag to compare
2230
	* @return integer
2231
	*/
2232 111
	protected static function compareTags(Tag $a, Tag $b)
	{
		// 1. Position, descending: the tag with the greater position is sorted first
		$posA = $a->getPos();
		$posB = $b->getPos();
		if ($posA !== $posB)
		{
			return $posB - $posA;
		}

		// 2. Same position: sortPriority, descending. The tag with the lower value is sorted
		//    last, which means it is processed first. IOW, -10 is processed before 10
		$prioA = $a->getSortPriority();
		$prioB = $b->getSortPriority();
		if ($prioA !== $prioB)
		{
			return $prioB - $prioA;
		}

		// 3. Same position and priority: the outcome depends on the tags' lengths
		$lenA = $a->getLen();
		$lenB = $b->getLen();

		// Both tags consume text: sort by length ascending, so that the longest match is
		// processed first. Tags of identical length compare as equal, which leaves their
		// relative order up to the sort implementation
		if ($lenA && $lenB)
		{
			return $lenA - $lenB;
		}

		// Exactly one of the tags is zero-width: it is sorted after the wider tag so that it
		// has a chance to be processed before the next character is consumed, which would
		// force it to be skipped
		if ($lenA || $lenB)
		{
			return ($lenA) ? -1 : 1;
		}

		// Both tags are zero-width: end tags are sorted after start tags so that a pair that
		// ends with a zero-width tag can be closed before another pair starts with a zero-width
		// tag, e.g. the pairs that would enclose each of the letters in the string "XY".
		// Self-closing tags sit between end tags and start tags in an attempt to keep them out
		// of tag pairs
		$rank = [
			Tag::END_TAG          => 0,
			Tag::SELF_CLOSING_TAG => 1,
			Tag::START_TAG        => 2
		];

		return $rank[$b->getType()] - $rank[$a->getType()];
	}
2285
}