Completed
Push — master ( abdfd9...3d7542 )
by Josh
21:47
created

Parser::addFosterTag()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 8
ccs 5
cts 5
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 2
crap 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Parser\Logger;
13
use s9e\TextFormatter\Parser\Tag;
14
15
class Parser
16
{
17
	/**#@+
18
	* Boolean rules bitfield
19
	*/
20
	const RULE_AUTO_CLOSE        = 1 << 0;
21
	const RULE_AUTO_REOPEN       = 1 << 1;
22
	const RULE_BREAK_PARAGRAPH   = 1 << 2;
23
	const RULE_CREATE_PARAGRAPHS = 1 << 3;
24
	const RULE_DISABLE_AUTO_BR   = 1 << 4;
25
	const RULE_ENABLE_AUTO_BR    = 1 << 5;
26
	const RULE_IGNORE_TAGS       = 1 << 6;
27
	const RULE_IGNORE_TEXT       = 1 << 7;
28
	const RULE_IGNORE_WHITESPACE = 1 << 8;
29
	const RULE_IS_TRANSPARENT    = 1 << 9;
30
	const RULE_PREVENT_BR        = 1 << 10;
31
	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
32
	const RULE_TRIM_FIRST_LINE   = 1 << 12;
33
	/**#@-*/
34
35
	/**
36
	* Bitwise disjunction of rules related to automatic line breaks
37
	*/
38
	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
39
40
	/**
41
	* Bitwise disjunction of rules that are inherited by subcontexts
42
	*/
43
	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
44
45
	/**
46
	* All the characters that are considered whitespace
47
	*/
48
	const WHITESPACE = " \n\t";
49
50
	/**
51
	* @var array Number of open tags for each tag name
52
	*/
53
	protected $cntOpen;
54
55
	/**
56
	* @var array Number of times each tag has been used
57
	*/
58
	protected $cntTotal;
59
60
	/**
61
	* @var array Current context
62
	*/
63
	protected $context;
64
65
	/**
66
	* @var integer How hard the parser has worked on fixing bad markup so far
67
	*/
68
	protected $currentFixingCost;
69
70
	/**
71
	* @var Tag Current tag being processed
72
	*/
73
	protected $currentTag;
74
75
	/**
76
	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
77
	*/
78
	protected $isRich;
79
80
	/**
81
	* @var Logger This parser's logger
82
	*/
83
	protected $logger;
84
85
	/**
86
	* @var integer How hard the parser should work on fixing bad markup
87
	*/
88
	public $maxFixingCost = 1000;
89
90
	/**
91
	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
92
	*/
93
	protected $namespaces;
94
95
	/**
96
	* @var array Stack of open tags (instances of Tag)
97
	*/
98
	protected $openTags;
99
100
	/**
101
	* @var string This parser's output
102
	*/
103
	protected $output;
104
105
	/**
106
	* @var integer Position of the cursor in the original text
107
	*/
108
	protected $pos;
109
110
	/**
111
	* @var array Array of callbacks, using plugin names as keys
112
	*/
113
	protected $pluginParsers = [];
114
115
	/**
116
	* @var array Associative array of [pluginName => pluginConfig]
117
	*/
118
	protected $pluginsConfig;
119
120
	/**
121
	* @var array Variables registered for use in filters
122
	*/
123
	public $registeredVars = [];
124
125
	/**
126
	* @var array Root context, used at the root of the document
127
	*/
128
	protected $rootContext;
129
130
	/**
131
	* @var array Tags' config
132
	*/
133
	protected $tagsConfig;
134
135
	/**
136
	* @var array Tag storage
137
	*/
138
	protected $tagStack;
139
140
	/**
141
	* @var bool Whether the tags in the stack are sorted
142
	*/
143
	protected $tagStackIsSorted;
144
145
	/**
146
	* @var string Text being parsed
147
	*/
148
	protected $text;
149
150
	/**
151
	* @var integer Length of the text being parsed
152
	*/
153
	protected $textLen;
154
155
	/**
156
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
157
	*              whether the parser was reset during execution
158
	*/
159
	protected $uid = 0;
160
161
	/**
162
	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
163
	*/
164
	protected $wsPos;
165
166
	/**
	* Constructor
	*
	* Copies the "plugins", "registeredVars", "rootContext" and "tags" entries of the given
	* config into their matching properties, then runs the same initialization as __wakeup()
	*
	* @param array $config Parser configuration
	*/
	public function __construct(array $config)
	{
		// Map each property to the config key that holds its value
		$propertyMap = [
			'pluginsConfig'  => 'plugins',
			'registeredVars' => 'registeredVars',
			'rootContext'    => 'rootContext',
			'tagsConfig'     => 'tags'
		];
		foreach ($propertyMap as $propName => $configKey)
		{
			$this->$propName = $config[$configKey];
		}

		// Finish the setup the same way an unserialized instance would
		$this->__wakeup();
	}
178
179
	/**
	* Serializer
	*
	* Lists the names of the properties that are kept when this object is serialized.
	*
	* NOTE: __sleep() is used rather than the Serializable interface so that the user remains
	* free to pick the serializer (e.g. igbinary)
	*
	* @return array Names of the persistent properties
	*/
	public function __sleep()
	{
		$persistentProps = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

		return $persistentProps;
	}
193
194
	/**
	* Unserializer
	*
	* Gives this parser a fresh Logger instance. Also called at the end of the constructor
	*
	* @return void
	*/
	public function __wakeup()
	{
		$logger       = new Logger;
		$this->logger = $logger;
	}
203
204
	/**
	* Reset the parser for a new parsing
	*
	* Normalizes the text, clears the logs, reinitializes every piece of per-parsing state and
	* increments the uid
	*
	* @param  string $text Text to be parsed
	* @return void
	*/
	protected function reset($text)
	{
		// CR and CRLF both become LF
		$text = preg_replace('/\\r\\n?/', "\n", $text);

		// Strip the control characters that aren't allowed in XML
		$text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $text);

		// Start from the root context, outside of any paragraph
		$context                = $this->rootContext;
		$context['inParagraph'] = false;
		$this->context          = $context;

		// Text and cursors
		$this->text    = $text;
		$this->textLen = strlen($text);
		$this->pos     = 0;
		$this->wsPos   = 0;

		// Output
		$this->output     = '';
		$this->isRich     = false;
		$this->namespaces = [];

		// Tags
		$this->cntOpen           = [];
		$this->cntTotal          = [];
		$this->currentFixingCost = 0;
		$this->currentTag        = null;
		$this->openTags          = [];
		$this->tagStack          = [];
		$this->tagStackIsSorted  = false;

		// Empty the logs
		$this->logger->clear();

		// A new uid marks the start of a new parsing
		$this->uid++;
	}
242
243
	/**
	* Set a tag's option
	*
	* The tag's config is copied, removed from the tags' config then stored again, so that it
	* ends up being a value and not a reference, which prevents potential side-effects.
	* References contained *inside* the tag's config are left untouched. Nothing happens if the
	* tag does not exist
	*
	* @param  string $tagName     Tag's name
	* @param  string $optionName  Option's name
	* @param  mixed  $optionValue Option's value
	* @return void
	*/
	protected function setTagOption($tagName, $optionName, $optionValue)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		// Take a copy of the config then drop the original entry, which destroys the reference
		$config = $this->tagsConfig[$tagName];
		unset($this->tagsConfig[$tagName]);

		// Store the modified copy under the same name
		$config[$optionName]        = $optionValue;
		$this->tagsConfig[$tagName] = $config;
	}
267
268
	//==========================================================================
269
	// Public API
270
	//==========================================================================
271
272
	/**
	* Disable a tag
	*
	* Sets the tag's "isDisabled" option to TRUE
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function disableTag($tagName)
	{
		$isDisabled = true;
		$this->setTagOption($tagName, 'isDisabled', $isDisabled);
	}
282
283
	/**
	* Enable a tag
	*
	* Removes the "isDisabled" option from the tag's config. Unknown tags are ignored
	*
	* @param  string $tagName Name of the tag
	* @return void
	*/
	public function enableTag($tagName)
	{
		if (!isset($this->tagsConfig[$tagName]))
		{
			return;
		}

		unset($this->tagsConfig[$tagName]['isDisabled']);
	}
296
297
	/**
	* Get this parser's Logger instance
	*
	* @return Logger The logger stored in this parser
	*/
	public function getLogger()
	{
		$logger = $this->logger;

		return $logger;
	}
306
307
	/**
	* Return the last text parsed
	*
	* The text returned is the normalized text stored by reset(): EOLs are normalized to LF and
	* other control codes are stripped, so it may differ slightly from the original text. Log
	* entries contain offsets based on this normalized text, which is what this method is meant
	* to support
	*
	* @see Parser::reset()
	*
	* @return string
	*/
	public function getText()
	{
		$normalizedText = $this->text;

		return $normalizedText;
	}
323
324
	/**
	* Parse a text
	*
	* @param  string $text Text to parse
	* @return string       XML representation
	*
	* @throws RuntimeException if the parser's uid changed while the text was being parsed
	*/
	public function parse($text)
	{
		// Start from a clean state and remember which parsing this is
		$this->reset($text);
		$expectedUid = $this->uid;

		// Collect the tags, process them, then wrap up the document
		$this->executePluginParsers();
		$this->processTags();
		$this->finalizeOutput();

		// A different uid means a plugin or a filter reset the parser mid-execution
		if ($expectedUid !== $this->uid)
		{
			throw new RuntimeException('The parser has been reset during execution');
		}

		// Warn if more work went into fixing the markup than what is allowed
		if ($this->maxFixingCost < $this->currentFixingCost)
		{
			$this->logger->warn('Fixing cost limit exceeded');
		}

		return $this->output;
	}
357
358
	/**
	* Change a tag's tagLimit
	*
	* NOTE: the default tagLimit should generally be set during configuration instead
	*
	* @param  string  $tagName  The tag's name, in UPPERCASE
	* @param  integer $tagLimit New value for the tag's "tagLimit" option
	* @return void
	*/
	public function setTagLimit($tagName, $tagLimit)
	{
		$optionName = 'tagLimit';
		$this->setTagOption($tagName, $optionName, $tagLimit);
	}
371
372
	/**
	* Change a tag's nestingLimit
	*
	* NOTE: the default nestingLimit should generally be set during configuration instead
	*
	* @param  string  $tagName      The tag's name, in UPPERCASE
	* @param  integer $nestingLimit New value for the tag's "nestingLimit" option
	* @return void
	*/
	public function setNestingLimit($tagName, $nestingLimit)
	{
		$optionName = 'nestingLimit';
		$this->setTagOption($tagName, $optionName, $nestingLimit);
	}
385
386
	//==========================================================================
387
	// Filter processing
388
	//==========================================================================
389
390
	/**
	* Execute all the attribute preprocessors of given tag
	*
	* Each preprocessor is a list of [attribute name, regexp, map]. Preprocessors whose source
	* attribute is not set on the tag are skipped
	*
	* @private
	*
	* @param  Tag   $tag       Source tag
	* @param  array $tagConfig Tag's config
	* @return bool             Unconditionally TRUE
	*/
	public static function executeAttributePreprocessors(Tag $tag, array $tagConfig)
	{
		if (empty($tagConfig['attributePreprocessors']))
		{
			return true;
		}

		foreach ($tagConfig['attributePreprocessors'] as $preprocessor)
		{
			list($attrName, $regexp, $map) = $preprocessor;

			if ($tag->hasAttribute($attrName))
			{
				self::executeAttributePreprocessor($tag, $attrName, $regexp, $map);
			}
		}

		return true;
	}
416
417
	/**
	* Execute an attribute preprocessor
	*
	* Matches the attribute's value against the regexp and copies the mapped captures into the
	* tag's attributes
	*
	* @param  Tag      $tag      Tag whose attribute is preprocessed
	* @param  string   $attrName Name of the source attribute
	* @param  string   $regexp   Regexp applied to the attribute's value
	* @param  string[] $map      Attribute names, using the index of their capture as keys
	* @return void
	*/
	protected static function executeAttributePreprocessor(Tag $tag, $attrName, $regexp, $map)
	{
		$captures = self::getNamedCaptures($tag->getAttribute($attrName), $regexp, $map);

		foreach ($captures as $name => $value)
		{
			// A preprocessor may replace its own source attribute, but it must not overwrite
			// any other attribute that is already set
			if ($name !== $attrName && $tag->hasAttribute($name))
			{
				continue;
			}

			$tag->setAttribute($name, $value);
		}
	}
440
441
	/**
	* Execute a regexp and return the values of the mapped captures
	*
	* Captures that did not participate in the match or that are empty are omitted
	*
	* @param  string   $attrValue Value the regexp is matched against
	* @param  string   $regexp    Regexp
	* @param  string[] $map       Names to use in the return array, keyed by capture index
	* @return array               Captured values using the mapped names as keys, or an empty
	*                             array if the regexp does not match
	*/
	protected static function getNamedCaptures($attrValue, $regexp, $map)
	{
		$captures = [];

		if (preg_match($regexp, $attrValue, $matches))
		{
			foreach ($map as $idx => $name)
			{
				if (!isset($matches[$idx]) || $matches[$idx] === '')
				{
					continue;
				}

				$captures[$name] = $matches[$idx];
			}
		}

		return $captures;
	}
467
468
	/**
	* Execute a filter
	*
	* Builds the list of arguments from the filter's "params" then calls the filter's
	* "callback". Params with a numeric key are passed by value. Other params are passed by
	* name: their value is taken from $vars if set there, otherwise from
	* $vars['registeredVars'], otherwise NULL is passed
	*
	* @see s9e\TextFormatter\Configurator\Items\ProgrammableCallback
	*
	* @param  array $filter Programmed callback
	* @param  array $vars   Variables to be used when executing the callback
	* @return mixed         Whatever the callback returns
	*/
	protected static function executeFilter(array $filter, array $vars)
	{
		$callback = $filter['callback'];

		$args = [];
		if (isset($filter['params']))
		{
			foreach ($filter['params'] as $name => $value)
			{
				if (is_numeric($name))
				{
					// By-value param
					$args[] = $value;
					continue;
				}

				if (isset($vars[$name]))
				{
					// By-name param using a supplied var
					$args[] = $vars[$name];
					continue;
				}

				// By-name param using a registered var, or NULL if the name is unknown
				$args[] = (isset($vars['registeredVars'][$name]))
				        ? $vars['registeredVars'][$name]
				        : null;
			}
		}

		return call_user_func_array($callback, $args);
	}
509
510
	/**
	* Filter the attributes of given tag
	*
	* Works in three passes: attributes that have a generator get a generated value,
	* then the tag's attributes are filtered (undefined attributes and attributes rejected by
	* their filterChain are removed) and finally missing attributes receive their default value.
	* If the tag's config defines no attributes, all of the tag's attributes are removed
	*
	* @private
	*
	* @param  Tag    $tag            Tag being checked
	* @param  array  $tagConfig      Tag's config
	* @param  array  $registeredVars Array of registered vars for use in attribute filters
	* @param  Logger $logger         This parser's Logger instance
	* @return bool                   Whether the whole attribute set is valid
	*/
	public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
	{
		if (empty($tagConfig['attributes']))
		{
			$tag->setAttributes([]);

			return true;
		}

		$attributesConfig = $tagConfig['attributes'];

		// First pass: generate values for attributes with a generator set
		foreach ($attributesConfig as $attrName => $attrConfig)
		{
			if (!isset($attrConfig['generator']))
			{
				continue;
			}

			$vars = [
				'attrName'       => $attrName,
				'logger'         => $logger,
				'registeredVars' => $registeredVars
			];
			$tag->setAttribute($attrName, self::executeFilter($attrConfig['generator'], $vars));
		}

		// Second pass: filter the attributes and remove the invalid ones
		foreach ($tag->getAttributes() as $attrName => $value)
		{
			// Attributes that are not defined in the tag's config are removed
			if (!isset($attributesConfig[$attrName]))
			{
				$tag->removeAttribute($attrName);
				continue;
			}

			// Attributes with no filterChain are kept as-is
			if (!isset($attributesConfig[$attrName]['filterChain']))
			{
				continue;
			}

			// Record the name of the attribute being filtered into the logger
			$logger->setAttribute($attrName);

			foreach ($attributesConfig[$attrName]['filterChain'] as $filter)
			{
				$vars = [
					'attrName'       => $attrName,
					'attrValue'      => $value,
					'logger'         => $logger,
					'registeredVars' => $registeredVars
				];
				$value = self::executeFilter($filter, $vars);

				// A filter that returns FALSE invalidates the attribute and ends the chain
				if ($value === false)
				{
					$tag->removeAttribute($attrName);
					break;
				}
			}

			// Store the filtered value if it's valid
			if ($value !== false)
			{
				$tag->setAttribute($attrName, $value);
			}

			// Remove the attribute's name from the logger
			$logger->unsetAttribute();
		}

		// Third pass: handle the attributes that are defined but missing from the tag
		foreach ($attributesConfig as $attrName => $attrConfig)
		{
			if ($tag->hasAttribute($attrName))
			{
				continue;
			}

			if (isset($attrConfig['defaultValue']))
			{
				// Use the attribute's default value
				$tag->setAttribute($attrName, $attrConfig['defaultValue']);
				continue;
			}

			if (!empty($attrConfig['required']))
			{
				// Missing, required and without a default value: the attribute set is invalid
				return false;
			}
		}

		return true;
	}
621
622
	/**
	* Execute given tag's filterChain
	*
	* The filters are executed in order and the chain stops at the first filter whose return
	* value is falsy. A tag with no filterChain is always valid
	*
	* @param  Tag  $tag Tag to filter
	* @return bool      Whether the tag is valid
	*/
	protected function filterTag(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];

		if (empty($tagConfig['filterChain']))
		{
			return true;
		}

		// Record the tag being processed into the logger so that it can be added to the context
		// of messages logged during the execution
		$this->logger->setTag($tag);

		// Variables that are accessible to filters
		$vars = [
			'logger'         => $this->logger,
			'openTags'       => $this->openTags,
			'parser'         => $this,
			'registeredVars' => $this->registeredVars,
			'tag'            => $tag,
			'tagConfig'      => $tagConfig,
			'text'           => $this->text
		];

		$isValid = true;
		foreach ($tagConfig['filterChain'] as $filter)
		{
			if (self::executeFilter($filter, $vars))
			{
				continue;
			}

			$isValid = false;
			break;
		}

		// Remove the tag from the logger
		$this->logger->unsetTag();

		return $isValid;
	}
666
667
	//==========================================================================
668
	// Output handling
669
	//==========================================================================
670
671
	/**
	* Finalize the output by appending the rest of the unprocessed text and create the root node
	*
	* @return void
	*/
	protected function finalizeOutput()
	{
		// Flush the remaining text and close the last paragraph
		$this->outputText($this->textLen, 0, true);

		// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs. Removing a
		// pair can leave its parent empty, so we repeat until nothing gets removed
		$emptyPairRegexp = '(<([^ />]+)[^>]*></\\1>)';
		do
		{
			$this->output = preg_replace($emptyPairRegexp, '', $this->output, -1, $removedCnt);
		}
		while ($removedCnt > 0);

		// Merge consecutive <i> tags
		if (strpos($this->output, '</i><i>') !== false)
		{
			$this->output = str_replace('</i><i>', '', $this->output);
		}

		// Encode Unicode characters that are outside of the BMP
		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

		// The root is <r> if the text is rich, or <t> for plain text (including <p></p> and
		// <br/>)
		$rootName = ($this->isRich) ? 'r' : 't';

		// Declare every namespace prefix that was recorded while outputting tags
		$declarations = '';
		foreach ($this->namespaces as $prefix => $unused)
		{
			$declarations .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
		}

		$this->output = '<' . $rootName . $declarations . '>' . $this->output . '</' . $rootName . '>';
	}
709
710
	/**
	* Append a tag to the output
	*
	* Outputs the text that precedes the tag, then the tag itself along with the text it
	* consumes, then moves the cursor past the tag. Marks the output as rich
	*
	* @param  Tag  $tag Tag to append
	* @return void
	*/
	protected function outputTag(Tag $tag)
	{
		$this->isRich = true;

		$name  = $tag->getName();
		$pos   = $tag->getPos();
		$len   = $tag->getLen();
		$flags = $tag->getFlags();

		// Number of newlines to ignore before and after the tag
		$skipBefore = 0;
		$skipAfter  = 0;
		if ($flags & self::RULE_IGNORE_WHITESPACE)
		{
			$skipBefore = 1;
			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
		}

		$isStartTag      = $tag->isStartTag();
		$breaksParagraph = (bool) ($flags & self::RULE_BREAK_PARAGRAPH);

		// Current paragraph must end before the tag if:
		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
		//  - the tag is an end tag (but not self-closing)
		$closeParagraph = (!$isStartTag || $breaksParagraph);

		// Let the cursor catch up with this tag's position
		$this->outputText($pos, $skipBefore, $closeParagraph);

		// Escaped copy of the text consumed by the tag
		$consumedText = '';
		if ($len)
		{
			$consumedText = htmlspecialchars(substr($this->text, $pos, $len), ENT_NOQUOTES, 'UTF-8');
		}

		if ($isStartTag)
		{
			// Tags that do not break paragraphs may have to open one first
			if (!$breaksParagraph)
			{
				$this->outputParagraphStart($pos);
			}

			// Record this tag's namespace, if applicable
			$colonPos = strpos($name, ':');
			if ($colonPos)
			{
				$this->namespaces[substr($name, 0, $colonPos)] = 0;
			}

			// Start tag with its attributes in lexical order, which helps canonicalizing the
			// output. The tag is left unclosed for now
			$xml        = '<' . $name;
			$attributes = $tag->getAttributes();
			ksort($attributes);
			foreach ($attributes as $attrName => $attrValue)
			{
				$escapedValue = htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8');
				$xml .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', $escapedValue) . '"';
			}

			if (!$tag->isSelfClosingTag())
			{
				// Regular start tag: the consumed text, if any, goes in a <s> element
				$xml .= ($len) ? '><s>' . $consumedText . '</s>' : '>';
			}
			elseif ($len)
			{
				// Self-closing tag that consumes text: the text becomes the tag's content
				$xml .= '>' . $consumedText . '</' . $name . '>';
			}
			else
			{
				$xml .= '/>';
			}
		}
		else
		{
			// End tag: the consumed text, if any, goes in a <e> element before the tag closes
			$xml  = ($len) ? '<e>' . $consumedText . '</e>' : '';
			$xml .= '</' . $name . '>';
		}

		$this->output .= $xml;

		// Move the cursor past the tag
		$this->pos = $pos + $len;

		// Skip newlines (no other whitespace) after this tag, up to $skipAfter of them
		$this->wsPos = $this->pos;
		while ($skipAfter > 0 && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
		{
			++$this->wsPos;
			--$skipAfter;
		}
	}
832
833
	/**
	* Output the text between the cursor's position (included) and given position (not included)
	*
	* Depending on the flags of the current context, the text is either escaped and output as
	* plain text (optionally split into paragraphs and with automatic line breaks), or escaped
	* and wrapped in an ignore tag. Whitespace at the end of the text can be left outside of the
	* formatted text. The cursor is moved to $catchupPos
	*
	* @param  integer $catchupPos     Position we're catching up to
	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
	* @return void
	*/
	protected function outputText($catchupPos, $maxLines, $closeParagraph)
	{
		if ($closeParagraph)
		{
			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
			{
				// This context does not use paragraphs, there is nothing to close
				$closeParagraph = false;
			}
			else
			{
				// Ignore any number of lines at the end if we're closing a paragraph
				$maxLines = -1;
			}
		}

		if ($this->pos >= $catchupPos)
		{
			// We're already there, close the paragraph if applicable and return
			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Skip over previously identified whitespace if applicable
		if ($this->wsPos > $this->pos)
		{
			$skipPos       = min($catchupPos, $this->wsPos);
			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
			$this->pos     = $skipPos;

			if ($this->pos >= $catchupPos)
			{
				// Skipped everything. Close the paragraph if applicable and return
				if ($closeParagraph)
				{
					$this->outputParagraphEnd();
				}

				return;
			}
		}

		// Test whether we're even supposed to output anything
		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
		{
			$catchupLen  = $catchupPos - $this->pos;
			$catchupText = substr($this->text, $this->pos, $catchupLen);

			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
			// tags. The text comes straight from the input so it must be escaped before it is
			// added to the XML, the same way outputIgnoreTag() escapes ignored text
			if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
			{
				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
			}

			$this->output .= $catchupText;
			$this->pos = $catchupPos;

			if ($closeParagraph)
			{
				$this->outputParagraphEnd();
			}

			return;
		}

		// Compute the amount of text to ignore at the end of the output
		$ignorePos = $catchupPos;
		$ignoreLen = 0;

		// Ignore as many lines (including whitespace) as specified. A negative $maxLines never
		// reaches zero, which means there is no limit
		while ($maxLines && --$ignorePos >= $this->pos)
		{
			$c = $this->text[$ignorePos];
			if (strpos(self::WHITESPACE, $c) === false)
			{
				break;
			}

			if ($c === "\n")
			{
				--$maxLines;
			}

			++$ignoreLen;
		}

		// Adjust $catchupPos to ignore the text at the end
		$catchupPos -= $ignoreLen;

		// Break down the text in paragraphs if applicable
		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
		{
			if (!$this->context['inParagraph'])
			{
				// Leading whitespace stays outside of the paragraph
				$this->outputWhitespace($catchupPos);

				if ($catchupPos > $this->pos)
				{
					$this->outputParagraphStart($catchupPos);
				}
			}

			// Look for a paragraph break in this text
			$pbPos = strpos($this->text, "\n\n", $this->pos);

			while ($pbPos !== false && $pbPos < $catchupPos)
			{
				// Output the text up to the break and close the paragraph, then open a new one
				$this->outputText($pbPos, 0, true);
				$this->outputParagraphStart($catchupPos);

				$pbPos = strpos($this->text, "\n\n", $this->pos);
			}
		}

		// Capture, escape and output the text
		if ($catchupPos > $this->pos)
		{
			$catchupText = htmlspecialchars(
				substr($this->text, $this->pos, $catchupPos - $this->pos),
				ENT_NOQUOTES,
				'UTF-8'
			);

			// Format line breaks if applicable: automatic line breaks must be enabled and
			// neither disabled nor suspended
			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
			{
				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
			}

			$this->output .= $catchupText;
		}

		// Close the paragraph if applicable
		if ($closeParagraph)
		{
			$this->outputParagraphEnd();
		}

		// Add the ignored text if applicable. It is only composed of whitespace
		if ($ignoreLen)
		{
			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
		}

		// Move the cursor past the text
		$this->pos = $catchupPos + $ignoreLen;
	}
992
993
	/**
	* Output a linebreak tag
	*
	* Outputs the text that precedes the tag, followed by a <br/> element
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputBrTag(Tag $tag)
	{
		$brPos = $tag->getPos();

		$this->outputText($brPos, 0, false);
		$this->output .= '<br/>';
	}
1004
1005
	/**
	* Output an ignore tag
	*
	* The text covered by the tag is escaped and output inside of an <i> element, and the output
	* is marked as rich
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputIgnoreTag(Tag $tag)
	{
		$ignorePos = $tag->getPos();
		$ignoreLen = $tag->getLen();

		// Catch up with the tag's position
		$this->outputText($ignorePos, 0, false);

		// Capture and escape the text to ignore, then output it
		$ignoredText = htmlspecialchars(
			substr($this->text, $ignorePos, $ignoreLen),
			ENT_NOQUOTES,
			'UTF-8'
		);
		$this->output .= '<i>' . $ignoredText . '</i>';
		$this->isRich = true;

		// Move the cursor past this tag
		$this->pos = $ignorePos + $ignoreLen;
	}
1027
1028
	/**
	* Start a paragraph between current position and given position, if applicable
	*
	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
	* @return void
	*/
	protected function outputParagraphStart($maxPos)
	{
		// Nothing to do if this context doesn't use paragraphs or if one is already open
		$usesParagraphs = (bool) ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS);
		if ($this->context['inParagraph'] || !$usesParagraphs)
		{
			return;
		}

		// Whitespace between $this->pos and $maxPos is output before the paragraph
		$this->outputWhitespace($maxPos);

		// Do not open a paragraph at the very end of the text
		if ($this->pos >= $this->textLen)
		{
			return;
		}

		$this->output .= '<p>';
		$this->context['inParagraph'] = true;
	}
1053
1054
	/**
	* Close current paragraph at current position if applicable
	*
	* @return void
	*/
	protected function outputParagraphEnd()
	{
		// Only an open paragraph can be closed
		if ($this->context['inParagraph'])
		{
			$this->output .= '</p>';
			$this->context['inParagraph'] = false;
		}
	}
1070
1071
	/**
	* Output the content of a verbatim tag
	*
	* Outputs the text up to the end of the current tag while the context's flags are
	* temporarily replaced with the flags of given tag
	*
	* @param  Tag  $tag
	* @return void
	*/
	protected function outputVerbatim(Tag $tag)
	{
		$savedFlags = $this->context['flags'];
		$endPos     = $this->currentTag->getPos() + $this->currentTag->getLen();

		// Output the text using the tag's flags, then restore the context's own flags
		$this->context['flags'] = $tag->getFlags();
		$this->outputText($endPos, 0, false);
		$this->context['flags'] = $savedFlags;
	}
1084
1085
	/**
	* Skip as much whitespace after current position as possible
	*
	* The whitespace is copied to the output as-is and the cursor is moved past it
	*
	* @param  integer $maxPos Rightmost character to be skipped
	* @return void
	*/
	protected function outputWhitespace($maxPos)
	{
		if ($maxPos <= $this->pos)
		{
			return;
		}

		// Length of the run of whitespace that starts at the cursor, capped at $maxPos
		$wsLen = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
		if ($wsLen > 0)
		{
			$this->output .= substr($this->text, $this->pos, $wsLen);
			$this->pos    += $wsLen;
		}
	}
1104
1105
	//==========================================================================
1106
	// Plugins handling
1107
	//==========================================================================
1108
1109
	/**
	* Flag a plugin as disabled
	*
	* Plugins whose config has a non-empty isDisabled value are skipped by executePluginParsers().
	* Nothing happens if there is no config for given plugin
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function disablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		// Work on a copy of the config and drop the stored entry, so that the entry we write back
		// is not a reference
		$config = $this->pluginsConfig[$pluginName];
		unset($this->pluginsConfig[$pluginName]);

		// Flag the copy as disabled and store it back under the same name
		$config['isDisabled'] = true;
		$this->pluginsConfig[$pluginName] = $config;
	}
1128
1129
	/**
	* Flag a plugin as enabled
	*
	* Resets the isDisabled value of the plugin's config. Nothing happens if there is no config
	* for given plugin
	*
	* @param  string $pluginName Name of the plugin
	* @return void
	*/
	public function enablePlugin($pluginName)
	{
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			return;
		}

		$this->pluginsConfig[$pluginName]['isDisabled'] = false;
	}
1142
1143
	/**
	* Run the parser of given plugin against the text
	*
	* The parser is not executed if the plugin has a quickMatch string that is not found in the
	* text, or if it has a regexp that does not produce any match
	*
	* @param  string $pluginName Plugin's name
	* @return void
	*/
	protected function executePluginParser($pluginName)
	{
		$config = $this->pluginsConfig[$pluginName];

		// Cheap test first: skip the plugin if its quickMatch string is absent from the text
		if (isset($config['quickMatch']))
		{
			if (strpos($this->text, $config['quickMatch']) === false)
			{
				return;
			}
		}

		// Collect the regexp's matches, if the plugin uses one
		$matches = [];
		if (isset($config['regexp']))
		{
			$matches = $this->getMatches($config['regexp'], $config['regexpLimit']);
			if (!$matches)
			{
				return;
			}
		}

		// The parser receives the text and the matches, and adds tags via $this->addStartTag()
		// and others
		$parser = $this->getPluginParser($pluginName);
		call_user_func($parser, $this->text, $matches);
	}
1170
1171
	/**
	* Run the parser of every plugin that is not disabled
	*
	* @return void
	*/
	protected function executePluginParsers()
	{
		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
		{
			// Skip plugins that have been flagged as disabled
			if (!empty($pluginConfig['isDisabled']))
			{
				continue;
			}

			$this->executePluginParser($pluginName);
		}
	}
1186
1187
	/**
	* Match given regexp against the text and return no more matches than given limit
	*
	* Matches are captured in set order, with their offset
	*
	* @param  string  $regexp
	* @param  integer $limit
	* @return array
	*/
	protected function getMatches($regexp, $limit)
	{
		$flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
		$total = preg_match_all($regexp, $this->text, $matches, $flags);

		// Discard the matches in excess of the limit
		return ($total > $limit) ? array_slice($matches, 0, $limit) : $matches;
	}
1204
1205
	/**
	* Return the callback used to run given plugin's parser, creating it if necessary
	*
	* If no callback is cached for the plugin, a parser is instantiated from the className set in
	* the plugin's config (or from the plugin's default class name) and its parse() method is
	* cached as the callback
	*
	* @param  string $pluginName Plugin's name
	* @return callable
	*/
	protected function getPluginParser($pluginName)
	{
		// Reuse the cached callback if there is one
		if (isset($this->pluginParsers[$pluginName]))
		{
			return $this->pluginParsers[$pluginName];
		}

		$config = $this->pluginsConfig[$pluginName];
		if (isset($config['className']))
		{
			$className = $config['className'];
		}
		else
		{
			$className = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
		}

		// Instantiate the parser and cache its parse() method as the callback
		$this->pluginParsers[$pluginName] = [new $className($this, $config), 'parse'];

		return $this->pluginParsers[$pluginName];
	}
1227
1228
	/**
	* Register a parser
	*
	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
	* existing plugin
	*
	* @param  string   $pluginName Name of the plugin the parser is registered for
	* @param  callable $parser     Callback that is called with the text and the regexp's matches
	* @param  string   $regexp     Regexp whose matches are passed to the parser, if any
	* @param  integer  $limit      Maximum number of matches passed to the parser
	* @return void
	*
	* @throws InvalidArgumentException if $parser is not callable
	*/
	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
	{
		if (!is_callable($parser))
		{
			// $parser is the second argument of this method
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
		}

		// Create an empty config for this plugin to ensure it is executed
		if (!isset($this->pluginsConfig[$pluginName]))
		{
			$this->pluginsConfig[$pluginName] = [];
		}

		// Store the regexp and its limit in the plugin's config, where executePluginParser()
		// reads them from
		if (isset($regexp))
		{
			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
		}

		$this->pluginParsers[$pluginName] = $parser;
	}
1256
1257
	//==========================================================================
1258
	// Rules handling
1259
	//==========================================================================
1260
1261
	/**
	* Close the nearest open ancestor targeted by given tag's closeAncestor rules
	*
	* If such an ancestor exists, given tag is put back on the tag stack and an end tag for the
	* ancestor is added at given tag's position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeAncestor(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeAncestor']))
		{
			return false;
		}

		// Walk through the open tags, from the last one opened to the first
		$targets = $tagConfig['rules']['closeAncestor'];
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$ancestor = $this->openTags[$i];
			if (!isset($targets[$ancestor->getName()]))
			{
				continue;
			}

			// Reinsert this tag, then add an end tag for the ancestor that has to be closed
			$this->tagStack[] = $tag;
			$this->addMagicEndTag($ancestor, $tag->getPos());

			return true;
		}

		return false;
	}
1299
1300
	/**
	* Close the last open tag if it is targeted by given tag's closeParent rules
	*
	* If the parent has to be closed, given tag is put back on the tag stack and an end tag for
	* the parent is added at given tag's position
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function closeParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (empty($tagConfig['rules']['closeParent']))
		{
			return false;
		}

		// The parent is the last tag that was opened
		$parent = end($this->openTags);
		if (!isset($tagConfig['rules']['closeParent'][$parent->getName()]))
		{
			return false;
		}

		// Reinsert this tag, then add an end tag for its parent
		$this->tagStack[] = $tag;
		$this->addMagicEndTag($parent, $tag->getPos());

		return true;
	}
1333
1334
	/**
	* Add the start tags listed in given tag's createChild rules
	*
	* The new tags are 0-width and positioned after the whitespace that follows the cursor. Their
	* priority starts at -999 and increases by 1 for each tag in the list
	*
	* @param  Tag  $tag Tag
	* @return void
	*/
	protected function createChild(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['createChild']))
		{
			return;
		}

		// Skip the whitespace that follows the cursor
		$childPos = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
		$prio     = -1000;
		foreach ($tagConfig['rules']['createChild'] as $childName)
		{
			++$prio;
			$this->addStartTag($childName, $childPos, 0, $prio);
		}
	}
1353
1354
	/**
	* Apply fosterParent rules associated with given tag
	*
	* If the last open tag is targeted by given tag's fosterParent rules, the parent is closed
	* before given tag and, budget permitting, a copy of the parent is added after given tag
	*
	* NOTE: this rule could loop without bounds if a tag tried to foster itself, or if several
	*       tags tried to foster each other in a cycle. To mitigate the risk, a tag never creates
	*       a copy of a parent of the same name (the parent still gets closed) and
	*       currentFixingCost is checked and increased so that a cycle of multiple tags does not
	*       run indefinitely. The default tagLimit and nestingLimit also serve to prevent the loop
	*       from running indefinitely
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether a new tag has been added
	*/
	protected function fosterParent(Tag $tag)
	{
		if (empty($this->openTags))
		{
			return false;
		}

		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];
		if (empty($tagConfig['rules']['fosterParent']))
		{
			return false;
		}

		$parent     = end($this->openTags);
		$parentName = $parent->getName();
		if (!isset($tagConfig['rules']['fosterParent'][$parentName]))
		{
			return false;
		}

		// Only copy the parent if it's a different tag and the fixing budget is not spent
		$canFoster = ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost);
		if ($canFoster)
		{
			$this->addFosterTag($tag, $parent);
		}

		// Reinsert current tag
		$this->tagStack[] = $tag;

		// Close the parent with a priority that ensures it is processed before this tag
		$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

		// Account for the additional tags/processing
		$this->currentFixingCost += 4;

		return true;
	}
1403
1404
	/**
	* Test given tag against its requireAncestor rules
	*
	* The requirement is fulfilled if at least one of the listed ancestors is currently open.
	* An error is logged if none of them is
	*
	* @param  Tag  $tag Tag
	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
	*/
	protected function requireAncestor(Tag $tag)
	{
		$tagConfig = $this->tagsConfig[$tag->getName()];
		if (!isset($tagConfig['rules']['requireAncestor']))
		{
			return false;
		}

		$ancestorNames = $tagConfig['rules']['requireAncestor'];
		foreach ($ancestorNames as $ancestorName)
		{
			// One open ancestor is enough to fulfill the requirement
			if (!empty($this->cntOpen[$ancestorName]))
			{
				return false;
			}
		}

		$this->logger->err('Tag requires an ancestor', [
			'requireAncestor' => implode(',', $ancestorNames),
			'tag'             => $tag
		]);

		return true;
	}
1435
1436
	//==========================================================================
1437
	// Tag processing
1438
	//==========================================================================
1439
1440
	/**
	* Add a 0-width copy of a tag right after another tag
	*
	* The copy's position and priority are computed by getMagicStartCoords() from the end of
	* current tag. The copy is invalidated if current tag is invalidated
	*
	* @param  Tag  $tag       Current tag
	* @param  Tag  $fosterTag Tag to foster
	* @return void
	*/
	protected function addFosterTag(Tag $tag, Tag $fosterTag)
	{
		// $coords contains the position followed by the priority
		$coords = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

		// Add the copy and make it depend on current tag
		$copy = $this->addCopyTag($fosterTag, $coords[0], 0, $coords[1]);
		$tag->cascadeInvalidationTo($copy);
	}
1455
1456
	/**
	* Add a 0-width end tag that is paired with given start tag
	*
	* If the start tag has the RULE_IGNORE_WHITESPACE flag, the end tag is moved before the
	* whitespace that precedes given position
	*
	* @param  Tag     $startTag Start tag
	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
	* @param  integer $prio     End tag's priority
	* @return Tag
	*/
	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
	{
		$name = $startTag->getName();

		$skipWhitespace = (bool) ($startTag->getFlags() & self::RULE_IGNORE_WHITESPACE);
		if ($skipWhitespace)
		{
			$tagPos = $this->getMagicEndPos($tagPos);
		}

		$endTag = $this->addEndTag($name, $tagPos, 0, $prio);
		$endTag->pairWith($startTag);

		return $endTag;
	}
1480
1481
	/**
	* Move given position leftward past any whitespace, without going further than the cursor
	*
	* @param  integer $tagPos Rightmost possible position for the tag
	* @return integer
	*/
	protected function getMagicEndPos($tagPos)
	{
		for (; $tagPos > $this->pos; --$tagPos)
		{
			// Stop as soon as the preceding character is not whitespace
			if (strpos(self::WHITESPACE, $this->text[$tagPos - 1]) === false)
			{
				break;
			}
		}

		return $tagPos;
	}
1498
1499
	/**
	* Compute the position and priority of a magic start tag, adjusted for whitespace
	*
	* The position is moved rightward past any whitespace, up to the position of the next tag on
	* the stack or the end of the text. If the tag ends up at the same position as the next tag,
	* it is given a priority that is lower than the next tag's priority by 1
	*
	* @param  integer   $tagPos Leftmost possible position for the tag
	* @return integer[]         [Tag pos, priority]
	*/
	protected function getMagicStartCoords($tagPos)
	{
		if (empty($this->tagStack))
		{
			// Set the next position outside the text boundaries
			$nextPos  = $this->textLen + 1;
			$nextPrio = 0;
		}
		else
		{
			$nextTag  = end($this->tagStack);
			$nextPos  = $nextTag->getPos();
			$nextPrio = $nextTag->getSortPriority();
		}

		// Find the first non-whitespace position before next tag or the end of text. The scan is
		// capped at the text's length because $nextPos can be greater than it, in which case
		// $this->text[$tagPos] would be read out of bounds
		$maxPos = min($nextPos, $this->textLen);
		while ($tagPos < $maxPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
		{
			++$tagPos;
		}

		// Set a priority that ensures this tag appears before the next tag
		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

		return [$tagPos, $prio];
	}
1531
1532
	/**
	* Test whether the next tag on the stack can close given start tag
	*
	* @param  Tag  $tag Start tag
	* @return bool
	*/
	protected function isFollowedByClosingTag(Tag $tag)
	{
		if (empty($this->tagStack))
		{
			return false;
		}

		// The next tag to be processed is the last element of the stack
		$nextTag = end($this->tagStack);

		return $nextTag->canClose($tag);
	}
1542
1543
	/**
	* Process every tag of the stack, then close the tags that were left open
	*
	* Nothing happens if the stack is empty. Otherwise the counters of open tags and used tags are
	* reset for every configured tag, and the stack is processed until it remains empty
	*
	* @return void
	*/
	protected function processTags()
	{
		if (empty($this->tagStack))
		{
			return;
		}

		// Initialize the count tables
		foreach ($this->tagsConfig as $tagName => $tagConfig)
		{
			$this->cntOpen[$tagName]  = 0;
			$this->cntTotal[$tagName] = 0;
		}

		// The stack is not empty at this point. Closing the tags that were left open may add new
		// tags to the stack, in which case we go for another round
		while (!empty($this->tagStack))
		{
			while (!empty($this->tagStack))
			{
				if (!$this->tagStackIsSorted)
				{
					$this->sortTags();
				}

				$this->currentTag = array_pop($this->tagStack);
				$this->processCurrentTag();
			}

			// NOTE: end tags are added in hierarchical order (ancestors to descendants) but since
			//       the stack is processed in LIFO order, tags get closed in the correct order,
			//       from descendants to ancestors
			foreach ($this->openTags as $openTag)
			{
				$this->addMagicEndTag($openTag, $this->textLen);
			}
		}
	}
1587
1588
	/**
	* Process current tag
	*
	* Current tag is first checked against the context (tags may be ignored) and the cursor's
	* position (the cursor may have passed it), which can invalidate or replace it. If it remains
	* valid, it is dispatched to the output or processing method that matches its kind
	*
	* @return void
	*/
	protected function processCurrentTag()
	{
		$tag = $this->currentTag;

		// If tags are ignored in current context, only system tags and tags that close the last
		// open tag remain valid
		if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
		 && !$tag->canClose(end($this->openTags))
		 && !$tag->isSystemTag())
		{
			$tag->invalidate();
		}

		$tagPos = $tag->getPos();
		$tagLen = $tag->getLen();

		// Handle tags that start before the cursor's position
		if ($this->pos > $tagPos && !$tag->isInvalid())
		{
			// If this tag is paired with a start tag that is still open, replace it with an end
			// tag positioned at the cursor, which consumes as much of the same text as this tag
			// and is paired with the same start tag
			$startTag = $tag->getStartTag();
			if ($startTag && in_array($startTag, $this->openTags, true))
			{
				$endTag = $this->addEndTag(
					$startTag->getName(),
					$this->pos,
					max(0, $tagPos + $tagLen - $this->pos)
				);
				$endTag->pairWith($startTag);

				// Current tag is not invalidated, it's merely replaced
				return;
			}

			// If this is an ignore tag, replace it with one that covers what remains of its text
			if ($tag->isIgnoreTag())
			{
				$ignoreLen = $tagPos + $tagLen - $this->pos;
				if ($ignoreLen > 0)
				{
					$this->addIgnoreTag($this->pos, $ignoreLen);

					return;
				}
			}

			// Skipped tags are invalidated
			$tag->invalidate();
		}

		if ($tag->isInvalid())
		{
			return;
		}

		if ($tag->isIgnoreTag())
		{
			$this->outputIgnoreTag($tag);

			return;
		}

		if ($tag->isBrTag())
		{
			// Output the tag if it's allowed, ignore it otherwise
			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
			{
				$this->outputBrTag($tag);
			}

			return;
		}

		if ($tag->isParagraphBreak())
		{
			$this->outputText($tag->getPos(), 0, true);

			return;
		}

		if ($tag->isVerbatim())
		{
			$this->outputVerbatim($tag);

			return;
		}

		if ($tag->isStartTag())
		{
			$this->processStartTag($tag);

			return;
		}

		$this->processEndTag($tag);
	}
1679
1680
	/**
	* Validate given start tag (including self-closing tags) and output it at current position
	*
	* The tag goes through the following steps, in order:
	*
	*  1. Check that the tag has not reached its global limit tagLimit
	*  2. Execute the tag's filter via filterTag()
	*  3. Apply fosterParent, closeParent and closeAncestor rules
	*  4. Check for nestingLimit
	*  5. Check that the tag is allowed in current context
	*  6. Apply requireAncestor rules
	*
	* This order ensures that the tag is valid and within the set limits before we attempt to
	* close parents or ancestors. Ancestors need to be closed before the nesting limit, the
	* context and the required ancestors are checked because closing them may change the context
	* or close a required ancestor.
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function processStartTag(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagConfig = $this->tagsConfig[$tagName];

		if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
		{
			$this->logger->err(
				'Tag limit exceeded',
				[
					'tag'      => $tag,
					'tagName'  => $tagName,
					'tagLimit' => $tagConfig['tagLimit']
				]
			);
			$tag->invalidate();

			return;
		}

		if (!$this->filterTag($tag))
		{
			$tag->invalidate();

			return;
		}

		// If a parent or an ancestor needs to be closed first, we just return. The tag is still
		// valid and has been put back on the stack by the rule that was applied
		if ($this->fosterParent($tag))
		{
			return;
		}
		if ($this->closeParent($tag))
		{
			return;
		}
		if ($this->closeAncestor($tag))
		{
			return;
		}

		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
		{
			$this->logger->err(
				'Nesting limit exceeded',
				[
					'tag'          => $tag,
					'tagName'      => $tagName,
					'nestingLimit' => $tagConfig['nestingLimit']
				]
			);
			$tag->invalidate();

			return;
		}

		if (!$this->tagIsAllowed($tagName))
		{
			// Tags that consume text are logged as a warning, 0-width tags as a debug message
			$logContext = ['tag' => $tag, 'tagName' => $tagName];
			if ($tag->getLen() > 0)
			{
				$this->logger->warn('Tag is not allowed in this context', $logContext);
			}
			else
			{
				$this->logger->debug('Tag is not allowed in this context', $logContext);
			}
			$tag->invalidate();

			return;
		}

		if ($this->requireAncestor($tag))
		{
			$tag->invalidate();

			return;
		}

		// If this tag has an autoClose rule and it's neither paired with an end tag nor followed
		// by a closing tag, replace it with a self-closing tag with the same properties
		if (($tag->getFlags() & self::RULE_AUTO_CLOSE)
		 && !$tag->getEndTag()
		 && !$this->isFollowedByClosingTag($tag))
		{
			$selfClosingTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
			$selfClosingTag->setAttributes($tag->getAttributes());
			$selfClosingTag->setFlags($tag->getFlags());

			$tag = $selfClosingTag;
		}

		// If this tag has a trimFirstLine rule and is not paired with an end tag, ignore the
		// newline that immediately follows it
		if (($tag->getFlags() & self::RULE_TRIM_FIRST_LINE) && !$tag->getEndTag())
		{
			$contentPos = $tag->getPos() + $tag->getLen();
			if (substr($this->text, $contentPos, 1) === "\n")
			{
				$this->addIgnoreTag($contentPos, 1);
			}
		}

		// This tag is valid, output it and update the context
		$this->outputTag($tag);
		$this->pushContext($tag);

		// Apply the createChild rules if applicable
		$this->createChild($tag);
	}
1796
1797
	/**
	* Process given end tag at current position
	*
	* Finds the open tag that given tag closes, closes every tag that was opened after it, outputs
	* given tag, then re-adds the closed tags that have an autoReopen rule. The amount of extra
	* work performed is limited by the fixing budget (currentFixingCost / maxFixingCost)
	*
	* @param  Tag  $tag end tag
	* @return void
	*/
	protected function processEndTag(Tag $tag)
	{
		$tagName = $tag->getName();

		if (empty($this->cntOpen[$tagName]))
		{
			// This is an end tag with no start tag
			return;
		}

		/**
		* @var array List of tags need to be closed before given tag
		*/
		$closeTags = [];

		// Iterate through all open tags from last to first to find a match for our tag. Each tag
		// that has to be closed along the way increases the fixing cost
		$isMatched = false;
		for ($i = count($this->openTags) - 1; $i >= 0; --$i)
		{
			$openTag = $this->openTags[$i];
			if ($tag->canClose($openTag))
			{
				$isMatched = true;
				break;
			}

			$closeTags[] = $openTag;
			++$this->currentFixingCost;
		}

		if (!$isMatched)
		{
			// Did not find a matching tag
			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

			return;
		}

		// Only reopen tags if we haven't exceeded our "fixing" budget
		$keepReopening = ($this->currentFixingCost < $this->maxFixingCost);

		// Iterate over tags that are being closed, output their end tag and collect tags to be
		// reopened
		$reopenTags = [];
		foreach ($closeTags as $openTag)
		{
			$openTagName  = $openTag->getName();
			$openTagFlags = $openTag->getFlags();

			// Collect this tag if it should be reopened automatically. We stop collecting at the
			// first tag that should not
			if ($keepReopening)
			{
				$keepReopening = (bool) ($openTagFlags & self::RULE_AUTO_REOPEN);
				if ($keepReopening)
				{
					$reopenTags[] = $openTag;
				}
			}

			// Find the earliest position we can close this open tag
			$endPos = $tag->getPos();
			if ($openTagFlags & self::RULE_IGNORE_WHITESPACE)
			{
				$endPos = $this->getMagicEndPos($endPos);
			}

			// Output an end tag to close this start tag, then update the context
			$magicEndTag = new Tag(Tag::END_TAG, $openTagName, $endPos, 0);
			$magicEndTag->setFlags($openTagFlags);
			$this->outputTag($magicEndTag);
			$this->popContext();
		}

		// Output our tag, moving the cursor past it, then update the context
		$this->outputTag($tag);
		$this->popContext();

		// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
		// close tags that are already being closed now. Also, filter our list of tags being
		// reopened by removing those that would immediately be closed
		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
		{
			/**
			* @var integer Rightmost position of the portion of text to ignore
			*/
			$ignorePos = $this->pos;

			// NOTE: the fixing cost is increased as part of the loops' conditions, the order of
			//       the operands matters
			$stackIdx = count($this->tagStack);
			while (--$stackIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
			{
				$upcomingTag = $this->tagStack[$stackIdx];

				// Stop unless the upcoming tag is positioned at current "ignore" position and
				// it's strictly an end tag (not a start tag or a self-closing tag)
				if ($upcomingTag->getPos() > $ignorePos
				 || $upcomingTag->isStartTag())
				{
					break;
				}

				// Test whether this tag would close any of the tags we're about to reopen
				$closeIdx = count($closeTags);
				while (--$closeIdx >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
				{
					if (!$upcomingTag->canClose($closeTags[$closeIdx]))
					{
						continue;
					}

					// Remove the tag from the lists and reset the keys
					array_splice($closeTags, $closeIdx, 1);
					if (isset($reopenTags[$closeIdx]))
					{
						array_splice($reopenTags, $closeIdx, 1);
					}

					// Extend the ignored text to cover this tag
					$ignorePos = max(
						$ignorePos,
						$upcomingTag->getPos() + $upcomingTag->getLen()
					);

					break;
				}
			}

			if ($ignorePos > $this->pos)
			{
				/**
				* @todo have a method that takes (pos,len) rather than a Tag
				*/
				$ignoreTag = new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos);
				$this->outputIgnoreTag($ignoreTag);
			}
		}

		// Re-add tags that need to be reopened, at current cursor position
		foreach ($reopenTags as $startTag)
		{
			$newTag = $this->addCopyTag($startTag, $this->pos, 0);

			// Re-pair the new tag with the original's end tag, if there is one
			$pairedTag = $startTag->getEndTag();
			if ($pairedTag)
			{
				$newTag->pairWith($pairedTag);
			}
		}
	}
1953
1954
	/**
	* Remove the last open tag, update its counter and restore the parent context
	*
	* @return void
	*/
	protected function popContext()
	{
		$closedTag = array_pop($this->openTags);
		$this->cntOpen[$closedTag->getName()]--;

		$this->context = $this->context['parentContext'];
	}
1965
1966
	/**
	* Update counters and replace current context with a new context based on given tag
	*
	* The counter of used tags is always increased. If given tag is a self-closing tag, nothing
	* else happens and the context remains the same. Otherwise the tag is recorded as open and
	* current context becomes the parent of the new context
	*
	* @param  Tag  $tag Start tag (including self-closing)
	* @return void
	*/
	protected function pushContext(Tag $tag)
	{
		$tagName   = $tag->getName();
		$tagFlags  = $tag->getFlags();
		$tagConfig = $this->tagsConfig[$tagName];

		++$this->cntTotal[$tagName];

		// If this is a self-closing tag, the context remains the same
		if ($tag->isSelfClosingTag())
		{
			return;
		}

		// Recompute the allowed tags. Transparent tags are restricted by the context's value as
		// is, other tags by a value derived from its upper bits
		$isTransparent = (bool) ($tagFlags & self::RULE_IS_TRANSPARENT);
		$allowed       = [];
		foreach ($this->context['allowed'] as $k => $v)
		{
			if (!$isTransparent)
			{
				$v = ($v & 0xFF00) | ($v >> 8);
			}

			$allowed[] = $tagConfig['allowed'][$k] & $v;
		}

		// Use this tag's flags as a base for this context and add inherited rules
		$flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);

		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
		if ($flags & self::RULE_DISABLE_AUTO_BR)
		{
			$flags &= ~self::RULE_ENABLE_AUTO_BR;
		}

		++$this->cntOpen[$tagName];
		$this->openTags[] = $tag;

		$parentContext = $this->context;
		$this->context = [
			'allowed'       => $allowed,
			'flags'         => $flags,
			'inParagraph'   => false,
			'parentContext' => $parentContext
		];
	}
2023
2024
	/**
	* Test whether given tag's bit is set in current context's bitfield of allowed tags
	*
	* @param  string $tagName
	* @return bool
	*/
	protected function tagIsAllowed($tagName)
	{
		$bitNumber = $this->tagsConfig[$tagName]['bitNumber'];

		// The upper bits of the bit number select the bitfield's element, its 3 lower bits
		// select the bit within that element
		$bitfield = $this->context['allowed'][$bitNumber >> 3];
		$bitmask  = 1 << ($bitNumber & 7);

		return (bool) ($bitfield & $bitmask);
	}
2036
2037
	//==========================================================================
2038
	// Tag stack
2039
	//==========================================================================
2040
2041
	/**
	* Create a start tag and add it via addTag()
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addStartTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2054
2055
	/**
	* Create an end tag and add it via addTag()
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addEndTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2068
2069
	/**
	* Create a self-closing tag and add it via addTag()
	*
	* @param  string  $name Name of the tag
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

		return $tag;
	}
2082
2083
	/**
	* Force a line break at given position
	*
	* The line break is represented by a 0-width, self-closing tag named "br"
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addBrTag($pos, $prio = 0)
	{
		$tag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

		return $tag;
	}
2094
2095
	/**
	* Add an "ignore" tag
	*
	* The tag is a self-closing tag named "i". Its length is capped to the number of characters
	* between its position and the end of the text
	*
	* @param  integer $pos  Position of the tag in the text
	* @param  integer $len  Length of text consumed by the tag
	* @param  integer $prio Tag's priority
	* @return Tag
	*/
	public function addIgnoreTag($pos, $len, $prio = 0)
	{
		// Do not let the tag extend past the end of the text
		$maxLen = $this->textLen - $pos;
		$tagLen = min($len, $maxLen);

		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $tagLen, $prio);
	}
2107
2108
	/**
2109
	* Add a paragraph break at given position
2110
	*
2111
	* Uses a zero-width tag that is actually never output in the result
2112
	*
2113
	* @param  integer $pos  Position of the tag in the text
2114
	* @param  integer $prio Tag's priority
2115
	* @return Tag
2116
	*/
2117 5
	public function addParagraphBreak($pos, $prio = 0)
	{
		// A paragraph break is a self-closing "pb" tag that consumes no text
		$breakLen = 0;
		$breakTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, $breakLen, $prio);

		return $breakTag;
	}
2121
2122
	/**
2123
	* Add a copy of given tag at given position and length
2124
	*
2125
	* @param  Tag     $tag  Original tag
2126
	* @param  integer $pos  Copy's position
2127
	* @param  integer $len  Copy's length
2128
	* @param  integer $prio Copy's priority (same as original by default)
2129
	* @return Tag           Copy tag
2130
	*/
2131 20
	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
	{
		// Fall back to the original tag's sort priority when none was given
		$copyPrio = ($prio === null) ? $tag->getSortPriority() : $prio;

		// Same type and name as the original, at the new position and length
		$duplicate = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);

		// Carry the original's attributes over to the copy
		$duplicate->setAttributes($tag->getAttributes());

		return $duplicate;
	}
2142
2143
	/**
2144
	* Add a tag
2145
	*
2146
	* @param  integer $type Tag's type
2147
	* @param  string  $name Name of the tag
2148
	* @param  integer $pos  Position of the tag in the text
2149
	* @param  integer $len  Length of text consumed by the tag
2150
	* @param  integer $prio Tag's priority
2151
	* @return Tag
2152
	*/
2153 172
	protected function addTag($type, $name, $pos, $len, $prio)
	{
		$tag     = new Tag($type, $name, $pos, $len, $prio);
		$isKnown = isset($this->tagsConfig[$name]);

		// Known tags get their rules bitfield from the configuration
		if ($isKnown)
		{
			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
		}

		// Unknown tags are silently invalidated unless they are system tags
		if (!$isKnown && !$tag->isSystemTag())
		{
			$tag->invalidate();

			return $tag;
		}

		// Disabled tags are invalidated too, but leave a warning in the log
		if (!empty($this->tagsConfig[$name]['isDisabled']))
		{
			$context = [
				'tag'     => $tag,
				'tagName' => $name
			];
			$this->logger->warn('Tag is disabled', $context);
			$tag->invalidate();

			return $tag;
		}

		// A negative length, a negative position or a span that runs past the end of the text
		// also invalidates the tag
		if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
		{
			$tag->invalidate();

			return $tag;
		}

		// The tag passed every check, put it on the stack
		$this->insertTag($tag);

		return $tag;
	}
2192
2193
	/**
2194
	* Insert given tag in the tag stack
2195
	*
2196
	* @param  Tag  $tag
2197
	* @return void
2198
	*/
2199 165
	protected function insertTag(Tag $tag)
	{
		// While the stack is not flagged as sorted, the tag is simply appended
		if (!$this->tagStackIsSorted)
		{
			$this->tagStack[] = $tag;

			return;
		}

		// Walk down from the top of the stack, moving each tag that compares greater than the
		// new one up by one slot, and stop at the first tag that does not
		for ($slot = count($this->tagStack); $slot > 0; --$slot)
		{
			$below = $this->tagStack[$slot - 1];
			if (self::compareTags($below, $tag) <= 0)
			{
				break;
			}
			$this->tagStack[$slot] = $below;
		}

		// $slot is now the index where the new tag belongs
		$this->tagStack[$slot] = $tag;
	}
2217
2218
	/**
2219
	* Add a pair of tags
2220
	*
2221
	* @param  string  $name     Name of the tags
2222
	* @param  integer $startPos Position of the start tag
2223
	* @param  integer $startLen Length of the start tag
2224
	* @param  integer $endPos   Position of the end tag
2225
	* @param  integer $endLen   Length of the end tag
2226
	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
2227
	* @return Tag               Start tag
2228
	*/
2229 19
	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
	{
		// The end tag goes in first, with the negated priority, to try to keep the stack in the
		// correct order
		$closer = $this->addEndTag($name, $endPos, $endLen, -$prio);
		$opener = $this->addStartTag($name, $startPos, $startLen, $prio);

		// Link both halves together and hand the start tag back to the caller
		$opener->pairWith($closer);

		return $opener;
	}
2238
2239
	/**
2240
	* Add a tag that represents a verbatim copy of the original text
2241
	*
2242
	* @param  integer $pos  Position of the tag in the text
2243
	* @param  integer $len  Length of text consumed by the tag
2244
	* @param  integer $prio Tag's priority
2245
	* @return Tag
2246
	*/
2247 4
	public function addVerbatim($pos, $len, $prio = 0)
	{
		// Verbatim text is represented by a self-closing tag named "v"
		$verbatimTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

		return $verbatimTag;
	}
2251
2252
	/**
2253
	* Sort tags by position and precedence
2254
	*
2255
	* @return void
2256
	*/
2257 152
	protected function sortTags()
	{
		// Sort the stack in place using this class's static compareTags() as the comparator
		$comparator = [__CLASS__, 'compareTags'];
		usort($this->tagStack, $comparator);

		// Record that the stack is now in sorted order
		$this->tagStackIsSorted = true;
	}
2262
2263
	/**
2264
	* sortTags() callback
2265
	*
2266
	* Tags are stored as a stack, in LIFO order. We sort tags by position _descending_ so that they
2267
	* are processed in the order they appear in the text.
2268
	*
2269
	* @param  Tag     $a First tag to compare
2270
	* @param  Tag     $b Second tag to compare
2271
	* @return integer
2272
	*/
2273 115
	protected static function compareTags(Tag $a, Tag $b)
	{
		// Primary key: position, descending
		$aPos = $a->getPos();
		$bPos = $b->getPos();
		if ($aPos !== $bPos)
		{
			return $bPos - $aPos;
		}

		// Secondary key: sort priority, descending. The tag with the lower value ends up last in
		// the stack and is therefore processed first, e.g. -10 is processed before 10
		$aPrio = $a->getSortPriority();
		$bPrio = $b->getSortPriority();
		if ($aPrio !== $bPrio)
		{
			return $bPrio - $aPrio;
		}

		// Same position and same priority: fall back to the tags' lengths
		$aLen = $a->getLen();
		$bLen = $b->getLen();

		// Neither tag is zero-width: order by length ascending so that the longest match is
		// processed first. Tags of identical length compare as equal, so their relative order
		// is not defined
		if ($aLen && $bLen)
		{
			return $aLen - $bLen;
		}

		// Exactly one tag is zero-width: it is ordered after the wider tag so that it has a
		// chance to be processed before the next character is consumed, which would force it
		// to be skipped
		if ($aLen)
		{
			return -1;
		}
		if ($bLen)
		{
			return 1;
		}

		// Both tags are zero-width: end tags are ordered after start tags so that a pair that
		// ends with a zero-width tag can be closed before another pair starts with a zero-width
		// tag, e.g. the pairs that would enclose each of the letters in the string "XY".
		// Self-closing tags sit between end tags and start tags in an attempt to keep them out
		// of tag pairs
		$typeRank = [
			Tag::END_TAG          => 0,
			Tag::SELF_CLOSING_TAG => 1,
			Tag::START_TAG        => 2
		];

		return $typeRank[$b->getType()] - $typeRank[$a->getType()];
	}
2326
}