Completed
Push — master ( cb0c4d...f4be8e )
by Josh
25:50
created

Parser::processEmphasisBlock()   F

Complexity

Conditions 16
Paths 1441

Size

Total Lines 66
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 50
CRAP Score 16

Importance

Changes 0
Metric Value
dl 0
loc 66
ccs 50
cts 50
cp 1
rs 2.7057
c 0
b 0
f 0
cc 16
eloc 36
nc 1441
nop 1
crap 16

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Litedown;
9
10
use s9e\TextFormatter\Parser as Rules;
11
use s9e\TextFormatter\Parser\Tag;
12
use s9e\TextFormatter\Plugins\ParserBase;
13
14
class Parser extends ParserBase
15
{
16
	/**
17
	* @var bool Whether current text contains escape characters
18
	*/
19
	protected $hasEscapedChars;
20
21
	/**
22
	* @var bool Whether current text contains references
23
	*/
24
	protected $hasRefs;
25
26
	/**
27
	* @var array Array of [label => link info]
28
	*/
29
	protected $refs;
30
31
	/**
32
	* @var string Text being parsed
33
	*/
34
	protected $text;
35
36
	/**
37
	* {@inheritdoc}
38
	*/
39 263
	public function parse($text, array $matches)
	{
		$this->init($text);

		// Block-level markup (which also covers forced line breaks) is handled first, then
		// link references are captured once the block markup has been overwritten
		$this->matchBlockLevelMarkup();
		$this->matchLinkReferences();

		// Inline passes, in order. Inline code must come first to avoid false positives in
		// other inline markup, and images must be matched before links
		$inlinePasses = [
			'matchInlineCode',
			'matchImages',
			'matchLinks',
			'matchStrikethrough',
			'matchSuperscript',
			'matchEmphasis',
			'matchForcedLineBreaks'
		];
		foreach ($inlinePasses as $methodName)
		{
			$this->$methodName();
		}

		// Drop the working copy of the text to free its memory
		unset($this->text);
	}
63
64
	/**
65
	* Add an image tag for given text span
66
	*
67
	* @param  integer $startTagPos Start tag position
68
	* @param  integer $endTagPos   End tag position
69
	* @param  integer $endTagLen   End tag length
70
	* @param  string  $linkInfo    URL optionally followed by space and a title
71
	* @param  string  $alt         Value for the alt attribute
72
	* @return void
73
	*/
74 24
	protected function addImageTag($startTagPos, $endTagPos, $endTagLen, $linkInfo, $alt)
	{
		// The start tag is always given a length of 2
		$imgTag = $this->parser->addTagPair('IMG', $startTagPos, 2, $endTagPos, $endTagLen);
		$this->setLinkAttributes($imgTag, $linkInfo, 'src');

		$altText = $this->decode($alt);
		$imgTag->setAttribute('alt', $altText);

		// Overwrite everything from the start of the start tag to the end of the end tag
		$markupLen = $endTagPos + $endTagLen - $startTagPos;
		$this->overwrite($startTagPos, $markupLen);
	}
83
84
	/**
85
	* Add the tag pair for an inline code span
86
	*
87
	* @param  array $left  Left marker
88
	* @param  array $right Right marker
89
	* @return void
90
	*/
91 21
	protected function addInlineCodeTags($left, $right)
	{
		// The trimmed amounts recorded on each marker are folded into the corresponding tag:
		// the start tag grows forward, the end tag starts earlier and grows by the same amount
		$openPos  = $left['pos'];
		$openLen  = $left['len'] + $left['trimAfter'];
		$closeLen = $right['len'] + $right['trimBefore'];
		$closePos = $right['pos'] - $right['trimBefore'];

		$this->parser->addTagPair('C', $openPos, $openLen, $closePos, $closeLen);

		// Overwrite the whole span, markers and content included
		$spanLen = $closePos + $closeLen - $openPos;
		$this->overwrite($openPos, $spanLen);
	}
100
101
	/**
102
	* Add a link tag for given text span
103
	*
104
	* @param  integer $startTagPos Start tag position
105
	* @param  integer $endTagPos   End tag position
106
	* @param  integer $endTagLen   End tag length
107
	* @param  string  $linkInfo    URL optionally followed by space and a title
108
	* @return void
109
	*/
110 57
	protected function addLinkTag($startTagPos, $endTagPos, $endTagLen, $linkInfo)
	{
		// An end tag of length 1 denotes an implicit reference, which gets a slightly worse
		// priority. Explicit references and inline links get a slightly better priority so
		// that they take precedence over possible BBCodes such as
		// [b](https://en.wikipedia.org/wiki/B)
		$priority = -1;
		if ($endTagLen === 1)
		{
			$priority = 1;
		}

		$urlTag = $this->parser->addTagPair('URL', $startTagPos, 1, $endTagPos, $endTagLen, $priority);
		$this->setLinkAttributes($urlTag, $linkInfo, 'url');

		// Only the two tags are overwritten, the link's text is left untouched
		$this->overwrite($startTagPos, 1);
		$this->overwrite($endTagPos, $endTagLen);
	}
124
125
	/**
126
	* Close a list at given offset
127
	*
128
	* @param  array   $list
129
	* @param  integer $textBoundary
130
	* @return void
131
	*/
132 27
	protected function closeList(array $list, $textBoundary)
	{
		// Add 0-width end tags at the text boundary for the list and its current item
		$listEndTag = $this->parser->addEndTag('LIST', $textBoundary, 0);
		$listEndTag->pairWith($list['listTag']);

		$itemEndTag = $this->parser->addEndTag('LI', $textBoundary, 0);
		$itemEndTag->pairWith($list['itemTag']);

		if (!$list['tight'])
		{
			return;
		}

		// In a tight list, no item is allowed to create paragraphs
		foreach ($list['itemTags'] as $liTag)
		{
			$liTag->removeFlags(Rules::RULE_CREATE_PARAGRAPHS);
		}
	}
145
146
	/**
147
	* Compute the amount of text to ignore at the start of a quote line
148
	*
149
	* @param  string  $str           Original quote markup
150
	* @param  integer $maxQuoteDepth Maximum quote depth
151
	* @return integer                Number of characters to ignore
152
	*/
153 4
	protected function computeQuoteIgnoreLen($str, $maxQuoteDepth)
	{
		// Strip one level of quote markup (optional spaces, ">", optional space) per allowed
		// level of depth. Whatever has been stripped is the text to ignore
		$stripped = $str;
		for ($levels = $maxQuoteDepth; $levels >= 1; --$levels)
		{
			$stripped = preg_replace('/^ *> ?/', '', $stripped);
		}

		$originalLen = strlen($str);

		return $originalLen - strlen($stripped);
	}
163
164
	/**
165
	* Decode a chunk of encoded text to be used as an attribute value
166
	*
167
	* Decodes escaped literals and removes slashes and 0x1A characters
168
	*
169
	* @param  string $str Encoded text
170
	* @return string      Decoded text
171
	*/
172 69
	protected function decode($str)
	{
		// HTML entities are only decoded if configured so and if there is an ampersand at all
		$decodeEntities = ($this->config['decodeHtmlEntities'] && strpos($str, '&') !== false);
		if ($decodeEntities)
		{
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		}

		// Remove the 0x1A characters
		$str = str_replace("\x1A", '', $str);

		if (!$this->hasEscapedChars)
		{
			return $str;
		}

		// Map each 0x1B sequence back to the literal it stands for
		$literals = [
			"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
			"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
			"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
			"\x1BC" => '~'
		];

		return strtr($str, $literals);
	}
195
196
	/**
197
	* Encode escaped literals that have a special meaning
198
	*
199
	* @param  string $str Original text
200
	* @return string      Encoded text
201
	*/
202 15
	protected function encode($str)
	{
		// Each backslash-escaped literal is swapped for a 0x1B byte followed by a one-character
		// code, which keeps the replacement the same length as the original sequence
		$sequences = [
			'\\!' => "\x1B0", '\\"' => "\x1B1", "\\'" => "\x1B2", '\\('  => "\x1B3",
			'\\)' => "\x1B4", '\\*' => "\x1B5", '\\[' => "\x1B6", '\\\\' => "\x1B7",
			'\\]' => "\x1B8", '\\^' => "\x1B9", '\\_' => "\x1BA", '\\`'  => "\x1BB",
			'\\~' => "\x1BC"
		];

		return strtr($str, $sequences);
	}
214
215
	/**
216
	* Return the length of the markup at the end of an ATX header
217
	*
218
	* @param  integer $startPos Start of the header's text
219
	* @param  integer $endPos   End of the header's text
220
	* @return integer
221
	*/
222 17
	protected function getAtxHeaderEndTagLen($startPos, $endPos)
	{
		$headerText = substr($this->text, $startPos, $endPos - $startPos);

		// Every part of the pattern is optional so it always matches, possibly an empty string
		preg_match('/[ \\t]*#*[ \\t]*$/', $headerText, $trailing);
		$closingMarkup = $trailing[0];

		return strlen($closingMarkup);
	}
229
230
	/**
231
	* Capture lines that contain a Setext-style header
232
	*
233
	* @return array
234
	*/
235 263
	protected function getSetextLines()
	{
		// Returned array uses the offset of the LF that precedes each underline as key. Each
		// value holds the keys endTagLen, endTagPos, quoteDepth and tagName (H1 or H2)
		$setextLines = [];

		// An underline is made of - or = characters, so we can bail out if there are none
		if (strpos($this->text, '-') === false && strpos($this->text, '=') === false)
		{
			return $setextLines;
		}

		// Capture any series of - or = alone on a line, optionally preceded with the
		// angle brackets notation used in blockquotes
		$regexp = '/^(?=[-=>])(?:> ?)*(?=[-=])(?:-+|=+) *$/m';
		if (preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE))
		{
			foreach ($matches[0] as list($match, $matchPos))
			{
				// Compute the position of the end tag. We start on the LF character before the
				// match and keep rewinding until we find a non-space character
				$endTagPos = $matchPos - 1;
				while ($endTagPos > 0 && $this->text[$endTagPos - 1] === ' ')
				{
					--$endTagPos;
				}

				// The match may start with quote markup, so the header level must be decided by
				// the underline characters rather than by the first character of the match.
				// The pattern only allows one kind of underline character per line
				$tagName = (strpos($match, '=') !== false) ? 'H1' : 'H2';

				// Store at the offset of the LF character
				$setextLines[$matchPos - 1] = [
					'endTagLen'  => $matchPos + strlen($match) - $endTagPos,
					'endTagPos'  => $endTagPos,
					'quoteDepth' => substr_count($match, '>'),
					'tagName'    => $tagName
				];
			}
		}

		return $setextLines;
	}
271
272
	/**
273
	* Get emphasis markup split by block
274
	*
275
	* @param  string  $regexp Regexp used to match emphasis
276
	* @param  integer $pos    Position in the text of the first emphasis character
277
	* @return array[]         Each array contains a list of [matchPos, matchLen] pairs
278
	*/
279 60
	protected function getEmphasisByBlock($regexp, $pos)
	{
		$blocks   = [];
		$current  = [];
		$boundary = strpos($this->text, "\x17", $pos);

		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE, $pos);
		foreach ($matches[0] as list($markup, $markupPos))
		{
			$markupLen = strlen($markup);

			// Once past the 0x17 marker, the current block is complete: store it, start a new
			// one and look for the next marker
			if ($markupPos > $boundary)
			{
				$blocks[] = $current;
				$current  = [];
				$boundary = strpos($this->text, "\x17", $markupPos);
			}

			// Skip markup that should be ignored
			if ($this->ignoreEmphasis($markupPos, $markupLen))
			{
				continue;
			}

			$current[] = [$markupPos, $markupLen];
		}

		// The last block is always stored, even if it's empty
		$blocks[] = $current;

		return $blocks;
	}
309
310
	/**
311
	* Capture and return inline code markers
312
	*
313
	* @return array
314
	*/
315 263
	protected function getInlineCodeMarkers()
	{
		$firstPos = strpos($this->text, '`');
		if ($firstPos === false)
		{
			return [];
		}

		// Encoded backticks are turned back into a backslash followed by a backtick before
		// matching. Both forms are 2 bytes long so the offsets remain valid for $this->text
		$subject = str_replace("\x1BB", '\\`', $this->text);
		$flags   = PREG_OFFSET_CAPTURE | PREG_SET_ORDER;
		preg_match_all('/(`+)(\\s*)[^\\x17`]*/', $subject, $matches, $flags, $firstPos);

		$markers    = [];
		$trimBefore = 0;
		foreach ($matches as $m)
		{
			list($span, $spanPos) = $m[0];
			$spanLen = strlen($span);

			$markers[] = [
				'pos'        => $spanPos,
				'len'        => strlen($m[1][0]),
				'trimBefore' => $trimBefore,
				'trimAfter'  => strlen($m[2][0]),
				'next'       => $spanPos + $spanLen
			];

			// The whitespace at the end of this span is what the next marker trims before it
			$trimBefore = $spanLen - strlen(rtrim($span));
		}

		return $markers;
	}
346
347
	/**
348
	* Capture and return labels used in current text
349
	*
350
	* @return array Labels' text position as keys, lowercased text content as values
351
	*/
352 26
	protected function getLabels()
	{
		$regexp = '/\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\]/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);

		$labels = [];
		foreach ($matches[1] as list($label, $labelPos))
		{
			// The key is the position of the opening bracket, which is one character before
			// the captured content
			$bracketPos          = $labelPos - 1;
			$labels[$bracketPos] = strtolower($label);
		}

		return $labels;
	}
368
369
	/**
370
	* Test whether emphasis should be ignored at the given position in the text
371
	*
372
	* @param  integer $matchPos Position of the emphasis in the text
373
	* @param  integer $matchLen Length of the emphasis
374
	* @return bool
375
	*/
376 60
	protected function ignoreEmphasis($matchPos, $matchLen)
	{
		// Only single underscores are candidates for being ignored
		if ($this->text[$matchPos] !== '_' || $matchLen !== 1)
		{
			return false;
		}

		// ...and only if they sit between alphanumeric characters
		return $this->isSurroundedByAlnum($matchPos, $matchLen);
	}
381
382
	/**
383
	* Initialize this parser with given text
384
	*
385
	* @param  string $text Text to be parsed
386
	* @return void
387
	*/
388 263
	protected function init($text)
	{
		// The text has escaped characters if it contains a backslash that is followed by one
		// of the characters that can be escaped. The strpos() call is a cheap pre-check
		$this->hasEscapedChars = (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*[\\\\\\]^_`~]/', $text));

		if ($this->hasEscapedChars)
		{
			// Encode escaped literals that have a special meaning otherwise, so that we don't
			// have to take them into account in regexps
			$text = $this->encode($text);
		}

		// A couple of lines and a non-whitespace character are appended to the text in order
		// to trigger the closure of all open blocks such as quotes and lists
		$this->text = $text . "\n\n\x17";
	}
409
410
	/**
411
	* Test whether given position is preceded by whitespace
412
	*
413
	* @param  integer $pos
414
	* @return bool
415
	*/
416 59
	protected function isAfterWhitespace($pos)
	{
		// There is nothing before the start of the text
		if (!($pos > 0))
		{
			return false;
		}

		$previousChr = $this->text[$pos - 1];

		return $this->isWhitespace($previousChr);
	}
420
421
	/**
422
	* Test whether given character is alphanumeric
423
	*
424
	* @param  string $chr
425
	* @return bool
426
	*/
427 8
	protected function isAlnum($chr)
	{
		// The list is padded with a leading space so that every alphanumeric character is
		// found at a non-zero offset, while a miss (false) or a space (0) both evaluate to false
		$alnum  = ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
		$offset = strpos($alnum, $chr);

		return (bool) $offset;
	}
431
432
	/**
433
	* Test whether given position is followed by whitespace
434
	*
435
	* @param  integer $pos
436
	* @return bool
437
	*/
438 59
	protected function isBeforeWhitespace($pos)
	{
		// Look at the character that immediately follows given position
		$nextChr = $this->text[$pos + 1];

		return $this->isWhitespace($nextChr);
	}
442
443
	/**
444
	* Test whether a length of text is surrounded by alphanumeric characters
445
	*
446
	* @param  integer $matchPos Start of the text
447
	* @param  integer $matchLen Length of the text
448
	* @return bool
449
	*/
450 8
	protected function isSurroundedByAlnum($matchPos, $matchLen)
	{
		// There is no character before the start of the text
		if (!($matchPos > 0))
		{
			return false;
		}

		// Test the character right before the span first, then the one right after it
		if (!$this->isAlnum($this->text[$matchPos - 1]))
		{
			return false;
		}

		return $this->isAlnum($this->text[$matchPos + $matchLen]);
	}
454
455
	/**
456
	* Test whether given character is an ASCII whitespace character
457
	*
458
	* NOTE: newlines are normalized to LF before parsing so we don't have to check for CR
459
	*
460
	* @param  string $chr
461
	* @return bool
462
	*/
463 59
	protected function isWhitespace($chr)
	{
		// strpos() returns an integer offset on a hit and false on a miss
		$offset = strpos(" \n\t", $chr);

		return is_int($offset);
	}
467
468
	/**
469
	* Mark the boundary of a block in the original text
470
	*
471
	* @param  integer $pos
472
	* @return void
473
	*/
474 263
	protected function markBoundary($pos)
	{
		// Block boundaries are represented by a 0x17 byte written in place in the text
		$boundaryChr      = "\x17";
		$this->text[$pos] = $boundaryChr;
	}
478
479
	/**
480
	* Match block-level markup, as well as forced line breaks and headers
481
	*
482
	* @return void
483
	*/
484 263
	protected function matchBlockLevelMarkup()
485
	{
		// Walks through the text one line at a time (the pattern is anchored with ^ in multiline
		// mode) and registers QUOTE, CODE, LIST/LI, ATX and Setext header, and HR tags as well
		// as paragraph breaks with the parser. Block boundaries are marked in $this->text and
		// consumed markup is overwritten along the way.
		//
		// State carried from one line to the next:
		//  - $codeFence:    fence string of the fenced code block currently open, null otherwise
		//  - $codeIndent:   indentation width from which a line counts as indented code; it is
		//                   recomputed as ($listsCnt + 1) * 4 on every line that is not
		//                   indented that far
		//  - $codeTag:      start tag of the code block currently open, null otherwise
		//  - $lists:        stack of open lists, each with its tags, indentation and tightness
		//  - $quotes:       stack of start tags of the quotes currently open
		//  - $textBoundary: offset of the LF that ends the last non-empty line
486 263
		$codeFence    = null;
487 263
		$codeIndent   = 4;
488 263
		$codeTag      = null;
489 263
		$lineIsEmpty  = true;
490 263
		$lists        = [];
491 263
		$listsCnt     = 0;
492 263
		$newContext   = false;
493 263
		$quotes       = [];
494 263
		$quotesCnt    = 0;
495 263
		$setextLines  = $this->getSetextLines();
496 263
		$textBoundary = 0;
497
498 263
		// Capture groups used below:
		//  1: quote markup (one or more ">" each optionally preceded by up to 3 spaces)
		//  2: indentation (spaces and tabs)
		//  3: a line made of three or more *, - or _ optionally mixed with spaces, or a line of =
		//  4: list item markup (-, * or + or a number followed by a dot, then whitespace)
		//  5: ATX header markup (1 to 6 # followed by whitespace) or a code fence (``` or ~~~)
		$regexp = '/^(?:(?=[-*+\\d \\t>`~#_])((?: {0,3}> ?)+)?([ \\t]+)?(\\* *\\* *\\*[* ]*$|- *- *-[- ]*$|_ *_ *_[_ ]*$|=+$)?((?:[-*+]|\\d+\\.)[ \\t]+(?=\\S))?[ \\t]*(#{1,6}[ \\t]+|```+[^`\\n]*$|~~~+[^~\\n]*$)?)?/m';
499 263
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
500
501 263
		foreach ($matches as $m)
502
		{
503 263
			$matchPos   = $m[0][1];
504 263
			$matchLen   = strlen($m[0][0]);
505 263
			$ignoreLen  = 0;
506 263
			$quoteDepth = 0;
507
508
			// If the last line was empty then this is not a continuation, and vice-versa
509 263
			$continuation = !$lineIsEmpty;
510
511
			// Capture the position of the end of the line and determine whether the line is empty
512 263
			$lfPos       = strpos($this->text, "\n", $matchPos);
513 263
			$lineIsEmpty = ($lfPos === $matchPos + $matchLen && empty($m[3][0]) && empty($m[4][0]) && empty($m[5][0]));
514
515
			// If the line is empty and it's the first empty line then we break current paragraph.
516 263
			$breakParagraph = ($lineIsEmpty && $continuation);
517
518
			// Count quote marks
519 263
			if (!empty($m[1][0]))
520 263
			{
521 31
				$quoteDepth = substr_count($m[1][0], '>');
522 31
				$ignoreLen  = strlen($m[1][0]);
523 31
				// Inside of a code block that recorded its quote depth, only the quote marks up
				// to that depth are treated as markup
				if (isset($codeTag) && $codeTag->hasAttribute('quoteDepth'))
524 31
				{
525 4
					$quoteDepth = min($quoteDepth, $codeTag->getAttribute('quoteDepth'));
526 4
					$ignoreLen  = $this->computeQuoteIgnoreLen($m[1][0], $quoteDepth);
527 4
				}
528
529
				// Overwrite quote markup
530 31
				$this->overwrite($matchPos, $ignoreLen);
531 31
			}
532
533
			// Close supernumerary quotes
534 263
			if ($quoteDepth < $quotesCnt && !$continuation)
535 263
			{
536 30
				$newContext = true;
537
538
				do
539
				{
540 30
					$this->parser->addEndTag('QUOTE', $textBoundary, 0)
541 30
					             ->pairWith(array_pop($quotes));
542
				}
543 30
				while ($quoteDepth < --$quotesCnt);
544 30
			}
545
546
			// Open new quotes
547 263
			if ($quoteDepth > $quotesCnt && !$lineIsEmpty)
548 263
			{
549 30
				$newContext = true;
550
551
				do
552
				{
553 30
					$tag = $this->parser->addStartTag('QUOTE', $matchPos, 0, $quotesCnt - 999);
554 30
					$quotes[] = $tag;
555
				}
556 30
				while ($quoteDepth > ++$quotesCnt);
557 30
			}
558
559
			// Compute the width of the indentation
560 263
			$indentWidth = 0;
561 263
			$indentPos   = 0;
562 263
			if (!empty($m[2][0]) && !$codeFence)
563 263
			{
564 39
				$indentStr = $m[2][0];
565 39
				$indentLen = strlen($indentStr);
566
				do
567
				{
568 39
					if ($indentStr[$indentPos] === ' ')
569 39
					{
570 37
						++$indentWidth;
571 37
					}
572
					else
573
					{
574 4
						// Anything else in this capture is a tab, which moves the width up to the
						// next multiple of 4
						$indentWidth = ($indentWidth + 4) & ~3;
575
					}
576
				}
577 39
				while (++$indentPos < $indentLen && $indentWidth < $codeIndent);
578 39
			}
579
580
			// Test whether we're out of a code block
581 263
			if (isset($codeTag) && !$codeFence && $indentWidth < $codeIndent && !$lineIsEmpty)
582 263
			{
583 18
				$newContext = true;
584 18
			}
585
586
			if ($newContext)
587 263
			{
588 45
				$newContext = false;
589
590
				// Close the code block if applicable
591 45
				if (isset($codeTag))
592 45
				{
593 18
					if ($textBoundary > $codeTag->getPos())
594 18
					{
595
						// Overwrite the whole block
596 16
						$this->overwrite($codeTag->getPos(), $textBoundary - $codeTag->getPos());
597
598 16
						$endTag = $this->parser->addEndTag('CODE', $textBoundary, 0, -1);
599 16
						$endTag->pairWith($codeTag);
600 16
					}
601
					else
602
					{
603
						// The code block is empty
604 2
						$codeTag->invalidate();
605
					}
606
607 18
					$codeTag = null;
608 18
					$codeFence = null;
609 18
				}
610
611
				// Close all the lists
612 45
				foreach ($lists as $list)
613
				{
614 2
					$this->closeList($list, $textBoundary);
615 45
				}
616 45
				$lists    = [];
617 45
				$listsCnt = 0;
618
619
				// Mark the block boundary
620
				if ($matchPos)
621 45
				{
622 45
					$this->markBoundary($matchPos - 1);
623 45
				}
624 45
			}
625
626 263
			if ($indentWidth >= $codeIndent)
627 263
			{
628 19
				if (isset($codeTag) || !$continuation)
629 19
				{
630
					// Adjust the amount of text being ignored
631 18
					$ignoreLen += $indentPos;
632
633 18
					if (!isset($codeTag))
634 18
					{
635
						// Create code block
636 18
						$codeTag = $this->parser->addStartTag('CODE', $matchPos + $ignoreLen, 0, -999);
637 18
					}
638
639
					// Clear the captures to prevent any further processing
640 18
					$m = [];
641 18
				}
642 19
			}
643
			else
644
			{
645 263
				$hasListItem = !empty($m[4][0]);
646
647 263
				if (!$indentWidth && !$continuation && !$hasListItem)
648 263
				{
649
					// Start of a new context
650 263
					$listIndex = -1;
651 263
				}
652 262
				elseif ($continuation && !$hasListItem)
653
				{
654
					// Continuation of current list item or paragraph
655 262
					$listIndex = $listsCnt - 1;
656 262
				}
657 30
				elseif (!$listsCnt)
658
				{
659
					// We're not inside of a list already, we can start one if there's a list item
660
					// and it's either not in continuation of a paragraph or immediately after a
661
					// block
662 30
					if ($hasListItem && (!$continuation || $this->text[$matchPos - 1] === "\x17"))
663 30
					{
664
						// Start of a new list
665 27
						$listIndex = 0;
666 27
					}
667
					else
668
					{
669
						// We're in a normal paragraph
670 4
						$listIndex = -1;
671
					}
672 30
				}
673
				else
674
				{
675
					// We're inside of a list but we need to compute the depth
676 20
					$listIndex = 0;
677 20
					while ($listIndex < $listsCnt && $indentWidth > $lists[$listIndex]['maxIndent'])
678
					{
679 6
						++$listIndex;
680 6
					}
681
				}
682
683
				// Close deeper lists
684 263
				while ($listIndex < $listsCnt - 1)
685
				{
686 26
					$this->closeList(array_pop($lists), $textBoundary);
687 26
					--$listsCnt;
688 26
				}
689
690
				// If there's no list item at current index, we'll need to either create one or
691
				// drop down to previous index, in which case we have to adjust maxIndent
692 263
				if ($listIndex === $listsCnt && !$hasListItem)
693 263
				{
694 1
					--$listIndex;
695 1
				}
696
697 263
				if ($hasListItem && $listIndex >= 0)
698 263
				{
699 27
					$breakParagraph = true;
700
701
					// Compute the position and amount of text consumed by the item tag
702 27
					$tagPos = $matchPos + $ignoreLen + $indentPos;
703 27
					$tagLen = strlen($m[4][0]);
704
705
					// Create a LI tag that consumes its markup
706 27
					$itemTag = $this->parser->addStartTag('LI', $tagPos, $tagLen);
707
708
					// Overwrite the markup
709 27
					$this->overwrite($tagPos, $tagLen);
710
711
					// If the list index is within current lists count it means this is not a new
712
					// list and we have to close the last item. Otherwise, it's a new list that we
713
					// have to create
714 27
					if ($listIndex < $listsCnt)
715 27
					{
716 20
						$this->parser->addEndTag('LI', $textBoundary, 0)
717 20
						             ->pairWith($lists[$listIndex]['itemTag']);
718
719
						// Record the item in the list
720 20
						$lists[$listIndex]['itemTag']    = $itemTag;
721 20
						$lists[$listIndex]['itemTags'][] = $itemTag;
722 20
					}
723
					else
724
					{
725 27
						++$listsCnt;
726
727
						if ($listIndex)
728 27
						{
729 5
							$minIndent = $lists[$listIndex - 1]['maxIndent'] + 1;
730 5
							$maxIndent = max($minIndent, $listIndex * 4);
731 5
						}
732
						else
733
						{
734 27
							$minIndent = 0;
735 27
							$maxIndent = $indentWidth;
736
						}
737
738
						// Create a 0-width LIST tag right before the item tag LI
739 27
						$listTag = $this->parser->addStartTag('LIST', $tagPos, 0);
740
741
						// Test whether the list item ends with a dot, as in "1."
742 27
						if (strpos($m[4][0], '.') !== false)
743 27
						{
744 10
							$listTag->setAttribute('type', 'decimal');
745
746 10
							$start = (int) $m[4][0];
747 10
							if ($start !== 1)
748 10
							{
749 2
								$listTag->setAttribute('start', $start);
750 2
							}
751 10
						}
752
753
						// Record the new list depth
754 27
						$lists[] = [
755 27
							'listTag'   => $listTag,
756 27
							'itemTag'   => $itemTag,
757 27
							'itemTags'  => [$itemTag],
758 27
							'minIndent' => $minIndent,
759 27
							'maxIndent' => $maxIndent,
760
							'tight'     => true
761 27
						];
762
					}
763 27
				}
764
765
				// If we're in a list, on a non-empty line preceded with a blank line...
766 263
				if ($listsCnt && !$continuation && !$lineIsEmpty)
767 263
				{
768
					// ...and this is not the first item of the list...
769 23
					if (count($lists[0]['itemTags']) > 1 || !$hasListItem)
770 23
					{
771
						// ...every list that is currently open becomes loose
772 5
						foreach ($lists as &$list)
773
						{
774 5
							$list['tight'] = false;
775 5
						}
776 5
						unset($list);
777 5
					}
778 23
				}
779
780 263
				$codeIndent = ($listsCnt + 1) * 4;
781
			}
782
783 263
			if (isset($m[5]))
784 263
			{
785
				// Headers
786 36
				if ($m[5][0][0] === '#')
787 36
				{
788 17
					$startTagLen = strlen($m[5][0]);
789 17
					$startTagPos = $matchPos + $matchLen - $startTagLen;
790 17
					$endTagLen   = $this->getAtxHeaderEndTagLen($matchPos + $matchLen, $lfPos);
791 17
					$endTagPos   = $lfPos - $endTagLen;
792
793 17
					$this->parser->addTagPair('H' . strspn($m[5][0], '#', 0, 6), $startTagPos, $startTagLen, $endTagPos, $endTagLen);
794
795
					// Mark the start and the end of the header as boundaries
796 17
					$this->markBoundary($startTagPos);
797 17
					$this->markBoundary($lfPos);
798
799
					if ($continuation)
800 17
					{
801 2
						$breakParagraph = true;
802 2
					}
803 17
				}
804
				// Code fence
805 19
				elseif ($m[5][0][0] === '`' || $m[5][0][0] === '~')
806
				{
807 19
					$tagPos = $matchPos + $ignoreLen;
808 19
					$tagLen = $lfPos - $tagPos;
809
810 19
					// A fence only closes the current code block if the whole capture is
					// identical to the fence that opened it
					if (isset($codeTag) && $m[5][0] === $codeFence)
811 19
					{
812 19
						$endTag = $this->parser->addEndTag('CODE', $tagPos, $tagLen, -1);
813 19
						$endTag->pairWith($codeTag);
814
815 19
						$this->parser->addIgnoreTag($textBoundary, $tagPos - $textBoundary);
816
817
						// Overwrite the whole block
818 19
						$this->overwrite($codeTag->getPos(), $tagPos + $tagLen - $codeTag->getPos());
819 19
						$codeTag = null;
820 19
						$codeFence = null;
821 19
					}
822 19
					elseif (!isset($codeTag))
823
					{
824
						// Create code block
825 19
						$codeTag   = $this->parser->addStartTag('CODE', $tagPos, $tagLen);
826 19
						$codeFence = substr($m[5][0], 0, strspn($m[5][0], '`~'));
827 19
						$codeTag->setAttribute('quoteDepth', $quoteDepth);
828
829
						// Ignore the next character, which should be a newline
830 19
						$this->parser->addIgnoreTag($tagPos + $tagLen, 1);
831
832
						// Add the language if present, e.g. ```php
833 19
						$lang = trim(trim($m[5][0], '`~'));
834 19
						if ($lang !== '')
835 19
						{
836 4
							$codeTag->setAttribute('lang', $lang);
837 4
						}
838 19
					}
839 19
				}
840 36
			}
841 263
			elseif (!empty($m[3][0]) && !$listsCnt && $this->text[$matchPos + $matchLen] !== "\x17")
842
			{
843
				// Horizontal rule
844 9
				$this->parser->addSelfClosingTag('HR', $matchPos + $ignoreLen, $matchLen - $ignoreLen);
845 9
				$breakParagraph = true;
846
847
				// Mark the end of the line as a boundary
848 9
				$this->markBoundary($lfPos);
849 9
			}
850 263
			elseif (isset($setextLines[$lfPos]) && $setextLines[$lfPos]['quoteDepth'] === $quoteDepth && !$lineIsEmpty && !$listsCnt && !isset($codeTag))
851
			{
852
				// Setext-style header
853 11
				$this->parser->addTagPair(
854 11
					$setextLines[$lfPos]['tagName'],
855 11
					$matchPos + $ignoreLen,
856 11
					0,
857 11
					$setextLines[$lfPos]['endTagPos'],
858 11
					$setextLines[$lfPos]['endTagLen']
859 11
				);
860
861
				// Mark the end of the Setext line
862 11
				$this->markBoundary($setextLines[$lfPos]['endTagPos'] + $setextLines[$lfPos]['endTagLen']);
863 11
			}
864
865
			if ($breakParagraph)
866 263
			{
867 262
				$this->parser->addParagraphBreak($textBoundary);
868 262
				$this->markBoundary($textBoundary);
869 262
			}
870
871 263
			// The text boundary only moves forward on non-empty lines
			if (!$lineIsEmpty)
872 263
			{
873 263
				$textBoundary = $lfPos;
874 263
			}
875
876
			if ($ignoreLen)
877 263
			{
878 45
				$this->parser->addIgnoreTag($matchPos, $ignoreLen, 1000);
879 45
			}
880 263
		}
881 263
	}
882
883
	/**
884
	* Match all forms of emphasis (emphasis and strong, using underscores or asterisks)
885
	*
886
	* @return void
887
	*/
888 263
	protected function matchEmphasis()
	{
		// Markup character => regexp that matches a series of that character. Asterisks are
		// processed before underscores
		$passes = [
			'*' => '/\\*+/',
			'_' => '/_+/'
		];
		foreach ($passes as $character => $regexp)
		{
			$this->matchEmphasisByCharacter($character, $regexp);
		}
	}
893
894
	/**
895
	* Match emphasis and strong applied using given character
896
	*
897
	* @param  string $character Markup character, either * or _
898
	* @param  string $regexp    Regexp used to match the series of emphasis character
899
	* @return void
900
	*/
901 263
	protected function matchEmphasisByCharacter($character, $regexp)
	{
		// Nothing to do if the markup character does not appear in the text
		$firstPos = strpos($this->text, $character);
		if ($firstPos !== false)
		{
			// Emphasis is processed one block at a time, starting at the first occurrence
			$blocks = $this->getEmphasisByBlock($regexp, $firstPos);
			foreach ($blocks as $block)
			{
				$this->processEmphasisBlock($block);
			}
		}
	}
914
915
	/**
916
	* Match forced line breaks
917
	*
918
	* @return void
919
	*/
920 263
	protected function matchForcedLineBreaks()
	{
		// A forced line break is two spaces followed by a newline. The BR tag is added at the
		// position of the newline, then the search resumes right after it
		$markup = "  \n";
		for ($pos = strpos($this->text, $markup); $pos !== false; $pos = strpos($this->text, $markup, $pos + 3))
		{
			$this->parser->addBrTag($pos + 2);
		}
	}
929
930
	/**
931
	* Match images markup
932
	*
933
	* @return void
934
	*/
935 263
	protected function matchImages()
	{
		// No image without a "![" somewhere in the text
		$firstPos = strpos($this->text, '![');
		if ($firstPos === false)
		{
			return;
		}

		// Inline images are only looked for if a "](" follows the first "!["
		$mayHaveInlineImages = (strpos($this->text, '](', $firstPos) !== false);
		if ($mayHaveInlineImages)
		{
			$this->matchInlineImages();
		}

		// Reference images are only looked for if the text contains references
		if ($this->hasRefs)
		{
			$this->matchReferenceImages();
		}
	}
951
952
	/**
953
	* Match inline images markup
954
	*
955
	* @return void
956
	*/
957 13
	protected function matchInlineImages()
	{
		$regexp = '/!\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			list($markup, $markupPos) = $m[0];
			$markupLen = strlen($markup);
			$linkInfo  = $m[1][0];

			// The end tag is made of the link info plus 3 characters: "](" before and ")" after
			$endTagLen = 3 + strlen($linkInfo);
			$endTagPos = $markupPos + $markupLen - $endTagLen;

			// The alt text is whatever sits between the 2-character start tag and the end tag
			$alt = substr($markup, 2, $markupLen - $endTagLen - 2);

			$this->addImageTag($markupPos, $endTagPos, $endTagLen, $linkInfo, $alt);
		}
	}
976
977
	/**
978
	* Match reference images markup
979
	*
980
	* @return void
981
	*/
982 11
	protected function matchReferenceImages()
	{
		$regexp = '/!\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\](?: ?\\[([^\\x17[\\]]+)\\])?/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			$alt         = $m[1][0];
			$startTagPos = $m[0][1];
			$endTagPos   = $startTagPos + 2 + strlen($alt);

			if (isset($m[2][0], $this->refs[$m[2][0]]))
			{
				// Second pair of brackets with a known label: the end tag covers everything
				// that follows the alt text
				$id        = $m[2][0];
				$endTagLen = strlen($m[0][0]) - strlen($alt) - 2;
			}
			elseif (isset($this->refs[$alt]))
			{
				// Otherwise the alt text itself is used as label and the end tag is only the
				// closing bracket
				$id        = $alt;
				$endTagLen = 1;
			}
			else
			{
				// Unknown reference, leave the text alone
				continue;
			}

			$this->addImageTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id], $alt);
		}
	}
1011
1012
	/**
	* Match inline code spans
	*
	* Walks the list of backtick markers and pairs each marker with the first marker of the
	* same length found by following the chain of 'next' positions
	*
	* @return void
	*/
	protected function matchInlineCode()
	{
		$markers = $this->getInlineCodeMarkers();
		$total   = count($markers);
		for ($left = 0; $left < $total - 1; ++$left)
		{
			if ($this->text[$markers[$left]['pos']] !== '`')
			{
				// Adjust the left marker if its first backtick was escaped
				++$markers[$left]['pos'];
				--$markers[$left]['len'];
			}

			// Only follow markers that sit exactly where the previous one says the next one is
			$expectedPos = $markers[$left]['next'];
			for ($right = $left + 1; $right < $total; ++$right)
			{
				if ($markers[$right]['pos'] !== $expectedPos)
				{
					break;
				}
				if ($markers[$right]['len'] === $markers[$left]['len'])
				{
					$this->addInlineCodeTags($markers[$left], $markers[$right]);

					// Resume right after the closing marker on the next iteration
					$left = $right;
					break;
				}
				$expectedPos = $markers[$right]['next'];
			}
		}
	}
1044
1045
	/**
	* Match inline links markup, e.g. [text](url "title")
	*
	* Every match is forwarded to addLinkTag() along with the position and length of its
	* closing part and the raw link info captured between the parentheses
	*
	* @return void
	*/
	protected function matchInlineLinks()
	{
		preg_match_all(
			'/\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/',
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE | PREG_SET_ORDER
		);
		foreach ($matches as $m)
		{
			list($match, $matchPos) = $m[0];
			$linkInfo = $m[1][0];

			// The end tag spans "](", the link info and the closing ")"
			$endTagLen = strlen($linkInfo) + 3;
			$endTagPos = $matchPos + strlen($match) - $endTagLen;

			$this->addLinkTag($matchPos, $endTagPos, $endTagLen, $linkInfo);
		}
	}
1068
1069
	/**
	* Capture link reference definitions in current text
	*
	* Resets $this->refs and $this->hasRefs, then records every definition of the form
	* "[id]: url" (optionally followed by a title) under its lowercased id. Each definition is
	* also covered by an ignore tag.
	*
	* @return void
	*/
	protected function matchLinkReferences()
	{
		$this->refs    = [];
		$this->hasRefs = false;
		if (strpos($this->text, ']:') === false)
		{
			// No definition can exist without this sequence
			return;
		}

		$regexp = '/^\\x1A* {0,3}\\[([^\\x17\\]]+)\\]: *([^\\s\\x17]+ *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))?)[^\\x17\\n]*\\n?/m';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
		foreach ($matches as $m)
		{
			// The whole definition is ignored, even if it turns out to be a duplicate
			// NOTE(review): the third argument (-2) is presumably the tag's priority -- confirm
			list($definition, $definitionPos) = $m[0];
			$this->parser->addIgnoreTag($definitionPos, strlen($definition), -2);

			// Only the first definition of a given id is retained
			$id = strtolower($m[1][0]);
			if (!isset($this->refs[$id]))
			{
				$this->hasRefs   = true;
				$this->refs[$id] = $m[2][0];
			}
		}
	}
1100
1101
	/**
	* Match inline and reference links
	*
	* Inline links are only attempted if the text contains "](" and reference links only if
	* link references have been captured
	*
	* @return void
	*/
	protected function matchLinks()
	{
		$mayHaveInlineLinks = (strpos($this->text, '](') !== false);
		if ($mayHaveInlineLinks)
		{
			$this->matchInlineLinks();
		}

		if ($this->hasRefs)
		{
			$this->matchReferenceLinks();
		}
	}
1117
1118
	/**
	* Match reference links markup, e.g. [text][id] or [id]
	*
	* Iterates over the labels returned by getLabels(), using each key as the position of the
	* link's start tag and each value as a reference id. If a label is directly followed
	* (optionally after one space) by another label that resolves to a known reference, the
	* second label is used as the id and becomes part of the end tag.
	*
	* @return void
	*/
	protected function matchReferenceLinks()
	{
		$labels = $this->getLabels();
		foreach ($labels as $startTagPos => $id)
		{
			// By default the end tag is the "]" that closes this label
			$endTagPos = $startTagPos + 1 + strlen($id);
			$endTagLen = 1;

			// Position at which a second label would start, allowing for one space
			$nextLabelPos = $endTagPos + 1;
			if ($this->text[$nextLabelPos] === ' ')
			{
				++$nextLabelPos;
			}

			if (isset($labels[$nextLabelPos]))
			{
				$nextId = $labels[$nextLabelPos];
				if (isset($this->refs[$nextId]))
				{
					// Extend the end tag up to the end of the second label
					$id        = $nextId;
					$endTagLen = $nextLabelPos + 2 + strlen($id) - $endTagPos;
				}
			}

			if (isset($this->refs[$id]))
			{
				$this->addLinkTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id]);
			}
		}
	}
1147
1148
	/**
	* Match strikethrough, e.g. ~~text~~
	*
	* Creates a DEL tag pair around every match, consuming two characters on each side
	*
	* @return void
	*/
	protected function matchStrikethrough()
	{
		$firstPos = strpos($this->text, '~~');
		if ($firstPos === false)
		{
			return;
		}

		// Start matching at the first pair of tildes
		preg_match_all('/~~[^\\x17]+?~~/', $this->text, $matches, PREG_OFFSET_CAPTURE, $firstPos);
		foreach ($matches[0] as $m)
		{
			$startTagPos = $m[1];
			$endTagPos   = $startTagPos + strlen($m[0]) - 2;

			$this->parser->addTagPair('DEL', $startTagPos, 2, $endTagPos, 2);
		}
	}
1175
1176
	/**
	* Match superscript, e.g. x^2 or x^2^3
	*
	* Each caret in a run of non-whitespace characters opens a SUP tag that consumes the caret.
	* All the SUP tags of a run end at the end of that run, with an empty end tag.
	*
	* @return void
	*/
	protected function matchSuperscript()
	{
		$firstCaret = strpos($this->text, '^');
		if ($firstCaret === false)
		{
			return;
		}

		// Start matching at the first caret
		preg_match_all('/\\^[^\\x17\\s]++/', $this->text, $matches, PREG_OFFSET_CAPTURE, $firstCaret);
		foreach ($matches[0] as $m)
		{
			list($run, $runPos) = $m;
			$runEndPos = $runPos + strlen($run);

			// The run starts with a caret, so the first chunk is always empty and is skipped.
			// Every remaining chunk is the text that follows one caret.
			$caretPos = $runPos;
			foreach (array_slice(explode('^', $run), 1) as $chunk)
			{
				$this->parser->addTagPair('SUP', $caretPos, 1, $runEndPos, 0);
				$caretPos += 1 + strlen($chunk);
			}
		}
	}
1212
1213
	/**
	* Overwrite part of the text with substitution characters ^Z (0x1A)
	*
	* Does nothing if the length is not greater than zero
	*
	* @param  integer $pos Start of the range
	* @param  integer $len Length of text to overwrite
	* @return void
	*/
	protected function overwrite($pos, $len)
	{
		if ($len > 0)
		{
			$before = substr($this->text, 0, $pos);
			$filler = str_repeat("\x1A", $len);
			$after  = substr($this->text, $pos + $len);

			$this->text = $before . $filler . $after;
		}
	}
1227
1228
	/**
	* Process a list of emphasis markup strings
	*
	* Keeps track of the position of the last unclosed EM and STRONG openers. For each span of
	* emphasis characters, the span first closes whatever it can (1 character for EM, 2 for
	* STRONG) then uses up to 3 of its remaining characters to open new ones.
	*
	* @param  array[] $block List of [matchPos, matchLen] pairs
	* @return void
	*/
	protected function processEmphasisBlock(array $block)
	{
		$emStart     = null;
		$strongStart = null;
		foreach ($block as $span)
		{
			list($spanPos, $spanLen) = $span;

			// A span can open if its last character is not followed by whitespace, and can
			// close if its first character is not preceded by whitespace
			$opens  = !$this->isBeforeWhitespace($spanPos + $spanLen - 1);
			$closes = !$this->isAfterWhitespace($spanPos);

			// Number of characters usable for closing: bit 1 closes EM, bit 2 closes STRONG
			$closeLen = 0;
			if ($closes)
			{
				$closeLen = min($spanLen, 3);
			}
			$endsEm     = isset($emStart)     && ($closeLen & 1);
			$endsStrong = isset($strongStart) && ($closeLen & 2);

			$emEnd     = $spanPos;
			$strongEnd = $spanPos;
			$unused    = $spanLen;

			// EM and STRONG were opened by the same span: shift one of them so that their
			// start tags do not share the same characters
			if (isset($emStart) && $emStart === $strongStart)
			{
				if ($endsEm)
				{
					$emStart += 2;
				}
				else
				{
					++$strongStart;
				}
			}

			// Both are closed by this span: the one that was opened first is closed last
			if ($endsEm && $endsStrong)
			{
				if ($emStart < $strongStart)
				{
					$emEnd += 2;
				}
				else
				{
					++$strongEnd;
				}
			}

			if ($endsEm)
			{
				--$unused;
				$this->parser->addTagPair('EM', $emStart, 1, $emEnd, 1);
				$emStart = null;
			}
			if ($endsStrong)
			{
				$unused -= 2;
				$this->parser->addTagPair('STRONG', $strongStart, 2, $strongEnd, 2);
				$strongStart = null;
			}

			if ($opens)
			{
				// Whatever was not used for closing, capped at 3 characters, opens new tags
				// anchored at the tail of the span
				$unused  = min($unused, 3);
				$openPos = $spanPos + $spanLen - $unused;
				if ($unused & 1)
				{
					$emStart = $openPos;
				}
				if ($unused & 2)
				{
					$strongStart = $openPos;
				}
			}
		}
	}
1300
1301
	/**
	* Set a URL or IMG tag's attributes
	*
	* The trimmed link info is split at its first space: what precedes it is the URL, what
	* follows is the title once trimmed and stripped of its first and last characters (its
	* delimiters.) Both values go through decode() before being stored. The title attribute is
	* only set if the title is not empty.
	*
	* @param  Tag    $tag      URL or IMG tag
	* @param  string $linkInfo Link's info: an URL optionally followed by spaces and a title
	* @param  string $attrName Name of the URL attribute
	* @return void
	*/
	protected function setLinkAttributes(Tag $tag, $linkInfo, $attrName)
	{
		$info     = trim($linkInfo);
		$spacePos = strpos($info, ' ');
		if ($spacePos === false)
		{
			// No space means no title
			$url   = $info;
			$title = '';
		}
		else
		{
			$url   = substr($info, 0, $spacePos);
			$title = substr(trim(substr($info, $spacePos)), 1, -1);
		}

		$tag->setAttribute($attrName, $this->decode($url));
		if ($title > '')
		{
			$tag->setAttribute('title', $this->decode($title));
		}
	}
1326
}