Completed
Branch wip/litedown (377511)
by Josh
03:42
created

Blocks::closeList()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 0
cts 10
cp 0
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 6
nc 3
nop 2
crap 12
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Litedown\Parser;
9
10
use s9e\TextFormatter\Parser as Rules;
11
12
class Blocks extends AbstractParser
{
	/**
	* Setext underline candidates collected by matchSetextLines()
	*
	* Each entry is keyed by the offset of the character that precedes the underline (normally
	* the LF that ends the header's text line) and holds the keys "endTagLen", "endTagPos",
	* "quoteDepth" and "tagName".
	*
	* Declared explicitly so that the property is not created dynamically at runtime, which is
	* deprecated as of PHP 8.2.
	*
	* @var array
	*/
	protected $setextLines = [];

	/**
	* Close a list at given offset
	*
	* Adds the end tags for the list and for its last open item, then, if the list is still
	* marked as tight, removes the RULE_CREATE_PARAGRAPHS flag from every item tag of the list.
	*
	* @param  array   $list         List record as built in execute(); the keys "listTag",
	*                               "itemTag", "itemTags" and "tight" are read here
	* @param  integer $textBoundary Offset at which the zero-width end tags are created
	* @return void
	*/
	protected function closeList(array $list, $textBoundary)
	{
		$this->parser->addEndTag('LIST', $textBoundary, 0)->pairWith($list['listTag']);
		$this->parser->addEndTag('LI',   $textBoundary, 0)->pairWith($list['itemTag']);

		if ($list['tight'])
		{
			foreach ($list['itemTags'] as $itemTag)
			{
				$itemTag->removeFlags(Rules::RULE_CREATE_PARAGRAPHS);
			}
		}
	}

	/**
	* Compute the amount of text to ignore at the start of a quote line
	*
	* Strips up to $maxQuoteDepth leading quote markers (optional spaces, a ">" and one
	* optional space) from a copy of the markup and returns how many bytes were removed.
	*
	* @param  string  $str           Original quote markup
	* @param  integer $maxQuoteDepth Maximum quote depth
	* @return integer                Number of characters to ignore
	*/
	protected function computeQuoteIgnoreLen($str, $maxQuoteDepth)
	{
		$remaining = $str;
		while (--$maxQuoteDepth >= 0)
		{
			// The pattern is anchored, so each pass removes at most one marker
			$remaining = preg_replace('/^ *> ?/', '', $remaining);
		}

		return strlen($str) - strlen($remaining);
	}

	/**
	* {@inheritdoc}
	*
	* Walks through the text one line at a time and creates the block-level tags: QUOTE, CODE
	* (indented and fenced), LIST/LI, HR, ATX headers (H1 to H6) and Setext headers (H1/H2).
	* Markup that has been consumed is passed to $this->overwrite() and block boundaries are
	* recorded with markBoundary().
	*
	* State is carried from one line to the next in local variables, which makes the order of
	* the steps inside the loop significant.
	*
	* @return void
	*/
	protected function execute()
	{
		// Populate $this->setextLines before the line-by-line pass needs it
		$this->matchSetextLines();

		// State carried over from one line to the next
		$codeFence    = null;
		$codeIndent   = 4;
		$codeTag      = null;
		$lineIsEmpty  = true;
		$lists        = [];
		$listsCnt     = 0;
		$newContext   = false;
		$quotes       = [];
		$quotesCnt    = 0;
		$textBoundary = 0;

		// One match per line. Captures:
		//  1: blockquote markup (one or more ">")
		//  2: indentation (spaces and tabs)
		//  3: a line made of *, - or _ (horizontal rule candidate) or a run of =
		//  4: list item markup (bullet or number followed by a dot) plus trailing whitespace
		//  5: ATX header markup or a code fence, including anything that follows the fence
		$regexp = '/^(?:(?=[-*+\\d \\t>`~#_])((?: {0,3}> ?)+)?([ \\t]+)?(\\* *\\* *\\*[* ]*$|- *- *-[- ]*$|_ *_ *_[_ ]*$|=+$)?((?:[-*+]|\\d+\\.)[ \\t]+(?=\\S))?[ \\t]*(#{1,6}[ \\t]+|```+[^`\\n]*$|~~~+[^~\\n]*$)?)?/m';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			$matchPos   = $m[0][1];
			$matchLen   = strlen($m[0][0]);
			$ignoreLen  = 0;
			$quoteDepth = 0;

			// If the last line was empty then this is not a continuation, and vice-versa
			$continuation = !$lineIsEmpty;

			// Capture the position of the end of the line and determine whether the line is empty
			$lfPos       = strpos($this->text, "\n", $matchPos);
			$lineIsEmpty = ($lfPos === $matchPos + $matchLen && empty($m[3][0]) && empty($m[4][0]) && empty($m[5][0]));

			// If the line is empty and it's the first empty line then we break current paragraph.
			$breakParagraph = ($lineIsEmpty && $continuation);

			// Count quote marks
			if (!empty($m[1][0]))
			{
				$quoteDepth = substr_count($m[1][0], '>');
				$ignoreLen  = strlen($m[1][0]);
				if (isset($codeTag) && $codeTag->hasAttribute('quoteDepth'))
				{
					// Inside a fenced code block, quote marks beyond the depth at which the
					// fence was opened are not counted as quote markup
					$quoteDepth = min($quoteDepth, $codeTag->getAttribute('quoteDepth'));
					$ignoreLen  = $this->computeQuoteIgnoreLen($m[1][0], $quoteDepth);
				}

				// Overwrite quote markup
				$this->overwrite($matchPos, $ignoreLen);
			}

			// Close supernumerary quotes
			if ($quoteDepth < $quotesCnt && !$continuation)
			{
				$newContext = true;

				do
				{
					$this->parser->addEndTag('QUOTE', $textBoundary, 0)
					             ->pairWith(array_pop($quotes));
				}
				while ($quoteDepth < --$quotesCnt);
			}

			// Open new quotes
			if ($quoteDepth > $quotesCnt && !$lineIsEmpty)
			{
				$newContext = true;

				do
				{
					$tag = $this->parser->addStartTag('QUOTE', $matchPos, 0, $quotesCnt - 999);
					$quotes[] = $tag;
				}
				while ($quoteDepth > ++$quotesCnt);
			}

			// Compute the width of the indentation
			$indentWidth = 0;
			$indentPos   = 0;
			if (!empty($m[2][0]) && !$codeFence)
			{
				$indentStr = $m[2][0];
				$indentLen = strlen($indentStr);
				do
				{
					if ($indentStr[$indentPos] === ' ')
					{
						++$indentWidth;
					}
					else
					{
						// Anything other than a space (i.e. a tab) advances the width to the
						// next multiple of 4
						$indentWidth = ($indentWidth + 4) & ~3;
					}
				}
				while (++$indentPos < $indentLen && $indentWidth < $codeIndent);
			}

			// Test whether we're out of a code block
			if (isset($codeTag) && !$codeFence && $indentWidth < $codeIndent && !$lineIsEmpty)
			{
				$newContext = true;
			}

			if ($newContext)
			{
				$newContext = false;

				// Close the code block if applicable
				if (isset($codeTag))
				{
					if ($textBoundary > $codeTag->getPos())
					{
						// Overwrite the whole block
						$this->overwrite($codeTag->getPos(), $textBoundary - $codeTag->getPos());

						$endTag = $this->parser->addEndTag('CODE', $textBoundary, 0, -1);
						$endTag->pairWith($codeTag);
					}
					else
					{
						// The code block is empty
						$codeTag->invalidate();
					}

					$codeTag = null;
					$codeFence = null;
				}

				// Close all the lists
				foreach ($lists as $list)
				{
					$this->closeList($list, $textBoundary);
				}
				$lists    = [];
				$listsCnt = 0;

				// Mark the block boundary
				if ($matchPos)
				{
					$this->markBoundary($matchPos - 1);
				}
			}

			if ($indentWidth >= $codeIndent)
			{
				if (isset($codeTag) || !$continuation)
				{
					// Adjust the amount of text being ignored
					$ignoreLen += $indentPos;

					if (!isset($codeTag))
					{
						// Create code block
						$codeTag = $this->parser->addStartTag('CODE', $matchPos + $ignoreLen, 0, -999);
					}

					// Clear the captures to prevent any further processing
					$m = [];
				}
			}
			else
			{
				$hasListItem = !empty($m[4][0]);

				if (!$indentWidth && !$continuation && !$hasListItem)
				{
					// Start of a new context
					$listIndex = -1;
				}
				elseif ($continuation && !$hasListItem)
				{
					// Continuation of current list item or paragraph
					$listIndex = $listsCnt - 1;
				}
				elseif (!$listsCnt)
				{
					// We're not inside of a list already, we can start one if there's a list item
					// and it's either not in continuation of a paragraph or immediately after a
					// block ("\x17" is the character written by markBoundary())
					if ($hasListItem && (!$continuation || $this->text[$matchPos - 1] === "\x17"))
					{
						// Start of a new list
						$listIndex = 0;
					}
					else
					{
						// We're in a normal paragraph
						$listIndex = -1;
					}
				}
				else
				{
					// We're inside of a list but we need to compute the depth
					$listIndex = 0;
					while ($listIndex < $listsCnt && $indentWidth > $lists[$listIndex]['maxIndent'])
					{
						++$listIndex;
					}
				}

				// Close deeper lists
				while ($listIndex < $listsCnt - 1)
				{
					$this->closeList(array_pop($lists), $textBoundary);
					--$listsCnt;
				}

				// If there's no list item at current index, we'll need to either create one or
				// drop down to previous index, in which case we have to adjust maxIndent
				if ($listIndex === $listsCnt && !$hasListItem)
				{
					--$listIndex;
				}

				if ($hasListItem && $listIndex >= 0)
				{
					$breakParagraph = true;

					// Compute the position and amount of text consumed by the item tag
					$tagPos = $matchPos + $ignoreLen + $indentPos;
					$tagLen = strlen($m[4][0]);

					// Create a LI tag that consumes its markup
					$itemTag = $this->parser->addStartTag('LI', $tagPos, $tagLen);

					// Overwrite the markup
					$this->overwrite($tagPos, $tagLen);

					// If the list index is within current lists count it means this is not a new
					// list and we have to close the last item. Otherwise, it's a new list that we
					// have to create
					if ($listIndex < $listsCnt)
					{
						$this->parser->addEndTag('LI', $textBoundary, 0)
						             ->pairWith($lists[$listIndex]['itemTag']);

						// Record the item in the list
						$lists[$listIndex]['itemTag']    = $itemTag;
						$lists[$listIndex]['itemTags'][] = $itemTag;
					}
					else
					{
						++$listsCnt;

						if ($listIndex)
						{
							$minIndent = $lists[$listIndex - 1]['maxIndent'] + 1;
							$maxIndent = max($minIndent, $listIndex * 4);
						}
						else
						{
							$minIndent = 0;
							$maxIndent = $indentWidth;
						}

						// Create a 0-width LIST tag right before the item tag LI
						$listTag = $this->parser->addStartTag('LIST', $tagPos, 0);

						// Test whether the list item ends with a dot, as in "1."
						if (strpos($m[4][0], '.') !== false)
						{
							$listTag->setAttribute('type', 'decimal');

							$start = (int) $m[4][0];
							if ($start !== 1)
							{
								$listTag->setAttribute('start', $start);
							}
						}

						// Record the new list depth
						$lists[] = [
							'listTag'   => $listTag,
							'itemTag'   => $itemTag,
							'itemTags'  => [$itemTag],
							'minIndent' => $minIndent,
							'maxIndent' => $maxIndent,
							'tight'     => true
						];
					}
				}

				// If we're in a list, on a non-empty line preceded with a blank line...
				if ($listsCnt && !$continuation && !$lineIsEmpty)
				{
					// ...and this is not the first item of the list...
					if (count($lists[0]['itemTags']) > 1 || !$hasListItem)
					{
						// ...every list that is currently open becomes loose
						foreach ($lists as &$list)
						{
							$list['tight'] = false;
						}
						unset($list);
					}
				}

				// Text must be indented one more level than the open lists to count as code
				$codeIndent = ($listsCnt + 1) * 4;
			}

			if (isset($m[5]))
			{
				// Headers
				if ($m[5][0][0] === '#')
				{
					$startTagLen = strlen($m[5][0]);
					$startTagPos = $matchPos + $matchLen - $startTagLen;
					$endTagLen   = $this->getAtxHeaderEndTagLen($matchPos + $matchLen, $lfPos);
					$endTagPos   = $lfPos - $endTagLen;

					// The number of leading # characters (1 to 6) selects the tag, H1 to H6
					$this->parser->addTagPair('H' . strspn($m[5][0], '#', 0, 6), $startTagPos, $startTagLen, $endTagPos, $endTagLen);

					// Mark the start and the end of the header as boundaries
					$this->markBoundary($startTagPos);
					$this->markBoundary($lfPos);

					if ($continuation)
					{
						$breakParagraph = true;
					}
				}
				// Code fence
				elseif ($m[5][0][0] === '`' || $m[5][0][0] === '~')
				{
					$tagPos = $matchPos + $ignoreLen;
					$tagLen = $lfPos - $tagPos;

					// A fence only closes the block if the captured text is exactly the same
					// as the run of characters that opened it
					if (isset($codeTag) && $m[5][0] === $codeFence)
					{
						$endTag = $this->parser->addEndTag('CODE', $tagPos, $tagLen, -1);
						$endTag->pairWith($codeTag);

						$this->parser->addIgnoreTag($textBoundary, $tagPos - $textBoundary);

						// Overwrite the whole block
						$this->overwrite($codeTag->getPos(), $tagPos + $tagLen - $codeTag->getPos());
						$codeTag = null;
						$codeFence = null;
					}
					elseif (!isset($codeTag))
					{
						// Create code block
						$codeTag   = $this->parser->addStartTag('CODE', $tagPos, $tagLen);
						$codeFence = substr($m[5][0], 0, strspn($m[5][0], '`~'));
						$codeTag->setAttribute('quoteDepth', $quoteDepth);

						// Ignore the next character, which should be a newline
						$this->parser->addIgnoreTag($tagPos + $tagLen, 1);

						// Add the language if present, e.g. ```php
						$lang = trim(trim($m[5][0], '`~'));
						if ($lang !== '')
						{
							$codeTag->setAttribute('lang', $lang);
						}
					}
				}
			}
			elseif (!empty($m[3][0]) && !$listsCnt && $this->text[$matchPos + $matchLen] !== "\x17")
			{
				// Horizontal rule
				$this->parser->addSelfClosingTag('HR', $matchPos + $ignoreLen, $matchLen - $ignoreLen);
				$breakParagraph = true;

				// Mark the end of the line as a boundary
				$this->markBoundary($lfPos);
			}
			elseif (isset($this->setextLines[$lfPos]) && $this->setextLines[$lfPos]['quoteDepth'] === $quoteDepth && !$lineIsEmpty && !$listsCnt && !isset($codeTag))
			{
				// Setext-style header
				$this->parser->addTagPair(
					$this->setextLines[$lfPos]['tagName'],
					$matchPos + $ignoreLen,
					0,
					$this->setextLines[$lfPos]['endTagPos'],
					$this->setextLines[$lfPos]['endTagLen']
				);

				// Mark the end of the Setext line
				$this->markBoundary($this->setextLines[$lfPos]['endTagPos'] + $this->setextLines[$lfPos]['endTagLen']);
			}

			if ($breakParagraph)
			{
				$this->parser->addParagraphBreak($textBoundary);
				$this->markBoundary($textBoundary);
			}

			// Only lines with content move the boundary used to close blocks
			if (!$lineIsEmpty)
			{
				$textBoundary = $lfPos;
			}

			if ($ignoreLen)
			{
				$this->parser->addIgnoreTag($matchPos, $ignoreLen, 1000);
			}
		}
	}

	/**
	* Return the length of the markup at the end of an ATX header
	*
	* The closing markup is the trailing run of optional whitespace, optional # characters and
	* optional whitespace found at the end of the header's text. The pattern can match an empty
	* string, in which case 0 is returned.
	*
	* @param  integer $startPos Start of the header's text
	* @param  integer $endPos   End of the header's text
	* @return integer
	*/
	protected function getAtxHeaderEndTagLen($startPos, $endPos)
	{
		$content = substr($this->text, $startPos, $endPos - $startPos);
		preg_match('/[ \\t]*#*[ \\t]*$/', $content, $m);

		return strlen($m[0]);
	}

	/**
	* Mark the boundary of a block in the original text
	*
	* Replaces the byte at given offset in $this->text with the "\x17" marker that execute()
	* tests for.
	*
	* @param  integer $pos
	* @return void
	*/
	protected function markBoundary($pos)
	{
		$this->text[$pos] = "\x17";
	}

	/**
	* Capture and store lines that contain a Setext-style header
	*
	* Resets $this->setextLines, then records every line that consists solely of - or =
	* characters, optionally preceded by blockquote markup and followed by spaces.
	*
	* @return void
	*/
	protected function matchSetextLines()
	{
		$this->setextLines = [];

		// Skip the regexp entirely if the text cannot contain an underline
		if (strpos($this->text, '-') === false && strpos($this->text, '=') === false)
		{
			return;
		}

		// Capture any series of - or = alone on a line, optionally preceded with the
		// angle brackets notation used in blockquotes
		$regexp = '/^(?=[-=>])(?:> ?)*(?=[-=])(?:-+|=+) *$/m';
		if (!preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE))
		{
			return;
		}

		foreach ($matches[0] as list($match, $matchPos))
		{
			// Compute the position of the end tag. We start on the LF character before the
			// match and keep rewinding until we find a non-space character
			$endTagPos = $matchPos - 1;
			while ($endTagPos > 0 && $this->text[$endTagPos - 1] === ' ')
			{
				--$endTagPos;
			}

			// Store at the offset of the LF character
			$this->setextLines[$matchPos - 1] = [
				'endTagLen'  => $matchPos + strlen($match) - $endTagPos,
				'endTagPos'  => $endTagPos,
				'quoteDepth' => substr_count($match, '>'),
				'tagName'    => ($match[0] === '=') ? 'H1' : 'H2'
			];
		}
	}
}