Completed
Branch wip/litedown (377511)
by Josh
03:42
created

Parser::matchReferenceLinks()   B

Complexity

Conditions 5
Paths 9

Size

Total Lines 24
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 9
nop 0
dl 0
loc 24
ccs 21
cts 21
cp 1
crap 5
rs 8.5125
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Litedown;
9
10
use s9e\TextFormatter\Parser\Tag;
11
use s9e\TextFormatter\Plugins\Litedown\Parser\Blocks;
12
use s9e\TextFormatter\Plugins\Litedown\Parser\Emphasis;
13
use s9e\TextFormatter\Plugins\Litedown\Parser\ForcedLineBreaks;
14
use s9e\TextFormatter\Plugins\Litedown\Parser\InlineCode;
15
use s9e\TextFormatter\Plugins\Litedown\Parser\Strikethrough;
16
use s9e\TextFormatter\Plugins\Litedown\Parser\Superscript;
17
use s9e\TextFormatter\Plugins\ParserBase;
18
19
class Parser extends ParserBase
20
{
21
	/**
	* @var bool Set by init(): true when the text contained at least one backslash-escaped
	*           special character, in which case decode() has to restore those literals
	*/
	protected $hasEscapedChars;

	/**
	* @var bool Set by matchLinkReferences(): true when at least one link reference definition
	*           was captured from the current text
	*/
	protected $hasRefs;

	/**
	* @var array Captured link references, using the lowercased label as key and the link info
	*            (a URL optionally followed by a title) as value
	*/
	protected $refs;

	/**
	* @var string Working copy of the text being parsed. It is unset at the end of parse()
	*/
	protected $text;
40
41
	/**
	* {@inheritdoc}
	*
	* Runs the Litedown passes in a fixed order over a working copy of the text: block-level
	* markup, link reference definitions, inline code, images, links, then the remaining inline
	* markup
	*/
	public function parse($text, array $matches)
	{
		$this->init($text);

		// Block-level markup goes first. The value returned by the pass becomes the new working
		// copy of the text
		$blocks     = new Blocks($this->parser);
		$this->text = $blocks->parse($this->text);

		// Link references are captured after block markup has been overwritten
		$this->matchLinkReferences();

		// Inline code runs before any other inline markup to avoid false positives in the latter
		$inlineCode = new InlineCode($this->parser);
		$this->text = $inlineCode->parse($this->text);

		// Images must be matched before links
		$this->matchImages();
		$this->matchLinks();

		// Remaining inline passes. Their return value is not used
		$strikethrough = new Strikethrough($this->parser);
		$strikethrough->parse($this->text);

		$superscript = new Superscript($this->parser);
		$superscript->parse($this->text);

		$emphasis = new Emphasis($this->parser);
		$emphasis->parse($this->text);

		$forcedLineBreaks = new ForcedLineBreaks($this->parser);
		$forcedLineBreaks->parse($this->text);

		// Release the memory used by the working copy
		unset($this->text);
	}
68
69
	/**
	* Create an IMG tag pair for given span of text, then blank out the span
	*
	* @param  int    $startTagPos Position of the start tag, which covers the two characters "!["
	* @param  int    $endTagPos   Position of the end tag
	* @param  int    $endTagLen   Length of the end tag
	* @param  string $linkInfo    URL optionally followed by space and a title
	* @param  string $alt         Encoded text used for the alt attribute
	* @return void
	*/
	protected function addImageTag($startTagPos, $endTagPos, $endTagLen, $linkInfo, $alt)
	{
		$imgTag = $this->parser->addTagPair('IMG', $startTagPos, 2, $endTagPos, $endTagLen);
		$this->setLinkAttributes($imgTag, $linkInfo, 'src');

		$altText = $this->decode($alt);
		$imgTag->setAttribute('alt', $altText);

		// The whole markup is overwritten, from the start tag to the end of the end tag, so that
		// later passes do not match anything inside of it
		$markupLen = $endTagPos + $endTagLen - $startTagPos;
		$this->overwrite($startTagPos, $markupLen);
	}
88
89
	/**
	* Create a URL tag pair for given span of text, then blank out its delimiters
	*
	* @param  int    $startTagPos Position of the start tag, which covers the opening bracket
	* @param  int    $endTagPos   Position of the end tag
	* @param  int    $endTagLen   Length of the end tag
	* @param  string $linkInfo    URL optionally followed by space and a title
	* @return void
	*/
	protected function addLinkTag($startTagPos, $endTagPos, $endTagLen, $linkInfo)
	{
		// An end tag of length 1 is a lone closing bracket, which means an implicit reference.
		// Those get a slightly worse priority. Explicit references and inline links get a
		// slightly better one, which gives them precedence over possible BBCodes such as
		// [b](https://en.wikipedia.org/wiki/B)
		if ($endTagLen === 1)
		{
			$priority = 1;
		}
		else
		{
			$priority = -1;
		}

		$urlTag = $this->parser->addTagPair('URL', $startTagPos, 1, $endTagPos, $endTagLen, $priority);
		$this->setLinkAttributes($urlTag, $linkInfo, 'url');

		// Only the delimiters are overwritten, the link's text is left untouched
		$this->overwrite($startTagPos, 1);
		$this->overwrite($endTagPos, $endTagLen);
	}
112
113
	/**
	* Turn a chunk of encoded text into a value suitable for an attribute
	*
	* HTML entities are decoded if the decodeHtmlEntities option is enabled, 0x1A characters are
	* removed, and the sequences produced by encode() are replaced with the literal character
	* they stand for
	*
	* @param  string $str Encoded text
	* @return string      Decoded text
	*/
	protected function decode($str)
	{
		$decodeEntities = ($this->config['decodeHtmlEntities'] && strpos($str, '&') !== false);
		if ($decodeEntities)
		{
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		}

		// Remove the substitution characters left by overwritten markup
		$str = str_replace("\x1A", '', $str);

		// Nothing else to do if the original text did not contain any escaped characters
		if (!$this->hasEscapedChars)
		{
			return $str;
		}

		// Reverse mapping of the one used in encode(), minus the backslash
		$literals = [
			"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
			"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
			"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
			"\x1BC" => '~'
		];

		return strtr($str, $literals);
	}
144
145
	/**
	* Replace backslash-escaped special characters with a two-byte placeholder
	*
	* Each placeholder is made of 0x1B followed by one character in the 0-9 or A-C range. They
	* are turned back into literals by decode()
	*
	* @param  string $str Original text
	* @return string      Encoded text
	*/
	protected function encode($str)
	{
		$placeholders = [
			'\\!' => "\x1B0", '\\"' => "\x1B1", "\\'" => "\x1B2", '\\('  => "\x1B3",
			'\\)' => "\x1B4", '\\*' => "\x1B5", '\\[' => "\x1B6", '\\\\' => "\x1B7",
			'\\]' => "\x1B8", '\\^' => "\x1B9", '\\_' => "\x1BA", '\\`'  => "\x1BB",
			'\\~' => "\x1BC"
		];

		return strtr($str, $placeholders);
	}
163
164
	/**
	* Collect every bracketed label found in current text
	*
	* A label may contain one level of nested brackets
	*
	* @return array Position of each label's opening bracket as keys, lowercased label as values
	*/
	protected function getLabels()
	{
		$regexp = '/\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\]/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);

		$labels = [];
		foreach ($matches[1] as $capture)
		{
			list($label, $labelPos) = $capture;

			// The captured offset points at the label's content, the bracket is one byte before
			$bracketPos          = $labelPos - 1;
			$labels[$bracketPos] = strtolower($label);
		}

		return $labels;
	}
185
186
	/**
	* Prepare this parser's working copy of the text
	*
	* Records whether the text contains escaped special characters, encodes them if it does,
	* then appends a terminator to the text
	*
	* @param  string $text Text to be parsed
	* @return void
	*/
	protected function init($text)
	{
		// The cheap strpos() test saves a regexp match on text that has no backslash at all
		$this->hasEscapedChars = (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*[\\\\\\]^_`~]/', $text));

		if ($this->hasEscapedChars)
		{
			// Escaped literals that would otherwise have a special meaning are encoded so that
			// regexps do not have to take them into account
			$text = $this->encode($text);
		}

		// Two newlines and a non-whitespace character are appended to the text in order to
		// trigger the closure of all open blocks such as quotes and lists
		$this->text = $text . "\n\n\x17";
	}
213
214
	/**
	* Match inline and reference images
	*
	* Nothing is done if the text does not contain the "![" sequence
	*
	* @return void
	*/
	protected function matchImages()
	{
		$pos = strpos($this->text, '![');
		if ($pos !== false)
		{
			// Inline images need a "](" somewhere after the first "!["
			if (strpos($this->text, '](', $pos) !== false)
			{
				$this->matchInlineImages();
			}

			// Reference images can only exist if at least one reference was captured
			if ($this->hasRefs)
			{
				$this->matchReferenceImages();
			}
		}
	}
235
236
	/**
	* Match inline images such as ![alt](url "title")
	*
	* @return void
	*/
	protected function matchInlineImages()
	{
		$regexp = '/!\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			list($markup, $startTagPos) = $m[0];
			$linkInfo  = $m[1][0];
			$markupLen = strlen($markup);

			// The end tag is made of "](" followed by the link info and ")"
			$endTagLen = 3 + strlen($linkInfo);
			$endTagPos = $startTagPos + $markupLen - $endTagLen;

			// The alt text sits between the 2-byte start tag and the end tag
			$alt = substr($markup, 2, $markupLen - $endTagLen - 2);

			$this->addImageTag($startTagPos, $endTagPos, $endTagLen, $linkInfo, $alt);
		}
	}
260
261
	/**
	* Match reference images such as ![alt][id] or ![alt]
	*
	* The explicit form uses the second label as reference id. The implicit form, as well as an
	* explicit form whose id is unknown, uses the alt text as id. Ids are matched
	* case-insensitively: matchLinkReferences() stores references under their lowercased label,
	* so ids are lowercased here before being looked up, the same way getLabels() does it for
	* reference links
	*
	* @return void
	*/
	protected function matchReferenceImages()
	{
		preg_match_all(
			'/!\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\](?: ?\\[([^\\x17[\\]]+)\\])?/',
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE | PREG_SET_ORDER
		);
		foreach ($matches as $m)
		{
			$startTagPos = $m[0][1];
			$alt         = $m[1][0];

			// By default the end tag is the single bracket that closes the alt text
			$endTagPos   = $startTagPos + 2 + strlen($alt);
			$endTagLen   = 1;

			// The alt text keeps its original case for the alt attribute, only the lookup key is
			// lowercased
			$id         = strtolower($alt);
			$explicitId = (isset($m[2][0])) ? strtolower($m[2][0]) : null;

			if (isset($explicitId, $this->refs[$explicitId]))
			{
				// Extend the end tag so that it covers the explicit reference as well
				$endTagLen = strlen($m[0][0]) - strlen($alt) - 2;
				$id        = $explicitId;
			}
			elseif (!isset($this->refs[$id]))
			{
				// Neither the explicit id nor the alt text match a known reference
				continue;
			}

			$this->addImageTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id], $alt);
		}
	}
295
296
	/**
	* Match inline links such as [text](url "title")
	*
	* @return void
	*/
	protected function matchInlineLinks()
	{
		$regexp = '/\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			list($markup, $startTagPos) = $m[0];
			$linkInfo = $m[1][0];

			// The end tag is made of "](" followed by the link info and ")"
			$endTagLen = 3 + strlen($linkInfo);
			$endTagPos = $startTagPos + strlen($markup) - $endTagLen;

			$this->addLinkTag($startTagPos, $endTagPos, $endTagLen, $linkInfo);
		}
	}
319
320
	/**
	* Capture the link reference definitions found in current text
	*
	* Resets $this->refs and $this->hasRefs, then stores each definition's link info under its
	* lowercased label. An ignore tag is added over every definition, including duplicates
	*
	* @return void
	*/
	protected function matchLinkReferences()
	{
		$this->refs    = [];
		$this->hasRefs = false;

		// A definition always contains "]:" so we can skip the regexp if there is none
		if (strpos($this->text, ']:') === false)
		{
			return;
		}

		$regexp = '/^\\x1A* {0,3}\\[([^\\x17\\]]+)\\]: *([^\\s\\x17]+ *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))?)[^\\x17\\n]*\\n?/m';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

		foreach ($matches as $m)
		{
			list($definition, $definitionPos) = $m[0];
			$this->parser->addIgnoreTag($definitionPos, strlen($definition), -2);

			// The first definition of a label wins, subsequent ones are only ignored
			$id = strtolower($m[1][0]);
			if (!isset($this->refs[$id]))
			{
				$this->refs[$id] = $m[2][0];
				$this->hasRefs   = true;
			}
		}
	}
351
352
	/**
	* Match inline links first, then reference links
	*
	* @return void
	*/
	protected function matchLinks()
	{
		// Inline links require the "](" sequence
		$hasInlineMarkup = (strpos($this->text, '](') !== false);
		if ($hasInlineMarkup)
		{
			$this->matchInlineLinks();
		}

		// Reference links require at least one captured reference
		if ($this->hasRefs)
		{
			$this->matchReferenceLinks();
		}
	}
368
369
	/**
	* Match reference links such as [text][id] or [text]
	*
	* Each label returned by getLabels() is treated as a potential link text. If it is followed
	* by another label, optionally separated by one space, whose content is a known reference
	* then that reference is used. Otherwise the label itself is used as reference id
	*
	* @return void
	*/
	protected function matchReferenceLinks()
	{
		$labels = $this->getLabels();
		foreach ($labels as $startTagPos => $label)
		{
			// The closing bracket of the label is the default end tag
			$endTagPos = $startTagPos + 1 + strlen($label);
			$endTagLen = 1;
			$id        = $label;

			// Position at which an explicit reference would start, after an optional space
			$nextPos = $endTagPos + 1;
			if ($this->text[$nextPos] === ' ')
			{
				++$nextPos;
			}

			if (isset($labels[$nextPos], $this->refs[$labels[$nextPos]]))
			{
				// Extend the end tag up to the closing bracket of the explicit reference
				$id        = $labels[$nextPos];
				$endTagLen = $nextPos + 2 + strlen($id) - $endTagPos;
			}

			if (!isset($this->refs[$id]))
			{
				continue;
			}

			$this->addLinkTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id]);
		}
	}
398
399
	/**
	* Replace a range of the text with as many substitution characters ^Z (0x1A)
	*
	* The length of the text is preserved so that positions captured earlier remain valid.
	* Nothing happens if the length is not greater than zero
	*
	* @param  int $pos Start of the range
	* @param  int $len Length of text to overwrite
	* @return void
	*/
	protected function overwrite($pos, $len)
	{
		if ($len <= 0)
		{
			return;
		}

		$filler     = str_repeat("\x1A", $len);
		$this->text = substr_replace($this->text, $filler, $pos, $len);
	}
413
414
	/**
	* Set the URL attribute of a URL or IMG tag, as well as its title if there is one
	*
	* The link info is split at its first space. What precedes it is the URL. What follows it is
	* the title, which is stripped of its first and last character (its delimiters)
	*
	* @param  Tag    $tag      URL or IMG tag
	* @param  string $linkInfo Link's info: an URL optionally followed by spaces and a title
	* @param  string $attrName Name of the URL attribute
	* @return void
	*/
	protected function setLinkAttributes(Tag $tag, $linkInfo, $attrName)
	{
		$url      = trim($linkInfo);
		$title    = '';
		$spacePos = strpos($url, ' ');
		if ($spacePos !== false)
		{
			$delimitedTitle = trim(substr($url, $spacePos));

			// The cast covers PHP versions on which substr() may return false
			$title = (string) substr($delimitedTitle, 1, -1);
			$url   = substr($url, 0, $spacePos);
		}

		$tag->setAttribute($attrName, $this->decode($url));

		// An empty title does not create an attribute
		if ($title !== '')
		{
			$tag->setAttribute('title', $this->decode($title));
		}
	}
439
}