1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @package s9e\TextFormatter |
5
|
|
|
* @copyright Copyright (c) 2010-2017 The s9e Authors |
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
7
|
|
|
*/ |
8
|
|
|
namespace s9e\TextFormatter\Plugins\Litedown; |
9
|
|
|
|
10
|
|
|
use s9e\TextFormatter\Parser\Tag; |
11
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\Blocks; |
12
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\Emphasis; |
13
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\ForcedLineBreaks; |
14
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\InlineCode; |
15
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\Strikethrough; |
16
|
|
|
use s9e\TextFormatter\Plugins\Litedown\Parser\Superscript; |
17
|
|
|
use s9e\TextFormatter\Plugins\ParserBase; |
18
|
|
|
|
19
|
|
|
class Parser extends ParserBase |
20
|
|
|
{ |
21
|
|
|
/**
 * @var bool Whether the text passed to init() contained at least one backslash-escaped
 *           special character. When true, init() encodes those escapes and decode()
 *           maps them back to their literal characters
 */
protected $hasEscapedChars;

/**
 * @var bool Whether at least one link reference definition was captured by
 *           matchLinkReferences() for the current text
 */
protected $hasRefs;

/**
 * @var array Array of [label => link info] where the label is lowercased and the link info is
 *            the captured URL optionally followed by a title
 */
protected $refs;

/**
 * @var string Text being parsed. Markup that has been consumed is overwritten with 0x1A
 *             characters, and the property is unset at the end of parse()
 */
protected $text;
40
|
|
|
|
41
|
|
|
/**
 * {@inheritdoc}
 *
 * Runs every Litedown pass over the text in a fixed order: block-level markup, link reference
 * definitions, inline code, images, links, then the remaining inline passes.
 */
public function parse($text, array $matches)
{
	$this->init($text);

	// Block-level markup goes first. The pass returns the text it worked on, which replaces
	// the current text
	$blocksPass = new Blocks($this->parser);
	$this->text = $blocksPass->parse($this->text);

	// Capture link references after block markup has been overwritten
	$this->matchLinkReferences();

	// Inline code is handled before any other inline markup to avoid false positives
	$inlineCodePass = new InlineCode($this->parser);
	$this->text     = $inlineCodePass->parse($this->text);

	// Images must be matched before links
	$this->matchImages();
	$this->matchLinks();

	// The remaining inline passes only receive the text, their return value is not used
	$strikethroughPass = new Strikethrough($this->parser);
	$strikethroughPass->parse($this->text);

	$superscriptPass = new Superscript($this->parser);
	$superscriptPass->parse($this->text);

	$emphasisPass = new Emphasis($this->parser);
	$emphasisPass->parse($this->text);

	$lineBreaksPass = new ForcedLineBreaks($this->parser);
	$lineBreaksPass->parse($this->text);

	// Release the working copy of the text
	unset($this->text);
}
68
|
|
|
|
69
|
|
|
/**
 * Create an IMG tag pair for given span of text and blank out its markup
 *
 * The start tag is always 2 characters long. The whole span, from the start tag to the end of
 * the end tag, is overwritten afterwards.
 *
 * @param  integer $startTagPos Start tag position
 * @param  integer $endTagPos   End tag position
 * @param  integer $endTagLen   End tag length
 * @param  string  $linkInfo    URL optionally followed by space and a title
 * @param  string  $alt         Value for the alt attribute
 * @return void
 */
protected function addImageTag($startTagPos, $endTagPos, $endTagLen, $linkInfo, $alt)
{
	$imgTag = $this->parser->addTagPair('IMG', $startTagPos, 2, $endTagPos, $endTagLen);
	$this->setLinkAttributes($imgTag, $linkInfo, 'src');

	$altText = $this->decode($alt);
	$imgTag->setAttribute('alt', $altText);

	// Overwrite everything from the start tag through the end of the end tag
	$markupLen = $endTagPos + $endTagLen - $startTagPos;
	$this->overwrite($startTagPos, $markupLen);
}
88
|
|
|
|
89
|
|
|
/**
 * Create a URL tag pair for given span of text and blank out its markup
 *
 * Only the 1-character start tag and the end tag are overwritten; the link's text between
 * them is left untouched.
 *
 * @param  integer $startTagPos Start tag position
 * @param  integer $endTagPos   End tag position
 * @param  integer $endTagLen   End tag length
 * @param  string  $linkInfo    URL optionally followed by space and a title
 * @return void
 */
protected function addLinkTag($startTagPos, $endTagPos, $endTagLen, $linkInfo)
{
	// An end tag of length 1 is an implicit reference and gets a slightly worse priority.
	// Anything longer is an explicit reference or an inline link and gets a slightly better
	// priority, which gives it precedence over possible BBCodes such as
	// [b](https://en.wikipedia.org/wiki/B)
	if ($endTagLen === 1)
	{
		$priority = 1;
	}
	else
	{
		$priority = -1;
	}

	$urlTag = $this->parser->addTagPair('URL', $startTagPos, 1, $endTagPos, $endTagLen, $priority);
	$this->setLinkAttributes($urlTag, $linkInfo, 'url');

	// Blank out the opening bracket, then the end tag, without touching the link's text
	$this->overwrite($startTagPos, 1);
	$this->overwrite($endTagPos, $endTagLen);
}
112
|
|
|
|
113
|
|
|
/**
 * Decode a chunk of encoded text to be used as an attribute value
 *
 * In order: decodes HTML entities if the decodeHtmlEntities config value is truthy and the
 * string contains an ampersand, removes 0x1A characters, then restores the literals encoded
 * by encode() if the current text contained escaped characters.
 *
 * @param  string $str Encoded text
 * @return string      Decoded text
 */
protected function decode($str)
{
	$decodeEntities = $this->config['decodeHtmlEntities'] && strpos($str, '&') !== false;
	if ($decodeEntities)
	{
		$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
	}

	// Remove the substitution characters left by overwritten markup
	$str = str_replace("\x1A", '', $str);

	if (!$this->hasEscapedChars)
	{
		return $str;
	}

	// Reverse of the map used in encode(): 0x1B followed by a code becomes the literal
	$literals = [
		"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
		"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
		"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
		"\x1BC" => '~'
	];

	return strtr($str, $literals);
}
144
|
|
|
|
145
|
|
|
/**
 * Encode escaped literals that have a special meaning
 *
 * Each backslash-escaped special character is replaced with a 2-byte sequence made of 0x1B
 * followed by a one-character code.
 *
 * @param  string $str Original text
 * @return string      Encoded text
 */
protected function encode($str)
{
	$sequences = [
		'\\!' => "\x1B0", '\\"' => "\x1B1", "\\'" => "\x1B2", '\\(' => "\x1B3",
		'\\)' => "\x1B4", '\\*' => "\x1B5", '\\[' => "\x1B6", '\\\\' => "\x1B7",
		'\\]' => "\x1B8", '\\^' => "\x1B9", '\\_' => "\x1BA", '\\`' => "\x1BB",
		'\\~' => "\x1BC"
	];

	return strtr($str, $sequences);
}
163
|
|
|
|
164
|
|
|
/**
 * Capture and return labels used in current text
 *
 * A label is the content of a pair of square brackets, which may itself contain one level of
 * nested brackets.
 *
 * @return array Position of each label's opening bracket as keys, lowercased content as values
 */
protected function getLabels()
{
	$regexp = '/\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\]/';
	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);

	$labels = [];
	foreach ($matches[1] as $capture)
	{
		// The captured content starts one character after the opening bracket
		$bracketPos          = $capture[1] - 1;
		$labels[$bracketPos] = strtolower($capture[0]);
	}

	return $labels;
}
185
|
|
|
|
186
|
|
|
/**
 * Initialize this parser with given text
 *
 * Records whether the text contains escaped characters, encodes them if it does, and stores
 * the text with a trailing "\n\n\x17" sequence appended.
 *
 * @param  string $text Text to be parsed
 * @return void
 */
protected function init($text)
{
	// The cheap strpos() test lets us skip the regexp when there is no backslash at all
	$this->hasEscapedChars = (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*[\\\\\\]^_`~]/', $text));

	if ($this->hasEscapedChars)
	{
		// Encode escaped literals that have a special meaning otherwise, so that the
		// regexps used by the other passes do not have to take them into account
		$text = $this->encode($text);
	}

	// A couple of newlines and a non-whitespace character are appended in order to trigger
	// the closure of all open blocks such as quotes and lists
	$this->text = $text . "\n\n\x17";
}
213
|
|
|
|
214
|
|
|
/**
 * Match images markup
 *
 * Does nothing if the text does not contain "![". Otherwise, inline images are matched if a
 * "](" sequence exists after the first "![", and reference images are matched if any link
 * reference was captured.
 *
 * @return void
 */
protected function matchImages()
{
	$pos = strpos($this->text, '![');
	if ($pos === false)
	{
		// No image markup at all, skip both regexps
		return;
	}

	// Inline images need a "](" somewhere after the first "!["
	if (strpos($this->text, '](', $pos) !== false)
	{
		$this->matchInlineImages();
	}

	// Reference images can only resolve if at least one reference was defined
	if ($this->hasRefs)
	{
		$this->matchReferenceImages();
	}
}
235
|
|
|
|
236
|
|
|
/**
 * Match inline images markup
 *
 * Matches markup in the form ![alt](link info) and adds an image tag for each match.
 *
 * @return void
 */
protected function matchInlineImages()
{
	$regexp = '/!\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/';
	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

	foreach ($matches as $m)
	{
		$markup      = $m[0][0];
		$markupLen   = strlen($markup);
		$startTagPos = $m[0][1];
		$linkInfo    = $m[1][0];

		// The end tag is made of "](", the link info and ")"
		$endTagLen = 3 + strlen($linkInfo);
		$endTagPos = $startTagPos + $markupLen - $endTagLen;

		// The alt text sits between the 2-character start tag and the end tag
		$alt = substr($markup, 2, $markupLen - $endTagLen - 2);

		$this->addImageTag($startTagPos, $endTagPos, $endTagLen, $linkInfo, $alt);
	}
}
260
|
|
|
|
261
|
|
|
/**
 * Match reference images markup
 *
 * Matches markup in the form ![alt] optionally followed by [id], with at most one space in
 * between. The explicit id is used if it matches a known reference, otherwise the alt text
 * is used as an implicit id. Markup that resolves to no known reference is left alone.
 *
 * Ids are lowercased before they are looked up because matchLinkReferences() stores
 * references under lowercased labels.
 *
 * @return void
 */
protected function matchReferenceImages()
{
	preg_match_all(
		'/!\\[((?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*)\\](?: ?\\[([^\\x17[\\]]+)\\])?/',
		$this->text,
		$matches,
		PREG_OFFSET_CAPTURE | PREG_SET_ORDER
	);
	foreach ($matches as $m)
	{
		$startTagPos = $m[0][1];
		$alt         = $m[1][0];
		$endTagPos   = $startTagPos + 2 + strlen($alt);

		// Default to an implicit reference: the end tag is the closing bracket only and the
		// alt text doubles as the id
		$endTagLen = 1;
		$id        = strtolower($alt);

		if (isset($m[2][0]))
		{
			$explicitId = strtolower($m[2][0]);
			if (isset($this->refs[$explicitId]))
			{
				// Explicit reference: the end tag spans everything after the alt text
				$endTagLen = strlen($m[0][0]) - strlen($alt) - 2;
				$id        = $explicitId;
			}
		}

		if (!isset($this->refs[$id]))
		{
			continue;
		}

		$this->addImageTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id], $alt);
	}
}
295
|
|
|
|
296
|
|
|
/**
 * Match inline links markup
 *
 * Matches markup in the form [text](link info) and adds a link tag for each match.
 *
 * @return void
 */
protected function matchInlineLinks()
{
	$regexp = '/\\[(?:[^\\x17[\\]]|\\[[^\\x17[\\]]*\\])*\\]\\(( *(?:[^\\x17\\s()]|\\([^\\x17\\s()]*\\))*(?=[ )]) *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))? *)\\)/';
	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

	foreach ($matches as $m)
	{
		$markupLen   = strlen($m[0][0]);
		$startTagPos = $m[0][1];
		$linkInfo    = $m[1][0];

		// The end tag is made of "](", the link info and ")"
		$endTagLen = 3 + strlen($linkInfo);
		$endTagPos = $startTagPos + $markupLen - $endTagLen;

		$this->addLinkTag($startTagPos, $endTagPos, $endTagLen, $linkInfo);
	}
}
319
|
|
|
|
320
|
|
|
/**
 * Capture link reference definitions in current text
 *
 * Resets the list of references, then records every definition found in the text under its
 * lowercased label. Every definition is covered by an ignore tag, including definitions whose
 * label was already recorded. The first definition of a label wins.
 *
 * @return void
 */
protected function matchLinkReferences()
{
	$this->hasRefs = false;
	$this->refs    = [];

	// Skip the regexp if the text cannot possibly contain a definition
	if (strpos($this->text, ']:') === false)
	{
		return;
	}

	$regexp = '/^\\x1A* {0,3}\\[([^\\x17\\]]+)\\]: *([^\\s\\x17]+ *(?:"[^\\x17]*?"|\'[^\\x17]*?\'|\\([^\\x17)]*\\))?)[^\\x17\\n]*\\n?/m';
	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

	foreach ($matches as $m)
	{
		list($definition, $definitionPos) = $m[0];
		$this->parser->addIgnoreTag($definitionPos, strlen($definition), -2);

		// Only record the reference if its label has not been seen yet
		$id = strtolower($m[1][0]);
		if (!isset($this->refs[$id]))
		{
			$this->hasRefs   = true;
			$this->refs[$id] = $m[2][0];
		}
	}
}
351
|
|
|
|
352
|
|
|
/**
 * Match inline and reference links
 *
 * Inline links are matched only if the text contains "](", reference links only if at least
 * one link reference was captured. Inline links are matched first.
 *
 * @return void
 */
protected function matchLinks()
{
	$mayHaveInlineLinks = (strpos($this->text, '](') !== false);
	if ($mayHaveInlineLinks)
	{
		$this->matchInlineLinks();
	}

	if (!$this->hasRefs)
	{
		return;
	}

	$this->matchReferenceLinks();
}
368
|
|
|
|
369
|
|
|
/**
 * Match reference links markup
 *
 * Goes through every label found in the text. If a label is directly followed by another
 * label (with at most one space in between) that matches a known reference, that second
 * label is used as an explicit id. Otherwise the label's own content is used as the id. A
 * link tag is added whenever the resulting id matches a known reference.
 *
 * @return void
 */
protected function matchReferenceLinks()
{
	$labels = $this->getLabels();
	foreach ($labels as $startTagPos => $label)
	{
		// Position of the closing bracket; by default it is the whole end tag
		$endTagPos = $startTagPos + 1 + strlen($label);
		$endTagLen = 1;

		// Position at which an explicit id label would start, past one optional space
		$nextPos = $endTagPos + 1;
		if ($this->text[$nextPos] === ' ')
		{
			++$nextPos;
		}

		$id = $label;
		if (isset($labels[$nextPos], $this->refs[$labels[$nextPos]]))
		{
			// Explicit reference: extend the end tag to the end of the second label
			$id        = $labels[$nextPos];
			$endTagLen = $nextPos + 2 + strlen($id) - $endTagPos;
		}

		if (isset($this->refs[$id]))
		{
			$this->addLinkTag($startTagPos, $endTagPos, $endTagLen, $this->refs[$id]);
		}
	}
}
398
|
|
|
|
399
|
|
|
/**
 * Overwrite part of the text with substitution characters ^Z (0x1A)
 *
 * The text keeps the same length, so positions computed before the call remain valid. Does
 * nothing if the length is not greater than zero.
 *
 * @param  integer $pos Start of the range
 * @param  integer $len Length of text to overwrite
 * @return void
 */
protected function overwrite($pos, $len)
{
	if ($len <= 0)
	{
		return;
	}

	$before     = substr($this->text, 0, $pos);
	$filler     = str_repeat("\x1A", $len);
	$after      = substr($this->text, $pos + $len);
	$this->text = $before . $filler . $after;
}
413
|
|
|
|
414
|
|
|
/**
 * Set a URL or IMG tag's attributes
 *
 * The link info is trimmed then split at its first space. What precedes the space is the URL.
 * What follows is trimmed and stripped of its first and last characters (the delimiters
 * around the title) to produce the title. The title attribute is only set if the title is
 * not empty.
 *
 * @param  Tag    $tag      URL or IMG tag
 * @param  string $linkInfo Link's info: an URL optionally followed by spaces and a title
 * @param  string $attrName Name of the URL attribute
 * @return void
 */
protected function setLinkAttributes(Tag $tag, $linkInfo, $attrName)
{
	$info     = trim($linkInfo);
	$spacePos = strpos($info, ' ');

	if ($spacePos === false)
	{
		// No space means no title, the whole info is the URL
		$url   = $info;
		$title = '';
	}
	else
	{
		$delimitedTitle = trim(substr($info, $spacePos));
		$title          = substr($delimitedTitle, 1, -1);
		$url            = substr($info, 0, $spacePos);
	}

	$tag->setAttribute($attrName, $this->decode($url));
	if ($title > '')
	{
		$tag->setAttribute('title', $this->decode($title));
	}
}
439
|
|
|
} |