1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Ahc; |
4
|
|
|
|
5
|
|
|
/**
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
 *
 * Supports most of the markdown specs except deep nested elements.
 * Check readme.md for the details of its features && limitations.
 *
 * Parsing happens in two passes: a line-by-line block pass (headings, code,
 * lists, quotes, tables, paragraphs) that appends to $markup, followed by a
 * regex based span pass (links, images, emphasis, inline code) over the
 * accumulated markup.
 *
 * @author    adhocore | Jitendra Adhikari <[email protected]>
 * @copyright (c) 2014 Jitendra Adhikari
 */
class HtmlUp
{
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
    const RE_MD_OL     = '/^\d+\. /';

    /** @var string[] Input lines, padded with one empty line at each end by scan(). */
    protected $lines = [];

    /** @var string[] Closing tags of the currently open list elements (popped on flush/dedent). */
    protected $stackList = [];

    /** @var string[] Closing tags of the currently open block elements (blockquotes). */
    protected $stackBlock = [];

    /** @var string[] Closing tags queued for tables; still drained by flush(). */
    protected $stackTable = [];

    /** @var int Index into $lines of the line being processed; -1 before parsing starts. */
    protected $pointer = -1;

    protected $listLevel  = 0;
    protected $quoteLevel = 0;

    /** @var int Count of leading whitespace characters of the current line. */
    protected $indent = 0;

    /** @var int Count of leading whitespace characters of the next line. */
    protected $nextIndent = 0;

    /** @var string Replacement for a tab character; configured once in the constructor. */
    protected $indentStr = '';

    protected $line            = '';
    protected $trimmedLine     = '';
    protected $prevLine        = '';
    protected $trimmedPrevLine = '';
    protected $nextLine        = '';
    protected $trimmedNextLine = '';

    /** @var string The HTML produced so far. */
    protected $markup = '';

    protected $inList  = false;
    protected $inQuote = false;
    protected $inPara  = false;
    protected $inHtml  = false;
    protected $inTable = false;

    /**
     * Constructor.
     *
     * @param string|null $markdown    Markdown to scan right away; null defers input to parse().
     * @param int         $indentWidth Number of spaces a tab expands to: 2, or 4 for any other value.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        // Loose comparison on purpose so that a numeric string '2' keeps working.
        $this->indentStr = str_repeat(' ', $indentWidth == 2 ? 2 : 4);

        if (null !== $markdown) {
            $this->scan($markdown);
        }
    }

    /**
     * Normalizes tabs and line endings, then splits the input into $lines.
     *
     * Whitespace-only input leaves $lines untouched.
     *
     * @param string $markdown
     */
    protected function scan($markdown)
    {
        if ('' === trim($markdown)) {
            return;
        }

        // Normalize whitespaces
        $markdown = str_replace("\t", $this->indentStr, $markdown);
        $markdown = str_replace(["\r\n", "\r"], "\n", $markdown);

        // The empty guard lines make the first/last real line see an empty neighbour.
        $this->lines = array_merge([''], explode("\n", $markdown), ['']);
    }

    /**
     * @return string The parsed HTML.
     */
    public function __toString()
    {
        return $this->parse();
    }

    /**
     * Parses markdown into HTML.
     *
     * @param string|null $markdown New input replacing anything scanned before;
     *                              null parses what the constructor received.
     *
     * @return string The HTML markup, or '' when there is nothing to parse.
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);

            $this->scan($markdown);
        }

        if ([] === $this->lines) {
            return '';
        }

        // Already parsed: running the span pass again over generated HTML would corrupt it
        // (e.g. `*` inside two separate <code> spans would be picked up as emphasis).
        if ($this->pointer >= count($this->lines)) {
            return $this->markup;
        }

        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }

    /**
     * Block pass: walks over every line and dispatches it to the first handler that accepts it.
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->init();

            if ($this->flush() || $this->raw()) {
                continue;
            }

            // A quote prefix is stripped in place; the rest of the line is handled below.
            $this->quote();

            if ($this->atx() || $this->setext() || $this->code() || $this->rule() || $this->listt()) {
                continue;
            }

            if ($this->inList) {
                $this->markup .= $this->trimmedLine;

                continue;
            }

            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }

    /**
     * Loads the previous/current/next line state for the line at $pointer.
     */
    protected function init()
    {
        $this->prevLine        = $this->line;
        $this->trimmedPrevLine = $this->trimmedLine;

        $this->line        = $this->lines[$this->pointer];
        $this->trimmedLine = trim($this->line);
        $this->indent      = strlen($this->line) - strlen(ltrim($this->line));

        $this->nextLine = isset($this->lines[$this->pointer + 1])
            ? $this->lines[$this->pointer + 1]
            : '';
        $this->trimmedNextLine = trim($this->nextLine);
        $this->nextIndent      = strlen($this->nextLine) - strlen(ltrim($this->nextLine));
    }

    /**
     * Span pass: applies the inline replacements to the whole markup.
     */
    protected function parseSpanElements()
    {
        $this->links();

        $this->anchors();

        $this->spans();
    }

    /**
     * Converts `<http(s)://...>` and `<user@host>` autolinks into anchors.
     */
    protected function links()
    {
        // URLs.
        $this->markup = preg_replace(
            static::RE_URL,
            '<a href="$1">$1</a>',
            $this->markup
        );

        // Emails.
        $this->markup = preg_replace(
            static::RE_EMAIL,
            '<a href="mailto:$1">$1</a>',
            $this->markup
        );
    }

    /**
     * Converts `![alt](src "title")` images and `[text](href "title")` links.
     *
     * Images are handled first because the link pattern also matches the tail of an image.
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            // Group 3 keeps its surrounding double quotes, hence no extra quoting here.
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare against '' so that the alt text "0" is not dropped.
            $alt   = '' !== $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }

    /**
     * Converts emphasis, strong, strike-through and inline code delimiters.
     */
    protected function spans()
    {
        // em/code/strong/del
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            switch ($em[1]) {
                case '**':
                case '__':
                    $tag = 'strong';
                    break;

                case '~~':
                    $tag = 'del';
                    break;

                case '*':
                case '_':
                    $tag = 'em';
                    break;

                default:
                    // Only the backtick remains: inline code, whose content is escaped.
                    $tag   = 'code';
                    $em[2] = $this->escape($em[2]);
            }

            return "<{$tag}>{$em[2]}</{$tag}>";
        }, $this->markup);
    }

    /**
     * @param string $input
     *
     * @return string $input with HTML special characters converted to entities.
     */
    protected function escape($input)
    {
        return htmlspecialchars($input);
    }

    /**
     * Restores properties to their declared defaults.
     *
     * @param bool $all When false, the input lines, the pointer and the markup built so far
     *                  are kept (used between blocks); when true they are reset as well.
     *                  The configured indent string survives in both modes.
     */
    protected function reset($all = false)
    {
        // indentStr is constructor configuration rather than parse state.
        $except = ['indentStr' => true];

        if (!$all) {
            $except += array_fill_keys(['lines', 'pointer', 'markup'], true);
        }

        // Reset all current values.
        foreach (get_class_vars(__CLASS__) as $prop => $value) {
            if (!isset($except[$prop])) {
                $this->{$prop} = $value;
            }
        }
    }

    /**
     * On an empty line: closes everything still open and resets the block state.
     *
     * @return bool True when the current line was empty and has been consumed.
     */
    protected function flush()
    {
        if ('' !== $this->trimmedLine) {
            return false;
        }

        while (!empty($this->stackList)) {
            $this->markup .= array_pop($this->stackList);
        }

        while (!empty($this->stackBlock)) {
            $this->markup .= array_pop($this->stackBlock);
        }

        while (!empty($this->stackTable)) {
            $this->markup .= array_pop($this->stackTable);
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }

    /**
     * Passes raw HTML lines through untouched.
     *
     * A tag line that follows an empty line switches to HTML mode, in which every
     * line is copied verbatim until the next empty line resets the state.
     *
     * @return bool True when the line was emitted as raw HTML.
     */
    protected function raw()
    {
        if ($this->inHtml || preg_match(static::RE_RAW, $this->trimmedLine)) {
            $this->markup .= "\n{$this->line}";

            if (!$this->inHtml && empty($this->lines[$this->pointer - 1])) {
                $this->inHtml = true;
            }

            return true;
        }

        return false;
    }

    /**
     * Strips a `>` quote prefix from the current line and opens a blockquote when needed.
     *
     * @return bool True when the line carried a quote prefix.
     */
    protected function quote()
    {
        if (preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            $this->line        = substr($this->line, strlen($quoteMatch[0]));
            $this->trimmedLine = trim($this->line);

            // Open one more level for a new quote or a deeper run of `>`.
            if (!$this->inQuote || $this->quoteLevel < strlen($quoteMatch[1])) {
                $this->markup .= "\n<blockquote>";

                $this->stackBlock[] = "\n</blockquote>";

                ++$this->quoteLevel;
            }

            return $this->inQuote = true;
        }

        return false;
    }

    /**
     * Handles `#`-style headings of level 1 to 6.
     *
     * @return bool True when a heading was emitted.
     */
    protected function atx()
    {
        if (isset($this->trimmedLine[0]) && $this->trimmedLine[0] === '#') {
            $level = strlen($this->trimmedLine) - strlen(ltrim($this->trimmedLine, '#'));

            if ($level < 7) {
                $this->markup .= "\n<h{$level}>" . ltrim(ltrim($this->trimmedLine, '# ')) . "</h{$level}>";

                return true;
            }
        }

        return false;
    }

    /**
     * Handles headings underlined with `===` (h1) or `---` (h2) on the next line.
     *
     * @return bool True when a heading was emitted; the underline is consumed too.
     */
    protected function setext()
    {
        if (preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            $level = trim($this->nextLine, '- ') === '' ? 2 : 1;

            $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

            // Skip the underline.
            ++$this->pointer;

            return true;
        }

        return false;
    }

    /**
     * Handles fenced (```) code blocks and, outside lists and quotes, blocks indented by 4+ spaces.
     *
     * @return bool True when a code block was emitted.
     */
    protected function code()
    {
        $codeBlock = (bool) preg_match(static::RE_MD_CODE, $this->line, $codeMatch);

        if ($codeBlock || (empty($this->inList) && empty($this->inQuote) && $this->indent >= 4)) {
            $lang = isset($codeMatch[1])
                ? ' class="language-' . $codeMatch[1] . '"'
                : '';

            $this->markup .= "\n<pre><code{$lang}>";

            if (!$codeBlock) {
                // An indented block starts with the current line itself.
                $this->markup .= $this->escape(substr($this->line, 4));
            }

            $this->codeInternal($codeBlock);

            // Only a fenced block has a closing fence line to skip. Advancing after an
            // indented block would silently drop the line that follows it.
            if ($codeBlock) {
                ++$this->pointer;
            }

            $this->markup .= '</code></pre>';

            return true;
        }

        return false;
    }

    /**
     * Appends the following lines that still belong to the open code block.
     *
     * Stops in front of the first line that does not belong to the block (the closing
     * fence, or a line not indented by 4 spaces), leaving $pointer on the last consumed line.
     *
     * @param bool $codeBlock True for a fenced block, false for an indented one.
     */
    public function codeInternal($codeBlock)
    {
        $codeIndent = str_repeat(' ', 4);

        while (isset($this->lines[$this->pointer + 1])) {
            $codeLine = $this->escape($this->lines[$this->pointer + 1]);

            $belongs = ($codeBlock && substr(ltrim($codeLine), 0, 3) !== '```')
                || substr($codeLine, 0, 4) === $codeIndent;

            if (!$belongs) {
                // Without this the loop never advances and spins forever.
                break;
            }

            $this->markup .= "\n"; // @todo: donot use \n for first line
            $this->markup .= $codeBlock ? $codeLine : substr($codeLine, 4);

            ++$this->pointer;
        }
    }

    /**
     * Handles horizontal rules (`___`, `***`, `---`) that follow an empty line.
     *
     * @return bool True when a rule was emitted.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine === ''
            && preg_match(static::RE_MD_RULE, $this->trimmedLine)
        ) {
            $this->markup .= "\n<hr />";

            return true;
        }

        return false;
    }

    /**
     * Handles an unordered (`- `, `* `, `+ `) or ordered (`1. `) list item.
     *
     * @return bool True when a list item was emitted.
     */
    protected function listt()
    {
        $isUl = in_array(substr($this->trimmedLine, 0, 2), ['- ', '* ', '+ '], true);

        if ($isUl || preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            $wrapper = $isUl ? 'ul' : 'ol';

            if (!$this->inList) {
                $this->stackList[] = "</$wrapper>";
                $this->markup .= "\n<$wrapper>\n";
                $this->inList = true;

                ++$this->listLevel;
            }

            // Remove exactly the marker that was matched. Trimming a character set would
            // leave the `+` bullet in place and eat item content such as a leading `*`
            // of emphasis or leading digits.
            $item = $isUl
                ? substr($this->trimmedLine, 2)
                : preg_replace(static::RE_MD_OL, '', $this->trimmedLine);

            $this->markup .= '<li>' . ltrim($item);

            $this->listInternal();

            return true;
        }

        return false;
    }

    /**
     * Closes the current list item, or opens/closes nested lists, based on the next line.
     */
    protected function listInternal()
    {
        $isUl = in_array(substr($this->trimmedNextLine, 0, 2), ['- ', '* ', '+ '], true);

        if ($isUl || preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            $wrapper = $isUl ? 'ul' : 'ol';
            if ($this->nextIndent > $this->indent) {
                // Deeper item next: keep this <li> open and start a nested list inside it.
                $this->stackList[] = "</li>\n";
                $this->stackList[] = "</$wrapper>";
                $this->markup .= "\n<$wrapper>\n";

                ++$this->listLevel;
            } else {
                $this->markup .= "</li>\n";
            }

            if ($this->nextIndent < $this->indent) {
                // Shallower item next: unwind one nesting level per 4 columns of dedent.
                $shift = intval(($this->indent - $this->nextIndent) / 4);

                while ($shift--) {
                    $this->markup .= array_pop($this->stackList);

                    if ($this->listLevel > 2) {
                        $this->markup .= array_pop($this->stackList);
                    }
                }
            }
        } else {
            $this->markup .= "</li>\n";
        }
    }

    /**
     * Handles a table: the header row (via tableInternal) or, once in a table, a body row.
     *
     * @return bool True when the line was consumed as part of a table.
     */
    protected function table()
    {
        // Number of `|` separators in the header row, i.e. the highest allowed column index.
        static $headerCount = 0;

        if (!$this->inTable) {
            $headerCount = substr_count(trim($this->trimmedLine, '|'), '|');

            return $this->tableInternal($headerCount);
        }

        $this->markup .= "<tr>\n";

        foreach (explode('|', trim($this->trimmedLine, '|')) as $i => $col) {
            // Cells beyond the header's column count are dropped.
            if ($i > $headerCount) {
                break;
            }

            $col = trim($col);
            $this->markup .= "<td>{$col}</td>\n";
        }

        $this->markup .= "</tr>\n";

        if (empty($this->trimmedNextLine)
            || !substr_count(trim($this->trimmedNextLine, '|'), '|')
        ) {
            $headerCount   = 0;
            $this->inTable = false;

            // Close right away. Deferring to the next empty line would put any text that
            // directly follows the table inside <tbody>, and would close an enclosing
            // blockquote before the table itself.
            $this->markup .= "</tbody>\n</table>";
        }

        return true;
    }

    /**
     * Opens a table when the next line is a separator row with enough columns.
     *
     * @param int $headerCount Number of `|` separators found in the candidate header row.
     *
     * @return bool True when the header was emitted; the separator row is consumed too.
     */
    protected function tableInternal($headerCount)
    {
        $columnCount = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

        if ($headerCount > 0 && $headerCount <= $columnCount) {
            // Skip the separator row.
            ++$this->pointer;

            $this->inTable     = true;
            $this->markup     .= "<table>\n<thead>\n<tr>\n";
            $this->trimmedLine = trim($this->trimmedLine, '|');

            foreach (explode('|', $this->trimmedLine) as $hdr) {
                $this->markup .= '<th>' . trim($hdr) . "</th>\n";
            }

            $this->markup .= "</tr>\n</thead>\n<tbody>\n";

            return true;
        }

        return false;
    }

    /**
     * Emits the current line as paragraph text.
     *
     * Opens a paragraph or, inside one, adds a line break; the paragraph is closed
     * as soon as the next line is empty.
     */
    protected function paragraph()
    {
        $this->markup .= $this->inPara ? "\n<br />" : "\n<p>";
        $this->markup .= $this->trimmedLine;

        if (empty($this->trimmedNextLine)) {
            $this->markup .= '</p>';
            $this->inPara = false;
        } else {
            $this->inPara = true;
        }
    }
}
488
|
|
|
|
This check looks for function or method calls that always return null and whose return value is used.
The method `getObject()` can return nothing but null, so it makes no sense to use the return value. The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.