1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Ahc; |
4
|
|
|
|
5
|
|
|
/**
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
 *
 * Supports most of the markdown specs except deep nested elements.
 * Check readme.md for the details of its features && limitations.
 *
 * Parsing happens in two passes: a line-by-line block pass (headings, quotes,
 * code, rules, lists, tables, paragraphs, raw HTML) that builds `$markup`,
 * followed by a regex pass over the whole markup for inline elements
 * (autolinks, images, anchors, emphasis/strong/code/del).
 *
 * @author adhocore | Jitendra Adhikari <[email protected]>
 * @copyright (c) 2014 Jitendra Adhikari
 */
class HtmlUp
{
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
    const RE_MD_OL     = '/^\d+\. /';

    /** @var string[] Input lines, padded with one empty line at each end by scan(). */
    protected $lines = [];

    /** @var string[] Pending closing tags for open lists / list items (LIFO). */
    protected $stackList = [];

    /** @var string[] Pending closing tags for open block quotes (LIFO). */
    protected $stackBlock = [];

    /** @var string[] Pending closing tags for tables (LIFO). */
    protected $stackTable = [];

    /** @var int Index into $lines of the line being processed; -1 before parsing starts. */
    protected $pointer = -1;

    /** @var int Current list nesting depth (1 = top-level list). */
    protected $listLevel = 0;

    /** @var int Number of block quotes opened in the current block. */
    protected $quoteLevel = 0;

    /** @var int Count of leading whitespace characters on the current line. */
    protected $indent = 0;

    /** @var int Count of leading whitespace characters on the next line. */
    protected $nextIndent = 0;

    /** @var int Index of the last column of the table being parsed (pipe count of its header). */
    protected $tableLastColumn = 0;

    /** @var string Replacement for a tab character; configuration, survives reset(). */
    protected $indentStr = '';

    protected $line            = '';
    protected $trimmedLine     = '';
    protected $prevLine        = '';
    protected $trimmedPrevLine = '';
    protected $nextLine        = '';
    protected $trimmedNextLine = '';

    /** @var string The HTML produced so far. */
    protected $markup = '';

    protected $inList  = false;
    protected $inQuote = false;
    protected $inPara  = false;
    protected $inHtml  = false;
    protected $inTable = false;

    /**
     * Constructor.
     *
     * @param string|null $markdown    Markdown text to parse; may be supplied later via parse().
     * @param int         $indentWidth Tab width: 2 gives two spaces, anything else four spaces.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        // Loose comparison on purpose so that a numeric string '2' keeps working.
        $this->indentStr = $indentWidth == 2 ? '  ' : '    ';

        if (null !== $markdown) {
            $this->scan($markdown);
        }
    }

    /**
     * Normalize whitespace of the input and split it into lines.
     *
     * Leaves $lines untouched when the input is blank.
     *
     * @param string $markdown
     *
     * @return void
     */
    protected function scan($markdown)
    {
        if ('' === trim($markdown)) {
            return;
        }

        // Normalize whitespaces
        $markdown = str_replace("\t", $this->indentStr, $markdown);
        $markdown = str_replace(["\r\n", "\r"], "\n", $markdown);

        // The empty first/last lines guarantee a flush() at both ends.
        $this->lines = array_merge([''], explode("\n", $markdown), ['']);
    }

    /**
     * @return string The parsed HTML.
     */
    public function __toString()
    {
        return $this->parse();
    }

    /**
     * Parse markdown into HTML.
     *
     * @param string|null $markdown New input; when null the text given to the
     *                              constructor (or a previous call) is used.
     *
     * @return string
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);

            $this->scan($markdown);
        } elseif ($this->pointer >= 0) {
            // Already parsed: running the span regexes again over generated
            // HTML would corrupt it (e.g. `*` inside <code>), so reuse the result.
            return $this->markup;
        }

        if ([] === $this->lines) {
            return '';
        }

        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }

    /**
     * Walk over the lines and emit block-level markup.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->prevLine        = $this->line;
            $this->trimmedPrevLine = $this->trimmedLine;

            $this->line        = $this->lines[$this->pointer];
            $this->trimmedLine = trim($this->line);

            if ($this->flush() || $this->raw()) {
                continue;
            }

            $this->indent   = strlen($this->line) - strlen(ltrim($this->line));
            $this->nextLine = isset($this->lines[$this->pointer + 1])
                ? $this->lines[$this->pointer + 1]
                : '';
            $this->trimmedNextLine = trim($this->nextLine);
            $this->nextIndent      = strlen($this->nextLine) - strlen(ltrim($this->nextLine));

            // Strips a quote prefix from the line (if any) before other handlers see it.
            $this->quote();

            if ($this->atx() || $this->setext() || $this->code() || $this->rule() || $this->listt()) {
                continue;
            }

            if ($this->inList) {
                // Continuation text of the current list item.
                $this->markup .= $this->trimmedLine;

                continue;
            }

            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }

    /**
     * Apply all inline (span-level) transformations to the markup.
     *
     * @return void
     */
    protected function parseSpanElements()
    {
        $this->links();

        $this->anchors();

        $this->spans();
    }

    /**
     * Convert `<http://...>` and `<user@host>` autolinks into anchors.
     *
     * @return void
     */
    protected function links()
    {
        // URLs.
        $this->markup = preg_replace(
            static::RE_URL,
            '<a href="$1">$1</a>',
            $this->markup
        );

        // Emails.
        $this->markup = preg_replace(
            static::RE_EMAIL,
            '<a href="mailto:$1">$1</a>',
            $this->markup
        );
    }

    /**
     * Convert markdown images and links into <img> / <a> tags.
     *
     * Images are handled first because the link pattern also matches the
     * `[alt](src)` tail of an image.
     *
     * @return void
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            // $img[3] is the title including its double quotes.
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare against '' so that an alt text of "0" is not dropped.
            $alt = '' !== $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }

    /**
     * Convert emphasis, strong, strike-through and inline code markers.
     *
     * @return void
     */
    protected function spans()
    {
        // em/code/strong/del
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            switch ($em[1]) {
                case '**':
                case '__':
                    $tag = 'strong';
                    break;

                case '~~':
                    $tag = 'del';
                    break;

                case '*':
                case '_':
                    $tag = 'em';
                    break;

                default:
                    // Only the backtick is left: inline code, which is escaped.
                    $tag   = 'code';
                    $em[2] = htmlspecialchars($em[2]);
            }

            return "<$tag>{$em[2]}</$tag>";
        }, $this->markup);
    }

    /**
     * Escape text for literal output inside code blocks.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        return htmlspecialchars($input);
    }

    /**
     * Restore properties to their declared defaults.
     *
     * `indentStr` is configuration coming from the constructor and is never
     * reset; without that the first flush() would wipe it and neither tab
     * expansion nor indented code continuation could work.
     *
     * @param bool $all When false, `lines`, `pointer` and `markup` are kept
     *                  as well (reset of per-block state only).
     *
     * @return void
     */
    protected function reset($all = false)
    {
        $keep = ['indentStr' => true];

        if (!$all) {
            $keep += array_fill_keys(['lines', 'pointer', 'markup'], true);
        }

        // Reset all current values.
        foreach (get_class_vars(__CLASS__) as $prop => $value) {
            if (!isset($keep[$prop])) {
                $this->{$prop} = $value;
            }
        }
    }

    /**
     * On a blank line: close every open list, quote and table and reset the
     * per-block state.
     *
     * @return bool True when the current line is blank and was consumed.
     */
    protected function flush()
    {
        if ('' !== $this->trimmedLine) {
            return false;
        }

        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while ($this->{$stack}) {
                $this->markup .= array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }

    /**
     * Pass raw HTML lines through unchanged.
     *
     * A line starting with a tag that directly follows a blank line opens an
     * HTML block: all lines up to the next blank line are then emitted as is.
     *
     * @return bool True when the current line was consumed.
     */
    protected function raw()
    {
        if (!$this->inHtml && !preg_match(static::RE_RAW, $this->trimmedLine)) {
            return false;
        }

        $this->markup .= "\n" . $this->line;

        if (!$this->inHtml) {
            $before = isset($this->lines[$this->pointer - 1])
                ? $this->lines[$this->pointer - 1]
                : '';

            // Explicit comparison: empty() would also treat a line "0" as blank.
            // Whitespace-only lines count as blank, consistent with flush().
            if ('' === trim($before)) {
                $this->inHtml = true;
            }
        }

        return true;
    }

    /**
     * Detect a block quote prefix, strip it from the current line and open a
     * <blockquote> when entering a quote or a deeper quote level.
     *
     * The closing tags are queued on $stackBlock and emitted by flush().
     *
     * @return bool True when the current line carries a quote prefix.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return false;
        }

        $this->line        = substr($this->line, strlen($quoteMatch[0]));
        $this->trimmedLine = trim($this->line);

        if (!$this->inQuote || $this->quoteLevel < strlen($quoteMatch[1])) {
            $this->markup .= "\n<blockquote>";

            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        return $this->inQuote = true;
    }

    /**
     * Handle ATX headings (`# Title` ... `###### Title`).
     *
     * @return bool True when the current line was emitted as a heading.
     */
    protected function atx()
    {
        if (!isset($this->trimmedLine[0]) || $this->trimmedLine[0] !== '#') {
            return false;
        }

        $level = strlen($this->trimmedLine) - strlen(ltrim($this->trimmedLine, '#'));

        // More than six hashes is not a heading.
        if ($level >= 7) {
            return false;
        }

        $this->markup .= "\n<h{$level}>" . ltrim(ltrim($this->trimmedLine, '# ')) . "</h{$level}>";

        return true;
    }

    /**
     * Handle setext headings (text underlined with `===` or `---`).
     *
     * Consumes the underline as well.
     *
     * @return bool True when the current line was emitted as a heading.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return false;
        }

        // An underline made of dashes is level 2, one made of `=` is level 1.
        $level = trim($this->nextLine, '- ') === '' ? 2 : 1;

        $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

        ++$this->pointer;

        return true;
    }

    /**
     * Handle fenced (```) and 4-space indented code blocks.
     *
     * Consumes every following line that belongs to the block, plus the
     * closing fence or the blank line that terminates it.
     *
     * @return bool True when a code block was emitted.
     */
    protected function code()
    {
        $codeBlock = preg_match(static::RE_MD_CODE, $this->line, $codeMatch);
        $indented  = !$this->inList && !$this->inQuote && $this->indent >= 4;

        if (!$codeBlock && !$indented) {
            return false;
        }

        $lang = isset($codeMatch[1])
            ? ' class="language-' . $codeMatch[1] . '"'
            : '';

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$codeBlock) {
            // For indented blocks the current line is already code.
            $this->markup .= $this->escape(substr($this->line, 4));
        }

        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $belongs = ($codeBlock && substr(ltrim($this->line), 0, 3) !== '```')
                || substr($this->line, 0, 4) === $this->indentStr;

            if (!$belongs) {
                // Without this the loop would spin forever on the terminating line.
                break;
            }

            $this->markup .= "\n"; // @todo: donot use \n for first line
            $this->markup .= $codeBlock ? $this->line : substr($this->line, 4);

            ++$this->pointer;
        }

        // Step over the terminator only when it is a closing fence or a blank
        // line, so that text directly following indented code is not swallowed.
        $terminator = isset($this->lines[$this->pointer + 1])
            ? $this->lines[$this->pointer + 1]
            : '';

        if ($codeBlock || '' === trim($terminator)) {
            ++$this->pointer;
        }

        $this->markup .= '</code></pre>';

        return true;
    }

    /**
     * Handle horizontal rules (`___`, `***`, `---`) that follow a blank line.
     *
     * @return bool True when the current line was emitted as <hr />.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine !== ''
            || !preg_match(static::RE_MD_RULE, $this->trimmedLine)
        ) {
            return false;
        }

        $this->markup .= "\n<hr />";

        return true;
    }

    /**
     * Handle ordered and unordered list items, including nesting driven by
     * the indentation of the next line.
     *
     * Closing tags are queued on $stackList: one `</ul|ol>` for the top-level
     * list and a `</li>` + `</ul|ol>` pair for every nested list.
     *
     * @return bool True when the current line was emitted as a list item.
     */
    protected function listt()
    {
        $bullets = ['- ', '* ', '+ '];
        $isUl    = in_array(substr($this->trimmedLine, 0, 2), $bullets, true);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            return false;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup .= "\n<$wrapper>\n";
            $this->inList = true;

            ++$this->listLevel;
        }

        // Remove just the marker; a character-list ltrim() would also eat
        // leading `*`, `-` or digits of the item text and miss the `+` bullet.
        $item = $isUl
            ? substr($this->trimmedLine, 2)
            : preg_replace(static::RE_MD_OL, '', $this->trimmedLine);

        $this->markup .= '<li>' . ltrim($item);

        $nextIsUl = in_array(substr($this->trimmedNextLine, 0, 2), $bullets, true);

        if (!$nextIsUl && !preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            $this->markup .= "</li>\n";

            return true;
        }

        if ($this->nextIndent > $this->indent) {
            // The next item is nested: keep this <li> open around the sub list.
            $nested = $nextIsUl ? 'ul' : 'ol';

            $this->stackList[] = "</li>\n";
            $this->stackList[] = "</$nested>";
            $this->markup .= "\n<$nested>\n";

            ++$this->listLevel;
        } else {
            $this->markup .= "</li>\n";
        }

        if ($this->nextIndent < $this->indent) {
            // The next item is shallower: close one nested list (and the <li>
            // that contains it) per four columns, never the top-level list.
            $shift = intval(($this->indent - $this->nextIndent) / 4);

            while ($shift-- > 0 && $this->listLevel > 1) {
                $this->markup .= array_pop($this->stackList);
                $this->markup .= array_pop($this->stackList);

                --$this->listLevel;
            }
        }

        return true;
    }

    /**
     * Handle pipe tables: a header row, a `---|---` divider row and body rows.
     *
     * The closing tags are queued on $stackTable and emitted by flush().
     *
     * @return bool True when the current line was emitted as a table row.
     */
    protected function table()
    {
        if (!$this->inTable) {
            $this->tableLastColumn = substr_count(trim($this->trimmedLine, '|'), '|');
            $colCt                 = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

            if ($this->tableLastColumn <= 0 || $colCt <= 0 || $this->tableLastColumn > $colCt) {
                return false;
            }

            // Skip the divider row.
            ++$this->pointer;

            $this->inTable = true;
            $this->markup .= "<table>\n<thead>\n<tr>\n";
            $this->trimmedLine = trim($this->trimmedLine, '|');

            foreach (explode('|', $this->trimmedLine) as $hdr) {
                $this->markup .= '<th>' . trim($hdr) . "</th>\n";
            }

            $this->markup .= "</tr>\n</thead>\n<tbody>\n";

            return true;
        }

        $this->markup .= "<tr>\n";

        foreach (explode('|', trim($this->trimmedLine, '|')) as $i => $col) {
            // Cells beyond the header's columns are dropped.
            if ($i > $this->tableLastColumn) {
                break;
            }

            $col = trim($col);
            $this->markup .= "<td>{$col}</td>\n";
        }

        $this->markup .= "</tr>\n";

        // The table ends when the next line is blank or has no inner pipe.
        if ('' === $this->trimmedNextLine || !substr_count(trim($this->trimmedNextLine, '|'), '|')) {
            $this->tableLastColumn = 0;
            $this->inTable         = false;
            $this->stackTable[]    = "</tbody>\n</table>";
        }

        return true;
    }

    /**
     * Emit the current line as (part of) a paragraph; consecutive lines are
     * joined with <br /> and the paragraph is closed before a blank line.
     *
     * @return void
     */
    protected function paragraph()
    {
        $this->markup .= $this->inPara ? "\n<br />" : "\n<p>";
        $this->markup .= $this->trimmedLine;

        // Explicit comparison: empty() would end the paragraph before a line "0".
        if ('' === $this->trimmedNextLine) {
            $this->markup .= '</p>';
            $this->inPara = false;
        } else {
            $this->inPara = true;
        }
    }
}
462
|
|
|
|
This check looks for function or method calls that always return null and whose return value is used.
The method `getObject()` can return nothing but null, so it makes no sense to use the return value. The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.