1 | <?php |
||
2 | |||
3 | namespace Ahc; |
||
4 | |||
5 | /** |
||
6 | * HtmlUp - A **lightweight** and **fast** `markdown` to HTML Parser. |
||
7 | * |
||
8 | * Supports most of the markdown specs except deep nested elements. |
||
9 | * Check readme.md for the details of its features and limitations. |
||
10 | * **Crazy Part:** it is _single class_, _single function_ library. |
||
11 | * because hey! construct() and toString() are magics |
||
12 | * |
||
13 | * @author adhocore | Jitendra Adhikari <[email protected]> |
||
14 | * @copyright (c) 2014 Jitendra Adhikari |
||
15 | */ |
||
class HtmlUp
{
    /**
     * Normalised markdown lines, padded with one blank line at each end.
     * Empty array when the input contained nothing but newlines.
     *
     * @var array
     */
    private $Lines;

    /**
     * Index of the line currently being parsed; -1 before parsing starts.
     *
     * @var int
     */
    private $Pointer = -1;

    /**
     * Normalise the raw markdown and split it into lines.
     *
     * Tabs become four spaces, all newline flavours become "\n" and
     * leading/trailing newlines are dropped.
     *
     * @param string $markdown Raw markdown text.
     */
    public function __construct($markdown)
    {
        $normalised = str_replace("\t", '    ', (string) $markdown); // use 4 spaces for tab
        $normalised = str_replace(array("\r\n", "\r"), "\n", $normalised); // use standard newline
        $normalised = trim($normalised, "\n"); // drop leading/trailing newlines

        // explode() never returns an empty array, so the "nothing to parse"
        // case has to be detected on the string itself.
        if ($normalised === '') {
            $this->Lines = array();

            return;
        }

        // Pad with a blank line on both ends: the first iteration of parse()
        // then initialises the block state and the last one flushes the stacks.
        $this->Lines = explode("\n", $normalised);
        array_unshift($this->Lines, '');
        $this->Lines[] = '';
    }

    /**
     * @return string The rendered HTML, see parse().
     */
    public function __toString()
    {
        return $this->parse();
    }

    /**
     * Convert the markdown given to the constructor into HTML.
     *
     * Block level elements (raw html, blockquote, headings, code, rule, list,
     * table, paragraph) are handled line by line; inline elements (urls,
     * emails, images, anchors, em/strong/del/code) are applied on the whole
     * markup afterwards. The method may be called any number of times.
     *
     * @return string HTML markup; empty string for empty markdown.
     */
    public function parse()
    {
        if (empty($this->Lines)) {
            return '';
        }

        // Always restart from the top so that repeated calls give the same result.
        $this->Pointer = -1;

        $markup      = '';
        $nestLevel   = $quoteLevel = 0;
        $hdrCt       = 0;
        $inList      = $inQuote = $inPara = $inHtml = $inTable = null;
        $stackList   = $stackBlock = array();
        $lastPointer = count($this->Lines) - 1;
        $bullets     = array('- ', '* ', '+ ');

        while (isset($this->Lines[++$this->Pointer])) {
            $line        = $this->Lines[$this->Pointer];
            $trimmedLine = trim($line);

            // flush stacks at the end of block
            // (strict comparison: a line holding just "0" is NOT blank)
            if ($trimmedLine === '') {
                while ($stackList) {
                    $markup .= array_pop($stackList);
                }
                // a table that got a header but no rows is still open here
                if ($inTable) {
                    $markup .= "</tbody>\n</table>";
                    $inTable = null;
                }
                while ($stackBlock) {
                    $markup .= array_pop($stackBlock);
                }

                $markup .= "\n";

                $inList    = $inQuote = $inPara = $inHtml = null;
                $nestLevel = $quoteLevel = 0;
                continue;
            }

            // raw html
            if (preg_match('/^<\/?\w.*?\/?>/', $trimmedLine) or isset($inHtml)) {
                $markup .= "\n$line";
                // html opening a block keeps the whole block verbatim
                if (empty($inHtml) and trim($this->Lines[$this->Pointer - 1]) === '') {
                    $inHtml = true;
                }
                continue;
            }

            // look ahead; plain strings (never null) keep the string functions happy
            $nextLine        = $this->Pointer < $lastPointer ? $this->Lines[$this->Pointer + 1] : '';
            $trimmedNextLine = trim($nextLine);

            $indent     = strlen($line) - strlen(ltrim($line));
            $nextIndent = strlen($nextLine) - strlen(ltrim($nextLine));
            $nextMark12 = (string) substr($trimmedNextLine, 0, 2);

            // blockquote
            if (preg_match('~^\s*(>+)\s+~', $line, $quoteMatch)) {
                $line        = (string) substr($line, strlen($quoteMatch[0]));
                $trimmedLine = trim($line);
                if (empty($inQuote) or $quoteLevel < strlen($quoteMatch[1])) {
                    $markup .= "\n<blockquote>";
                    $stackBlock[] = "\n</blockquote>";
                    ++$quoteLevel;
                }
                $inQuote = true;

                // the line held nothing but the quote marker
                if ($trimmedLine === '') {
                    continue;
                }
            }

            $mark1  = $trimmedLine[0];
            $mark12 = substr($trimmedLine, 0, 2);

            // atx
            if ($mark1 === '#') {
                $level = strlen($trimmedLine) - strlen(ltrim($trimmedLine, '#'));
                if ($level < 7) {
                    $markup .= "\n<h{$level}>".ltrim($trimmedLine, '# ')."</h{$level}>";
                    continue;
                }
            }

            // setext: underline made of '-' gives h2, of '=' gives h1
            if (preg_match('~^\s*(={3,}|-{3,})\s*$~', $nextLine)) {
                $level = trim($nextLine, '- ') === '' ? '2' : '1';
                $markup .= "\n<h{$level}>{$trimmedLine}</h{$level}>";
                ++$this->Pointer;
                continue;
            }

            // fence code, or code indented by 4+ spaces outside list/quote
            $codeBlock = preg_match('/^```\s*([\w-]+)?/', $line, $codeMatch);
            if ($codeBlock or (empty($inList) and empty($inQuote) and $indent >= 4)) {
                $lang = ($codeBlock and isset($codeMatch[1]))
                    ? " class=\"language-{$codeMatch[1]}\" "
                    : '';
                $markup .= "\n<pre><code{$lang}>";
                if (!$codeBlock) {
                    $markup .= htmlspecialchars((string) substr($line, 4));
                }

                while (isset($this->Lines[$this->Pointer + 1])) {
                    $codeLine = htmlspecialchars($this->Lines[$this->Pointer + 1]);
                    $isMore   = ($codeBlock and substr(ltrim($codeLine), 0, 3) !== '```')
                        or substr($codeLine, 0, 4) === '    ';
                    if (!$isMore) {
                        break;
                    }

                    $markup .= "\n"; // @todo: do not use \n for first line
                    $markup .= $codeBlock ? $codeLine : substr($codeLine, 4);
                    ++$this->Pointer;
                }

                // Step over the closing fence. An indented block has no
                // terminator line, so the next line must not be skipped.
                if ($codeBlock) {
                    ++$this->Pointer;
                }
                $markup .= '</code></pre>';
                continue;
            }

            // rule
            if (isset($this->Lines[$this->Pointer - 1]) and
                trim($this->Lines[$this->Pointer - 1]) === '' and
                preg_match('~^(_{3,}|\*{3,}|\-{3,})$~', $trimmedLine)
            ) {
                $markup .= "\n<hr />";
                continue;
            }

            // list
            $ul = in_array($mark12, $bullets, true);
            if ($ul or preg_match('/^\d+\. /', $trimmedLine)) {
                $wrapper = $ul ? 'ul' : 'ol';
                if (empty($inList)) {
                    $stackList[] = "</$wrapper>";
                    $markup .= "\n<$wrapper>\n";
                    $inList = true;
                    ++$nestLevel;
                }

                // Strip exactly one list marker; the item text itself may well
                // start with digits, '*' or '-'.
                $markup .= '<li>'.preg_replace('/^(?:[-*+]|\d+\.)\s+/', '', $trimmedLine);

                $nextUl = in_array($nextMark12, $bullets, true);
                if ($nextUl or preg_match('/^\d+\. /', $trimmedNextLine)) {
                    $wrapper = $nextUl ? 'ul' : 'ol';
                    if ($nextIndent > $indent) {
                        // deeper item ahead: open a nested list inside this <li>
                        $stackList[] = "</li>\n";
                        $stackList[] = "</$wrapper>";
                        $markup .= "\n<$wrapper>\n";
                        ++$nestLevel;
                    } else {
                        $markup .= "</li>\n";
                    }

                    // handle nested lists ending
                    if ($nextIndent < $indent) {
                        $shift = intval(($indent - $nextIndent) / 4);
                        while ($shift--) {
                            $markup .= array_pop($stackList);
                            if ($nestLevel > 2) {
                                $markup .= array_pop($stackList);
                            }
                        }
                    }
                } else {
                    $markup .= "</li>\n";
                }

                continue;
            }

            // continuation text of a list block
            if ($inList) {
                $markup .= $trimmedLine;
                continue;
            }

            // table
            if (empty($inTable)) {
                // number of column separators in the would-be header row
                $hdrCt = substr_count(trim($trimmedLine, '|'), '|');
                $colCt = $hdrCt
                    ? preg_match_all('~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~', trim($trimmedNextLine, '|'))
                    : 0;

                if ($hdrCt and $colCt and $hdrCt <= $colCt) {
                    $inTable = true;
                    ++$this->Pointer; // the delimiter row is consumed here
                    $markup .= "<table>\n<thead>\n<tr>\n";
                    foreach (explode('|', trim($trimmedLine, '|')) as $hdr) {
                        $hdr = trim($hdr);
                        $markup .= "<th>{$hdr}</th>\n";
                    }
                    $markup .= "</tr>\n</thead>\n<tbody>\n";
                    continue;
                }
            } else {
                $markup .= "<tr>\n";
                foreach (explode('|', trim($trimmedLine, '|')) as $i => $col) {
                    // ignore cells beyond the number of header columns
                    if ($i > $hdrCt) {
                        break;
                    }
                    $col = trim($col);
                    $markup .= "<td>{$col}</td>\n";
                }
                $markup .= "</tr>\n";

                // last row: close right away so that the table never wraps
                // whatever follows it
                if ($trimmedNextLine === '' or
                    !substr_count(trim($trimmedNextLine, '|'), '|')
                ) {
                    $inTable = null;
                    $markup .= "</tbody>\n</table>";
                }

                continue;
            }

            // paragraph
            $markup .= empty($inPara) ? "\n<p>" : "\n<br />";
            $markup .= $trimmedLine;
            if ($trimmedNextLine === '') {
                $markup .= '</p>';
                $inPara = null;
            } else {
                $inPara = true;
            }
        }

        // urls
        $markup = preg_replace(
            '~<(https?:[\/]{2}[^\s]+?)>~',
            '<a href="$1">$1</a>',
            $markup
        );

        // emails
        $markup = preg_replace(
            '~<(\S+?@\S+?)>~',
            '<a href="mailto:$1">$1</a>',
            $markup
        );

        // images
        $markup = preg_replace_callback('~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~', function ($img) {
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            $alt   = $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $markup);

        // anchors
        $markup = preg_replace_callback('~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~', function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $markup);

        // em/code/strong/del
        $markup = preg_replace_callback('!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!', function ($em) {
            $tags = array(
                '**' => 'strong',
                '__' => 'strong',
                '~~' => 'del',
                '*'  => 'em',
                '_'  => 'em',
            );

            if (isset($tags[$em[1]])) {
                $tag = $tags[$em[1]];
            } else {
                // backtick: inline code, shown literally
                $tag   = 'code';
                $em[2] = htmlspecialchars($em[2]);
            }

            return "<$tag>{$em[2]}</$tag>";
        }, $markup);

        return $markup;
    }
}
||
327 |