Completed
Push — master ( 3fea26...b1717e )
by Jitendra
13s
created

src/HtmlUp.php (1 issue)

1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** and **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features and limitations.
10
 * **Crazy Part:** it is _single class_, _single function_ library.
11
 *                 because hey! construct() and toString() are magics
12
 *
13
 * @author adhocore | Jitendra Adhikari <[email protected]>
14
 * @copyright (c) 2014 Jitendra Adhikari
15
 */
16
class HtmlUp
{
    /**
     * Normalised markdown lines, padded with one empty line at each end.
     * Stays an empty array when there is nothing to parse.
     *
     * @var array
     */
    private $Lines = array();

    /**
     * Index of the line currently being parsed (-1 before the first line).
     *
     * @var int
     */
    private $Pointer = -1;

    /**
     * Normalises the markdown source and splits it into lines.
     *
     * Tabs become four spaces, all newline flavours become "\n" and
     * leading/trailing newlines are removed.
     *
     * @param string $markdown The raw markdown text.
     */
    public function __construct($markdown)
    {
        $markdown = str_replace("\t", '    ', (string) $markdown);      // use 4 spaces for tab
        $markdown = str_replace(array("\r\n", "\r"), "\n", $markdown);  // use standard newline
        $markdown = trim($markdown, "\n");                              // trim surrounding \n

        // explode() never returns an empty array, so the "nothing to parse"
        // case has to be detected on the string itself for parse() to be
        // able to return early.
        if ('' === $markdown) {
            return;
        }

        // Pad with a blank line on both ends: every block then has a
        // previous and a next line, and the trailing blank flushes open tags.
        $this->Lines = explode("\n", $markdown);
        array_unshift($this->Lines, '');
        $this->Lines[] = '';
    }

    /**
     * Renders the markdown as HTML.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->parse();
    }

    /**
     * Converts the markdown given to the constructor into HTML.
     *
     * The method may be called repeatedly; each call parses from the first line.
     *
     * @return string The HTML markup, or an empty string when there was no input.
     */
    public function parse()
    {
        if (empty($this->Lines)) {
            return '';
        }

        // Rewind, otherwise a second call would start past the last line
        // and yield an empty string.
        $this->Pointer = -1;

        $markup = '';
        $nestLevel = $quoteLevel = 0;
        $hdrCt = 0;
        $inList = $inQuote = $inPara = $inHtml = $inTable = null;
        $stackList = $stackBlock = array();
        $lastPointer = count($this->Lines) - 1;
        $bullets = array('- ', '* ', '+ ');

        while (isset($this->Lines[++$this->Pointer])) {
            $line = $this->Lines[$this->Pointer];
            $trimmedLine = trim($line);

            // flush stacks at the end of block
            // (compare with '' rather than empty(): a line holding "0" is content)
            if ('' === $trimmedLine) {
                // a table that is still open (no body rows, or interrupted by
                // another block) is closed here, before any enclosing blockquote
                if (!empty($inTable)) {
                    $markup .= "</tbody>\n</table>";
                    $inTable = null;
                }
                while ($stackList) {
                    $markup .= array_pop($stackList);
                }
                while ($stackBlock) {
                    $markup .= array_pop($stackBlock);
                }

                $markup .= "\n";

                $inList = $inQuote = $inPara = $inHtml = null;
                $nestLevel = $quoteLevel = 0;
                continue;
            }

            // raw html: passed through verbatim; when the html starts right
            // after an empty line, every line up to the next blank is raw too
            if (isset($inHtml) || preg_match('/^<\/?\w.*?\/?>/', $trimmedLine)) {
                $markup .= "\n$line";
                if (empty($inHtml) && '' === $this->Lines[$this->Pointer - 1]) {
                    $inHtml = true;
                }
                continue;
            }

            // look-ahead; an absent next line is treated as an empty one
            $nextLine = $this->Pointer < $lastPointer
                ? $this->Lines[$this->Pointer + 1]
                : '';
            $trimmedNextLine = trim($nextLine);

            $indent = strlen($line) - strlen(ltrim($line));
            $nextIndent = strlen($nextLine) - strlen(ltrim($nextLine));

            $nextMark12 = (string) substr($trimmedNextLine, 0, 2);

            // blockquote
            if (preg_match('~^\s*(>+)\s+~', $line, $quoteMatch)) {
                $line = (string) substr($line, strlen($quoteMatch[0]));
                $trimmedLine = trim($line);
                if (empty($inQuote) || $quoteLevel < strlen($quoteMatch[1])) {
                    $markup .= "\n<blockquote>";
                    $stackBlock[] = "\n</blockquote>";
                    ++$quoteLevel;
                }
                $inQuote = true;
            }

            // the line may be empty once the quote marker is stripped
            $mark1 = isset($trimmedLine[0]) ? $trimmedLine[0] : '';
            $mark12 = (string) substr($trimmedLine, 0, 2);

            // atx
            if ('#' === $mark1) {
                $level = strlen($trimmedLine) - strlen(ltrim($trimmedLine, '#'));
                if ($level < 7) {
                    $markup .= "\n<h{$level}>".ltrim($trimmedLine, '# ')."</h{$level}>";
                    continue;
                }
            }

            // setext: the underline on the next line decides the level
            if (preg_match('~^\s*(={3,}|-{3,})\s*$~', $nextLine)) {
                $level = trim($nextLine, '- ') === '' ? '2' : '1';
                $markup .= "\n<h{$level}>{$trimmedLine}</h{$level}>";
                ++$this->Pointer;
                continue;
            }

            // fence code, or code indented by 4+ spaces outside lists/quotes
            $codeBlock = preg_match('/^```\s*([\w-]+)?/', $line, $codeMatch);
            if ($codeBlock || (empty($inList) && empty($inQuote) && $indent >= 4)) {
                $lang = ($codeBlock && isset($codeMatch[1]))
                    ? " class=\"language-{$codeMatch[1]}\" "
                    : '';
                $markup .= "\n<pre><code{$lang}>";
                if (!$codeBlock) {
                    $markup .= htmlspecialchars(substr($line, 4));
                }

                while (isset($this->Lines[$this->Pointer + 1])) {
                    $codeLine = htmlspecialchars($this->Lines[$this->Pointer + 1]);
                    $isFenceBody = $codeBlock && substr(ltrim($codeLine), 0, 3) !== '```';
                    $isIndented = substr($codeLine, 0, 4) === '    ';
                    if (!$isFenceBody && !$isIndented) {
                        break;
                    }

                    $markup .= "\n"; # @todo: do not use \n for first line
                    $markup .= $codeBlock ? $codeLine : substr($codeLine, 4);
                    ++$this->Pointer;
                }

                // Skip the closing fence, or the blank line that terminates an
                // indented block. Any other line following an indented block
                // is real content and must still be parsed.
                $following = isset($this->Lines[$this->Pointer + 1])
                    ? trim($this->Lines[$this->Pointer + 1])
                    : '';
                if ($codeBlock || '' === $following) {
                    ++$this->Pointer;
                }

                $markup .= '</code></pre>';
                continue;
            }

            // rule
            if (isset($this->Lines[$this->Pointer - 1]) &&
                trim($this->Lines[$this->Pointer - 1]) === '' &&
                preg_match('~^(_{3,}|\*{3,}|\-{3,})$~', $trimmedLine)
            ) {
                $markup .= "\n<hr />";
                continue;
            }

            // list
            $ul = in_array($mark12, $bullets, true);
            if ($ul || preg_match('/^\d+\. /', $trimmedLine)) {
                $wrapper = $ul ? 'ul' : 'ol';
                if (empty($inList)) {
                    $stackList[] = "</$wrapper>";
                    $markup .= "\n<$wrapper>\n";
                    $inList = true;
                    ++$nestLevel;
                }

                // Strip exactly one list marker. A character-mask ltrim() would
                // also eat item text starting with digits, '*', '-' or '.'.
                $markup .= '<li>'.preg_replace('/^(?:[-*+]|\d+\.)\s+/', '', $trimmedLine);

                $ul = in_array($nextMark12, $bullets, true);
                if ($ul || preg_match('/^\d+\. /', $trimmedNextLine)) {
                    $wrapper = $ul ? 'ul' : 'ol';
                    if ($nextIndent > $indent) {
                        // next item is nested: keep this <li> open around the sub list
                        $stackList[] = "</li>\n";
                        $stackList[] = "</$wrapper>";
                        $markup .= "\n<$wrapper>\n";
                        ++$nestLevel;
                    } else {
                        $markup .= "</li>\n";
                    }

                    // handle nested lists ending
                    if ($nextIndent < $indent) {
                        $shift = intval(($indent - $nextIndent) / 4);
                        while ($shift--) {
                            $markup .= array_pop($stackList);
                            if ($nestLevel > 2) {
                                $markup .= array_pop($stackList);
                            }
                        }
                    }
                } else {
                    $markup .= "</li>\n";
                }

                continue;
            }

            // any other line while a list is open is appended as plain text
            if (isset($inList)) {
                $markup .= $trimmedLine;
                continue;
            }

            // table
            if (empty($inTable)) {
                // $hdrCt is the number of inner pipes of the header row, i.e.
                // the highest column index; body rows are clipped to it
                $hdrCt = substr_count(trim($trimmedLine, '|'), '|');
                $colCt = $hdrCt
                    ? preg_match_all('~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~', trim($trimmedNextLine, '|'))
                    : 0;

                if ($hdrCt && $colCt && $hdrCt <= $colCt) {
                    $inTable = true;
                    ++$this->Pointer; // the separator row carries no content
                    $markup .= "<table>\n<thead>\n<tr>\n";
                    foreach (explode('|', trim($trimmedLine, '|')) as $hdr) {
                        $hdr = trim($hdr);
                        $markup .= "<th>{$hdr}</th>\n";
                    }
                    $markup .= "</tr>\n</thead>\n<tbody>\n";
                    continue;
                }
            } else {
                $markup .= "<tr>\n";
                foreach (explode('|', trim($trimmedLine, '|')) as $i => $col) {
                    if ($i > $hdrCt) {
                        break;
                    }
                    $col = trim($col);
                    $markup .= "<td>{$col}</td>\n";
                }
                $markup .= "</tr>\n";

                // Close the table right after its last row, so that whatever
                // follows is never emitted inside the still-open <tbody>.
                if (!substr_count(trim($trimmedNextLine, '|'), '|')) {
                    $inTable = null;
                    $markup .= "</tbody>\n</table>";
                }

                continue;
            }

            // paragraph
            $markup .= empty($inPara) ? "\n<p>" : "\n<br />";
            $markup .= $trimmedLine;
            if ('' === $trimmedNextLine) {
                $markup .= '</p>';
                $inPara = null;
            } else {
                $inPara = true;
            }
        }

        return $this->renderInline($markup);
    }

    /**
     * Applies the inline (span level) conversions to the block level markup:
     * auto links, e-mails, images, anchors and em/strong/code/del.
     *
     * @param string $markup Markup produced by the block level pass.
     *
     * @return string
     */
    private function renderInline($markup)
    {
        // urls
        $markup = preg_replace(
            '~<(https?:[\/]{2}[^\s]+?)>~',
            '<a href="$1">$1</a>',
            $markup
        );

        // emails
        $markup = preg_replace(
            '~<(\S+?@\S+?)>~',
            '<a href="mailto:$1">$1</a>',
            $markup
        );

        // images (before anchors: the anchor pattern would match them too)
        $markup = preg_replace_callback('~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~', function ($img) {
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            $alt = $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $markup);

        // anchors
        $markup = preg_replace_callback('~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~', function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $markup);

        // em/code/strong/del
        $markup = preg_replace_callback('!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!', function ($em) {
            $mark = $em[1];
            $text = $em[2];

            if ('**' === $mark || '__' === $mark) {
                $tag = 'strong';
            } elseif ('~~' === $mark) {
                $tag = 'del';
            } elseif ('*' === $mark || '_' === $mark) {
                $tag = 'em';
            } else {
                // backtick: inline code is escaped
                $tag = 'code';
                $text = htmlspecialchars($text);
            }

            return "<$tag>{$text}</$tag>";
        }, $markup);

        return $markup;
    }
}
327