Completed
Pull Request — master (#13)
by Jitendra
02:45
created

HtmlUp::parseSpanElements()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nc 1
nop 0
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
15
{
16
    use HtmlHelper;
17
18
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
19
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
20
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
21
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
22
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
23
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
24
    const RE_MD_OL     = '/^\d+\. /';
25
26
    protected $lines       = [];
27
    protected $stackList   = [];
28
    protected $stackBlock  = [];
29
    protected $stackTable  = [];
30
31
    protected $pointer     = -1;
32
    protected $listLevel   = 0;
33
    protected $quoteLevel  = 0;
34
    protected $indent      = 0;
35
    protected $nextIndent  = 0;
36
    protected $indentLen   = 4;
37
38
    protected $indentStr       = '    ';
39
    protected $line            = '';
40
    protected $trimmedLine     = '';
41
    protected $prevLine        = '';
42
    protected $trimmedPrevLine = '';
43
    protected $nextLine        = '';
44
    protected $trimmedNextLine = '';
45
    protected $markup          = '';
46
47
    protected $inList  = \false;
48
    protected $inQuote = \false;
49
    protected $inPara  = \false;
50
    protected $inHtml  = \false;
51
    protected $inTable = \false;
52
53
    /**
     * Create a parser instance and hand the given input to scan().
     *
     * Both arguments are forwarded to scan() unchanged; nothing is parsed
     * until parse() (or a string cast) is invoked.
     *
     * @param string $markdown    Markdown source text; may be omitted.
     * @param int    $indentWidth Indent width forwarded to scan(); defaults to 4.
     */
    public function __construct($markdown = \null, $indentWidth = 4)
    {
        $this->scan($markdown, $indentWidth);
    }
63
64
    /**
     * Normalize the markdown text and split it into the line buffer.
     *
     * Tabs are expanded to the configured indent string and all line endings
     * are converted to "\n". The resulting line list is padded with one empty
     * line at the start and one at the end. Null or whitespace-only input
     * leaves the instance untouched.
     *
     * @param string|null $markdown    Markdown source text.
     * @param int         $indentWidth 2 selects a two-space indent; any other value selects four.
     *
     * @return void
     */
    protected function scan($markdown, $indentWidth = 4)
    {
        // The constructor defaults $markdown to null, and handing null to trim()
        // is deprecated since PHP 8.1, so check for it explicitly first.
        if (\null === $markdown || '' === \trim($markdown)) {
            return;
        }

        $this->indentLen = $indentWidth == 2 ? 2 : 4;
        $this->indentStr = $indentWidth == 2 ? '  ' : '    ';

        // Normalize whitespaces
        $markdown = \str_replace("\t", $this->indentStr, $markdown);
        $markdown = \str_replace(["\r\n", "\r"], "\n", $markdown);

        $this->lines = \array_merge([''], \explode("\n", $markdown), ['']);
    }
79
80
    /**
     * String conversion: returns whatever parse() yields for the already scanned input.
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->parse();

        return $html;
    }
84
85
    /**
     * Convert markdown to markup.
     *
     * When $markdown is given, the instance is fully reset and the new text is
     * scanned first; otherwise the lines scanned earlier are used. Block level
     * elements are handled here, then the accumulated markup is passed through
     * SpanElementParser::parse() and that result is returned.
     *
     * @param string $markdown    Optional new markdown source.
     * @param int    $indentWidth Indent width forwarded to scan().
     *
     * @return string Empty string when there are no lines to parse.
     */
    public function parse($markdown = \null, $indentWidth = 4)
    {
        $hasNewInput = \null !== $markdown;

        if ($hasNewInput) {
            $this->reset(\true);
            $this->scan($markdown, $indentWidth);
        }

        if (!$this->lines) {
            return '';
        }

        $this->parseBlockElements();

        $spanParser = new SpanElementParser;

        return $spanParser->parse($this->markup);
    }
109
110
    /**
     * Walk the line buffer and dispatch every line to the block handlers.
     *
     * Handler order per line: flush (blank line), raw, quote, the block
     * detectors of isBlock(), list continuation, table and finally paragraph.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->init();

            if ($this->flush()) {
                continue;
            }

            if ($this->raw()) {
                continue;
            }

            $this->quote();

            // A detected block has already written its own markup.
            if ($this->isBlock()) {
                continue;
            }

            // Inside a list, a non-block line is appended verbatim (trimmed).
            if ($this->inList) {
                $this->markup .= $this->trimmedLine;

                continue;
            }

            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }
130
131
    /**
     * Try the block detectors in fixed order and stop at the first one that succeeds.
     *
     * The order (atx, setext, code, rule, listt) matters because each detector
     * writes markup and may advance the pointer when it matches.
     *
     * @return bool True when one of the detectors handled the current line.
     */
    protected function isBlock()
    {
        foreach (['atx', 'setext', 'code', 'rule', 'listt'] as $detector) {
            if ($this->{$detector}()) {
                return \true;
            }
        }

        return \false;
    }
135
136
    /**
     * Load the per-line state for the line at the current pointer.
     *
     * Shifts the current line values into the "previous" slots, then records
     * the current and the following line together with their trimmed forms and
     * leading whitespace widths. A missing following line counts as ''.
     *
     * @return void
     */
    protected function init()
    {
        $this->prevLine        = $this->line;
        $this->trimmedPrevLine = $this->trimmedLine;

        $current  = $this->lines[$this->pointer];
        $upcoming = '';

        if (isset($this->lines[$this->pointer + 1])) {
            $upcoming = $this->lines[$this->pointer + 1];
        }

        $this->line        = $current;
        $this->trimmedLine = \trim($current);
        $this->indent      = \strlen($current) - \strlen(\ltrim($current));

        $this->nextLine        = $upcoming;
        $this->trimmedNextLine = \trim($upcoming);
        $this->nextIndent      = \strlen($upcoming) - \strlen(\ltrim($upcoming));
    }
150
151
    /**
     * Restore properties to the default values declared on this class.
     *
     * @param bool $all When false, the line buffer, pointer, accumulated markup
     *                  and indent settings are left as they are.
     *
     * @return void
     */
    protected function reset($all = \false)
    {
        $keep = $all ? [] : ['lines', 'pointer', 'markup', 'indentStr', 'indentLen'];

        // Reset all current values.
        foreach (\get_class_vars(__CLASS__) as $name => $default) {
            if (\in_array($name, $keep, \true)) {
                continue;
            }

            $this->{$name} = $default;
        }
    }
160
161
    /**
     * On a blank line, emit all pending closing tags and reset the block state.
     *
     * The list, block and table stacks are drained in that order (last pushed
     * first), a newline is appended and reset(false) is called.
     *
     * @return bool False when the current line is not blank, true otherwise.
     */
    protected function flush()
    {
        if ('' !== $this->trimmedLine) {
            return \false;
        }

        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while (!empty($this->{$stack})) {
                $this->markup .= \array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(\false);

        return \true;
    }
185
186
    /**
     * Pass a raw HTML line through to the markup untouched.
     *
     * A line counts as raw when the parser is already in HTML mode or the
     * trimmed line matches RE_RAW. HTML mode is switched on when the line
     * preceding the raw line in the buffer is empty.
     *
     * @return bool|null True when the line was consumed as raw HTML, null otherwise.
     */
    protected function raw()
    {
        $isRaw = $this->inHtml || \preg_match(static::RE_RAW, $this->trimmedLine);

        if (!$isRaw) {
            return;
        }

        $this->markup .= "\n" . $this->line;

        // Setting the flag again while already in HTML mode changes nothing.
        if (empty($this->lines[$this->pointer - 1])) {
            $this->inHtml = \true;
        }

        return \true;
    }
197
198
    /**
     * Handle a blockquote marker at the start of the current line.
     *
     * Strips the marker from the current line and opens as many <blockquote>
     * elements as needed to reach the depth given by the number of '>'
     * characters. The matching closing tags are queued on the block stack.
     *
     * Previously only one level was opened per line regardless of the marker
     * depth, so a run of '>>' lines nested one level deeper on every line.
     *
     * @return bool|null True when the line carried a quote marker, null otherwise.
     */
    protected function quote()
    {
        if (!\preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return;
        }

        $this->line        = \substr($this->line, \strlen($quoteMatch[0]));
        $this->trimmedLine = \trim($this->line);

        $depth = \strlen($quoteMatch[1]);

        // quoteLevel is back at its default of 0 whenever no quote is open,
        // so this loop also covers the first quoted line.
        while ($this->quoteLevel < $depth) {
            $this->markup .= "\n<blockquote>";

            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        return $this->inQuote = \true;
    }
215
216
    /**
     * Render an ATX heading ("# Title" up to "###### Title").
     *
     * The heading level is the number of leading '#' characters; seven or more
     * are not treated as a heading. Only the marker itself is removed from the
     * text, so a title that begins with '#' (e.g. "# #1 issue") keeps that
     * character instead of having it stripped along with the marker.
     *
     * @return bool|null True when a heading was written, null otherwise.
     */
    protected function atx()
    {
        if (!isset($this->trimmedLine[0]) || $this->trimmedLine[0] !== '#') {
            return;
        }

        $level = \strlen($this->trimmedLine) - \strlen(\ltrim($this->trimmedLine, '#'));

        if ($level > 6) {
            return;
        }

        // Cast because substr() returns false on PHP < 8 when nothing follows the marker.
        $title = \ltrim((string) \substr($this->trimmedLine, $level));

        $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

        return \true;
    }
228
229
    /**
     * Render a setext heading: a text line underlined by the following line.
     *
     * The following line must match RE_MD_SETEXT. An underline consisting only
     * of dashes and spaces gives level 2, anything else level 1. The underline
     * is consumed by advancing the pointer.
     *
     * @return bool|null True when a heading was written, null otherwise.
     */
    protected function setext()
    {
        if (!\preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return;
        }

        $level = 1;

        if ('' === \trim($this->nextLine, '- ')) {
            $level = 2;
        }

        $this->markup .= "\n<h" . $level . '>' . $this->trimmedLine . '</h' . $level . '>';

        // Skip the underline.
        ++$this->pointer;

        return \true;
    }
241
242
    /**
     * Render a code block, either fenced or indented.
     *
     * A fenced block starts with a line matching RE_MD_CODE; its optional
     * language token becomes a "language-*" class. Outside lists and quotes, a
     * line indented at least one indent width deeper than the following line
     * also starts a block. The block body is collected by codeInternal(), after
     * which the pointer is advanced one more line.
     *
     * @return bool|null True when a code block was written, null otherwise.
     */
    protected function code()
    {
        $fenceMatch = [];
        $codeBlock  = \preg_match(static::RE_MD_CODE, $this->line, $fenceMatch);
        $isShifted  = ($this->indent - $this->nextIndent) >= $this->indentLen;

        if (!$codeBlock && ($this->inList || $this->inQuote || !$isShifted)) {
            return;
        }

        $lang = '';

        if (isset($fenceMatch[1])) {
            $lang = ' class="language-' . $fenceMatch[1] . '"';
        }

        $this->markup .= "\n<pre><code{$lang}>";

        // For an indented block the current line is already part of the code.
        if (!$codeBlock) {
            $this->markup .= $this->escape(\substr($this->line, $this->indentLen));
        }

        $this->codeInternal($codeBlock);

        ++$this->pointer;

        $this->markup .= '</code></pre>';

        return \true;
    }
267
268
    /**
     * Append the lines that belong to the code block opened by code().
     *
     * Each upcoming line is passed through escape() and stored as the current
     * line. Collection continues while the line is inside a fenced block (it
     * does not start with ``` after left-trimming) or starts with the indent
     * string. Indented lines of a non-fenced block lose one indent width.
     *
     * @param int|bool $codeBlock Truthy when the block is a fenced one.
     *
     * @return void
     */
    public function codeInternal($codeBlock)
    {
        while (isset($this->lines[$this->pointer + 1])) {
            $escaped    = $this->escape($this->lines[$this->pointer + 1]);
            $this->line = $escaped;

            $insideFence = $codeBlock && \substr(\ltrim($escaped), 0, 3) !== '```';
            $isIndented  = \strpos($escaped, $this->indentStr) === 0;

            if (!$insideFence && !$isIndented) {
                break;
            }

            // @todo: donot use \n for first line
            $this->markup .= "\n" . ($codeBlock ? $escaped : \substr($escaped, $this->indentLen));

            ++$this->pointer;
        }
    }
285
286
    /**
     * Render a horizontal rule.
     *
     * Applies only when the previous trimmed line is empty and the current
     * trimmed line matches RE_MD_RULE.
     *
     * @return bool|null True when a rule was written, null otherwise.
     */
    protected function rule()
    {
        $followsBlank = '' === $this->trimmedPrevLine;

        if (!$followsBlank || !\preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
            return;
        }

        $this->markup .= "\n<hr />";

        return \true;
    }
296
297
    /**
     * Render a list item, opening the surrounding list when necessary.
     *
     * Unordered items start with "- ", "* " or "+ "; ordered items match
     * RE_MD_OL. Only the marker itself is removed from the item text. The
     * earlier character-list ltrim() also ate leading characters of the
     * content, e.g. "- **bold**" became "bold**" and "- 2019 recap" became
     * "recap".
     *
     * @return bool|null True when a list item was written, null otherwise.
     */
    protected function listt()
    {
        $olMatch = [];
        $isUl    = \in_array(\substr($this->trimmedLine, 0, 2), ['- ', '* ', '+ '], \true);
        $isOl    = !$isUl && \preg_match(static::RE_MD_OL, $this->trimmedLine, $olMatch);

        if (!$isUl && !$isOl) {
            return;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";
            $this->inList      = \true;

            ++$this->listLevel;
        }

        // Unordered markers are always two characters; ordered ones vary ("1. ", "12. ").
        $markerLen = $isUl ? 2 : \strlen($olMatch[0]);
        $item      = \ltrim((string) \substr($this->trimmedLine, $markerLen));

        $this->markup .= '<li>' . $item;

        $this->listInternal();

        return \true;
    }
319
320
    /**
     * Close or nest the current list item depending on the following line.
     *
     * - Following line is not a list item: the item is closed.
     * - Following item is indented deeper: a nested list is opened and the
     *   closing tags for it and for the current item are queued.
     * - Otherwise the item is closed; when the following item is indented less,
     *   queued closing tags are emitted once per indent step.
     *
     * @return void
     */
    protected function listInternal()
    {
        $nextIsUl = \in_array(\substr($this->trimmedNextLine, 0, 2), ['- ', '* ', '+ ']);

        if (!$nextIsUl && !\preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            $this->markup .= "</li>\n";

            return;
        }

        $tag = $nextIsUl ? 'ul' : 'ol';

        if ($this->nextIndent > $this->indent) {
            $this->stackList[] = "</li>\n";
            $this->stackList[] = "</$tag>";
            $this->markup     .= "\n<$tag>\n";

            ++$this->listLevel;

            return;
        }

        $this->markup .= "</li>\n";

        if ($this->nextIndent >= $this->indent) {
            return;
        }

        $steps = \intval(($this->indent - $this->nextIndent) / $this->indentLen);

        for (; $steps > 0; --$steps) {
            $this->markup .= \array_pop($this->stackList);

            if ($this->listLevel > 2) {
                $this->markup .= \array_pop($this->stackList);
            }
        }
    }
351
352
    /**
     * Render a table row, or try to start a table when none is open.
     *
     * Outside a table the call is delegated to tableInternal() with the number
     * of column separators found in the current line. Inside a table the line
     * is split on '|' and written as one row, limited to the header's column
     * count. The table ends when the following line contains no column
     * separator (a blank line has none either).
     *
     * The closing tags are written immediately rather than queued on the table
     * stack. Queued tags were only emitted at the next blank line and after
     * the block stack, which placed them behind a following paragraph or
     * behind "</blockquote>" for a quoted table. Output for a table followed
     * by a blank line is unchanged.
     *
     * @return bool|null True when the line was handled as part of a table.
     */
    protected function table()
    {
        // Column separator count of the header row, kept between calls.
        static $headerCount = 0;

        if (!$this->inTable) {
            $headerCount = \substr_count(\trim($this->trimmedLine, '|'), '|');

            return $this->tableInternal($headerCount);
        }

        $this->markup .= "<tr>\n";

        foreach (\explode('|', \trim($this->trimmedLine, '|')) as $i => $col) {
            // Ignore cells beyond the number of header columns.
            if ($i > $headerCount) {
                break;
            }

            $col           = \trim($col);
            $this->markup .= "<td>{$col}</td>\n";
        }

        $this->markup .= "</tr>\n";

        if (!\substr_count(\trim($this->trimmedNextLine, '|'), '|')) {
            $headerCount    = 0;
            $this->inTable  = \false;
            $this->markup  .= "</tbody>\n</table>";
        }

        return \true;
    }
385
386
    /**
     * Start a table when the following line is a valid header separator.
     *
     * The following line must yield at least as many RE_MD_TCOL matches as the
     * header has column separators. On success the separator line is skipped
     * and the header row plus the opening of the body are written.
     *
     * If the line after the separator is not a table row, the table is closed
     * here. Previously such a header-only table was left without its closing
     * tags, because rows are the only other place that closes a table.
     *
     * @param int $headerCount Number of '|' separators in the header line.
     *
     * @return bool|null True when a table was started, null otherwise.
     */
    protected function tableInternal($headerCount)
    {
        $columnCount = \preg_match_all(static::RE_MD_TCOL, \trim($this->trimmedNextLine, '|'));

        if ($headerCount > 0 && $headerCount <= $columnCount) {
            // Skip the separator line.
            ++$this->pointer;

            $this->inTable     = \true;
            $this->markup     .= "<table>\n<thead>\n<tr>\n";
            $this->trimmedLine = \trim($this->trimmedLine, '|');

            foreach (\explode('|', $this->trimmedLine) as $hdr) {
                $this->markup .= '<th>' . \trim($hdr) . "</th>\n";
            }

            $this->markup .= "</tr>\n</thead>\n<tbody>\n";

            // Peek at the line after the separator; nextLine still holds the separator.
            $following = isset($this->lines[$this->pointer + 1])
                ? \trim($this->lines[$this->pointer + 1])
                : '';

            if (!\substr_count(\trim($following, '|'), '|')) {
                $this->inTable  = \false;
                $this->markup  .= "</tbody>\n</table>";
            }

            return \true;
        }
    }
406
407
    /**
     * Write the current line as paragraph text.
     *
     * The first line opens a <p>; each further line of the same paragraph is
     * preceded by a <br />. The paragraph is closed when the following line is
     * blank.
     *
     * The blank check compares against '' rather than using empty(), which
     * also treated a following line consisting of "0" as blank and split the
     * paragraph in two.
     *
     * @return void
     */
    protected function paragraph()
    {
        $this->markup .= $this->inPara ? "\n<br />" : "\n<p>";
        $this->markup .= $this->trimmedLine;

        if ('' === $this->trimmedNextLine) {
            $this->markup .= '</p>';
            $this->inPara = \false;
        } else {
            $this->inPara = \true;
        }
    }
419
}
420