Completed
Push — master ( b1717e...1225b6 )
by Jitendra
13s
created

HtmlUp::parseBlockElements()   B

Complexity

Conditions 8
Paths 6

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 9
nc 6
nop 0
dl 0
loc 18
rs 7.7777
c 0
b 0
f 0
1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
15
{
16
    // --- Span-level patterns -------------------------------------------------

    // Angle-bracketed http:// or https:// autolink; group 1 is the URL.
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
    // Text that starts with something tag-like: "<", optional "/", a word character, up to ">".
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
    // Angle-bracketed token containing "@"; group 1 is the address.
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
    // ![alt](src "title"): groups are alt, src and the optional quoted title.
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    // [text](href "title"): groups are text, href and the optional quoted title.
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    // Text wrapped in a matching pair of *, **, _, __, ` or ~~; group 1 is the delimiter.
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';

    // --- Block-level patterns ------------------------------------------------

    // Leading run of ">" followed by whitespace; group 1 is the run of ">".
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
    // A line made of three or more "=" or three or more "-".
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
    // Opening ``` with an optional language word in group 1.
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
    // A line consisting solely of three or more "_", "*" or "-".
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
    // One delimiter cell of a table: three or more "-" with optional ":" / "|" around it.
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
    // Digits, a dot and a space at the start of the text.
    const RE_MD_OL     = '/^\d+\. /';

    // Input lines and the stacks of closing markup that flush() emits.
    protected $lines       = [];
    protected $stackList   = [];
    protected $stackBlock  = [];
    protected $stackTable  = [];

    // Index into $lines plus nesting/indent counters for the current line.
    protected $pointer     = -1;
    protected $listLevel   = 0;
    protected $quoteLevel  = 0;
    protected $indent      = 0;
    protected $nextIndent  = 0;
    protected $indentLen   = 4;

    // Indent unit, the previous/current/next line (raw and trimmed) and the output buffer.
    protected $indentStr       = '    ';
    protected $line            = '';
    protected $trimmedLine     = '';
    protected $prevLine        = '';
    protected $trimmedPrevLine = '';
    protected $nextLine        = '';
    protected $trimmedNextLine = '';
    protected $markup          = '';

    // Flags recording which construct the parser is currently inside.
    protected $inList  = false;
    protected $inQuote = false;
    protected $inPara  = false;
    protected $inHtml  = false;
    protected $inTable = false;
55
56
    /**
     * Build a parser and hand the given markdown straight to scan().
     *
     * @param string|null $markdown    Markdown text; may be left out and given to parse() later.
     * @param int         $indentWidth Indent width; scan() uses two spaces for 2 and four otherwise.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        $this->scan($markdown, $indentWidth);
    }
66
67
    /**
     * Normalise the raw markdown and split it into the line buffer.
     *
     * Tabs are replaced by the indent string, CRLF/CR line endings become LF,
     * and the resulting lines are stored with one empty line added at each end.
     * Nothing is stored when the input is null or only whitespace.
     *
     * @param string|null $markdown    Markdown text to scan.
     * @param int         $indentWidth 2 selects a two-space indent; any other value selects four.
     *
     * @return void
     */
    protected function scan($markdown, $indentWidth = 4)
    {
        // The constructor defaults $markdown to null, and passing null to
        // trim() is deprecated as of PHP 8.1, so check for it explicitly.
        if (null === $markdown || '' === trim($markdown)) {
            return;
        }

        $this->indentLen = $indentWidth == 2 ? 2 : 4;
        $this->indentStr = $indentWidth == 2 ? '  ' : '    ';

        // Normalize whitespaces
        $markdown = str_replace("\t", $this->indentStr, $markdown);
        $markdown = str_replace(["\r\n", "\r"], "\n", $markdown);

        $this->lines = array_merge([''], explode("\n", $markdown), ['']);
    }
82
83
    /**
     * String conversion: parses the already scanned markdown and yields the markup.
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->parse();

        return $html;
    }
87
88
    /**
     * Convert markdown to HTML markup.
     *
     * When $markdown is given, all state is reset and the new text is scanned
     * first; otherwise the lines scanned earlier are used.
     *
     * @param string|null $markdown    Markdown text, or null to reuse the scanned lines.
     * @param int         $indentWidth Indent width forwarded to scan().
     *
     * @return string The generated markup, or '' when there are no lines to parse.
     */
    public function parse($markdown = null, $indentWidth = 4)
    {
        $hasFreshInput = null !== $markdown;

        if ($hasFreshInput) {
            $this->reset(true);
            $this->scan($markdown, $indentWidth);
        }

        if (!$this->lines) {
            return '';
        }

        // Block structure first, inline formatting afterwards.
        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }
105
106
    /**
     * Walk the line buffer and append block-level markup for every line.
     *
     * For each line, in order: blank lines are flushed, raw HTML is passed
     * through, a quote prefix is handled, then the block handlers are tried;
     * whatever remains is treated as a table row or a paragraph line.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->init();

            if ($this->flush() || $this->raw()) {
                continue;
            }

            $this->quote();

            if (($block = $this->isBlock()) || $this->inList) {
                // A line inside a list that is not itself a block is appended verbatim.
                $this->markup .= $block ? '' : $this->trimmedLine;

                continue;
            }

            // paragraph() has no return value, so call it as a statement
            // instead of using it as an operand of a boolean expression.
            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }
126
127
    /**
     * Try the block handlers in a fixed order and stop at the first that succeeds.
     *
     * @return bool True when one of atx, setext, code, rule or listt handled the line.
     */
    protected function isBlock()
    {
        foreach (['atx', 'setext', 'code', 'rule', 'listt'] as $handler) {
            if ($this->{$handler}()) {
                return true;
            }
        }

        return false;
    }
131
132
    /**
     * Load the line at the current pointer and refresh the previous/next line state.
     *
     * @return void
     */
    protected function init()
    {
        // What was current becomes previous.
        $this->prevLine        = $this->line;
        $this->trimmedPrevLine = $this->trimmedLine;

        $current  = $this->lines[$this->pointer];
        $upcoming = '';

        if (isset($this->lines[$this->pointer + 1])) {
            $upcoming = $this->lines[$this->pointer + 1];
        }

        $this->line        = $current;
        $this->trimmedLine = trim($current);
        // Indent is the number of leading whitespace characters.
        $this->indent      = strlen($current) - strlen(ltrim($current));

        $this->nextLine        = $upcoming;
        $this->trimmedNextLine = trim($upcoming);
        $this->nextIndent      = strlen($upcoming) - strlen(ltrim($upcoming));
    }
146
147
    /**
     * Run the inline passes over the markup: links, then anchors, then spans.
     *
     * @return void
     */
    protected function parseSpanElements()
    {
        foreach (['links', 'anchors', 'spans'] as $pass) {
            $this->{$pass}();
        }
    }
155
156
    /**
     * Turn angle-bracketed URLs and e-mail addresses in the markup into anchors.
     *
     * @return void
     */
    protected function links()
    {
        // Applied in this order: URLs first, then emails.
        $rules = [
            static::RE_URL   => '<a href="$1">$1</a>',
            static::RE_EMAIL => '<a href="mailto:$1">$1</a>',
        ];

        foreach ($rules as $pattern => $replacement) {
            $this->markup = preg_replace($pattern, $replacement, $this->markup);
        }
    }
172
173
    /**
     * Replace markdown images and links in the markup with <img> and <a> tags.
     *
     * Images are handled first so that the link pattern does not consume the
     * bracketed part of an image.
     *
     * @return void
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            // Group 3 already carries its surrounding double quotes.
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare with '' instead of relying on truthiness, so the alt text "0" is kept.
            $alt   = '' !== $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }
190
191
    /**
     * Convert emphasis, strong, strike-through and inline code markers to HTML tags.
     *
     * @return void
     */
    protected function spans()
    {
        // em/code/strong/del
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            // Explicit delimiter-to-tag table; any delimiter not listed here
            // (the backtick) is rendered as inline code.
            $tags = [
                '**' => 'strong',
                '__' => 'strong',
                '~~' => 'del',
                '*'  => 'em',
                '_'  => 'em',
            ];

            $tag = isset($tags[$em[1]]) ? $tags[$em[1]] : 'code';

            if ('code' === $tag) {
                // Only inline code has its content escaped.
                $em[2] = $this->escape($em[2]);
            }

            return "<$tag>{$em[2]}</$tag>";
        }, $this->markup);
    }
218
219
    /**
     * Escape HTML special characters using htmlspecialchars() with its default flags.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        $escaped = htmlspecialchars($input);

        return $escaped;
    }
223
224
    /**
     * Restore properties to the defaults declared on this class.
     *
     * @param bool $all When false, lines, pointer, markup, indentStr and indentLen keep their values.
     *
     * @return void
     */
    protected function reset($all = false)
    {
        $preserved = $all ? [] : ['lines', 'pointer', 'markup', 'indentStr', 'indentLen'];

        // Reset all current values.
        foreach (get_class_vars(__CLASS__) as $prop => $value) {
            if (in_array($prop, $preserved, true)) {
                continue;
            }

            $this->{$prop} = $value;
        }
    }
233
234
    /**
     * On a blank line, emit every pending closing tag and reset the per-block state.
     *
     * @return bool False when the current line is not blank, true after flushing.
     */
    protected function flush()
    {
        if ('' !== $this->trimmedLine) {
            return false;
        }

        // Drain the stacks last-in first-out: lists, then blocks, then tables.
        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while (!empty($this->{$stack})) {
                $this->markup .= array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }
258
259
    /**
     * Pass the current line through untouched when it is raw HTML.
     *
     * A line counts as raw when the parser is already in HTML mode or the
     * trimmed line matches RE_RAW. HTML mode is switched on when the line
     * before the current one is empty.
     *
     * @return true|null True when the line was emitted as raw HTML, null otherwise.
     */
    protected function raw()
    {
        $isRaw = $this->inHtml || preg_match(static::RE_RAW, $this->trimmedLine);

        if (!$isRaw) {
            return;
        }

        $this->markup .= "\n" . $this->line;
        $this->inHtml  = $this->inHtml || empty($this->lines[$this->pointer - 1]);

        return true;
    }
270
271
    /**
     * Strip a blockquote prefix from the current line and open a <blockquote> when needed.
     *
     * @return true|null True when the line carried a quote prefix, null otherwise.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return;
        }

        // Continue parsing with the prefix removed.
        $this->line        = substr($this->line, strlen($quoteMatch[0]));
        $this->trimmedLine = trim($this->line);

        $depth      = strlen($quoteMatch[1]);
        $opensQuote = !$this->inQuote || $this->quoteLevel < $depth;

        if ($opensQuote) {
            $this->markup      .= "\n<blockquote>";
            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        $this->inQuote = true;

        return true;
    }
288
289
    /**
     * Emit a heading for a line that starts with one to six "#" characters.
     *
     * @return true|null True when a heading was emitted, null otherwise.
     */
    protected function atx()
    {
        if (strpos($this->trimmedLine, '#') !== 0) {
            return;
        }

        // Heading level is the length of the leading run of "#".
        $level = strspn($this->trimmedLine, '#');

        if ($level >= 7) {
            return;
        }

        $title = ltrim(ltrim($this->trimmedLine, '# '));

        $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

        return true;
    }
301
302
    /**
     * Emit a heading when the next line is an underline of "=" or "-".
     *
     * The underline itself is consumed by advancing the pointer.
     *
     * @return true|null True when a heading was emitted, null otherwise.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return;
        }

        // An underline made only of dashes and spaces gives level 2, otherwise level 1.
        $level = 1;

        if (trim($this->nextLine, '- ') === '') {
            $level = 2;
        }

        $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

        ++$this->pointer;

        return true;
    }
314
315
    /**
     * Emit a <pre><code> block for a fenced or an indented code section.
     *
     * @return true|null True when a code block was emitted, null otherwise.
     */
    protected function code()
    {
        $isFenced = preg_match(static::RE_MD_CODE, $this->line, $codeMatch);

        // Outside lists and quotes, a line indented at least one indent unit
        // deeper than the following line also starts a code block.
        $isShifted  = ($this->indent - $this->nextIndent) >= $this->indentLen;
        $isIndented = !$this->inList && !$this->inQuote && $isShifted;

        if (!$isFenced && !$isIndented) {
            return;
        }

        $lang = '';

        if (isset($codeMatch[1])) {
            $lang = ' class="language-' . $codeMatch[1] . '"';
        }

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$isFenced) {
            // For an indented block the current line is already code.
            $this->markup .= $this->escape(substr($this->line, $this->indentLen));
        }

        $this->codeInternal($isFenced);

        ++$this->pointer;

        $this->markup .= '</code></pre>';

        return true;
    }
340
341
    /**
     * Append the following lines to the open code block for as long as they belong to it.
     *
     * A line belongs to the block when the block is fenced and the line does
     * not start with ```, or when the line starts with the indent string.
     *
     * @param int|bool $codeBlock Truthy when the block was opened by a ``` fence.
     *
     * @return void
     */
    public function codeInternal($codeBlock)
    {
        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $insideFence = $codeBlock && substr(ltrim($this->line), 0, 3) !== '```';
            $isIndented  = strpos($this->line, $this->indentStr) === 0;

            if (!$insideFence && !$isIndented) {
                break;
            }

            // @todo: donot use \n for first line
            $this->markup .= "\n" . ($codeBlock ? $this->line : substr($this->line, $this->indentLen));

            ++$this->pointer;
        }
    }
358
359
    /**
     * Emit <hr /> for a rule line that follows a blank line.
     *
     * @return true|null True when a rule was emitted, null otherwise.
     */
    protected function rule()
    {
        $followsBlank = $this->trimmedPrevLine === '';

        if (!$followsBlank || !preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
            return;
        }

        $this->markup .= "\n<hr />";

        return true;
    }
369
370
    /**
     * Emit a list item, opening the enclosing <ul>/<ol> when not already in a list.
     *
     * @return true|null True when the line was a list item, null otherwise.
     */
    protected function listt()
    {
        $marker = substr($this->trimmedLine, 0, 2);
        $isUl   = in_array($marker, ['- ', '* ', '+ ']);
        $isOl   = !$isUl && preg_match(static::RE_MD_OL, $this->trimmedLine);

        if (!$isUl && !$isOl) {
            return;
        }

        if (!$this->inList) {
            $wrapper = $isUl ? 'ul' : 'ol';

            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";
            $this->inList      = true;

            ++$this->listLevel;
        }

        // Drop the bullet or number marker from the item text.
        $this->markup .= '<li>' . ltrim($this->trimmedLine, '+-*0123456789. ');

        $this->listInternal();

        return true;
    }
392
393
    /**
     * Close the current list item or open/close nested lists, based on the next line.
     *
     * @return void
     */
    protected function listInternal()
    {
        $nextMarker = substr($this->trimmedNextLine, 0, 2);
        $isUl       = in_array($nextMarker, ['- ', '* ', '+ ']);
        $nextIsItem = $isUl || preg_match(static::RE_MD_OL, $this->trimmedNextLine);

        if (!$nextIsItem) {
            $this->markup .= "</li>\n";

            return;
        }

        if ($this->nextIndent > $this->indent) {
            // Deeper item follows: keep this <li> open and start a nested list.
            $wrapper = $isUl ? 'ul' : 'ol';

            $this->stackList[] = "</li>\n";
            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";

            ++$this->listLevel;

            return;
        }

        $this->markup .= "</li>\n";

        if ($this->nextIndent >= $this->indent) {
            return;
        }

        // Shallower item follows: unwind one stack entry per indent step,
        // and a second one when the list level is above 2.
        $shift = intval(($this->indent - $this->nextIndent) / $this->indentLen);

        for (; $shift > 0; --$shift) {
            $this->markup .= array_pop($this->stackList);

            if ($this->listLevel > 2) {
                $this->markup .= array_pop($this->stackList);
            }
        }
    }
424
425
    /**
     * Emit a table row, or try to open a table when not yet inside one.
     *
     * The header separator count is kept in a static local so that later rows
     * can be limited to the number of header columns.
     *
     * @return true|null True when table markup was emitted, null when the line is no table.
     */
    protected function table()
    {
        static $headerCount = 0;

        if (!$this->inTable) {
            $headerCount = substr_count(trim($this->trimmedLine, '|'), '|');

            return $this->tableInternal($headerCount);
        }

        $cells = explode('|', trim($this->trimmedLine, '|'));
        $row   = "<tr>\n";

        // Cells beyond the header's column count are dropped.
        foreach (array_slice($cells, 0, $headerCount + 1) as $cell) {
            $row .= '<td>' . trim($cell) . "</td>\n";
        }

        $this->markup .= $row . "</tr>\n";

        $nextIsRow = !empty($this->trimmedNextLine)
            && substr_count(trim($this->trimmedNextLine, '|'), '|');

        if (!$nextIsRow) {
            // Last row: leave table mode and queue the closing tags.
            $headerCount        = 0;
            $this->inTable      = false;
            $this->stackTable[] = "</tbody>\n</table>";
        }

        return true;
    }
458
459
    /**
     * Open a table when the next line is a delimiter row that covers the header columns.
     *
     * On success the delimiter row is skipped and the header row is emitted.
     *
     * @param int $headerCount Number of "|" separators found in the header line.
     *
     * @return true|null True when the table was opened, null otherwise.
     */
    protected function tableInternal($headerCount)
    {
        $columnCount = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

        if ($headerCount <= 0 || $headerCount > $columnCount) {
            return;
        }

        ++$this->pointer;

        $this->inTable     = true;
        $this->trimmedLine = trim($this->trimmedLine, '|');

        $head = "<table>\n<thead>\n<tr>\n";

        foreach (explode('|', $this->trimmedLine) as $hdr) {
            $head .= '<th>' . trim($hdr) . "</th>\n";
        }

        $this->markup .= $head . "</tr>\n</thead>\n<tbody>\n";

        return true;
    }
479
480
    /**
     * Append the current line as paragraph text.
     *
     * A new <p> is opened unless a paragraph is already running, in which case
     * the line is joined with <br />. The paragraph is closed when the next
     * line is empty.
     *
     * @return void
     */
    protected function paragraph()
    {
        $opening  = $this->inPara ? "\n<br />" : "\n<p>";
        $endsHere = empty($this->trimmedNextLine);

        $this->markup .= $opening . $this->trimmedLine . ($endsHere ? '</p>' : '');
        $this->inPara  = !$endsHere;
    }
492
}
493