Completed
Push — master ( 3fea26...b1717e )
by Jitendra
13s
created

HtmlUp::scan()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 1
dl 0
loc 11
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
15
{
16
    // Autolink: an http(s) URL wrapped in angle brackets; group 1 is the URL (used by links()).
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
17
    // A line that starts with an opening or closing HTML tag (used by raw()).
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
18
    // Autolink: an address containing "@" wrapped in angle brackets (used by links()).
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
19
    // Image: ![alt](src "title"); groups are alt, src and the optional quoted title.
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
20
    // Anchor: [text](href "title"); groups are text, href and the optional quoted title.
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
21
    // Inline span: text wrapped in a matching *, **, _, __, ` or ~~ marker (used by spans()).
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
22
    // Block quote prefix: one or more ">" followed by whitespace; group 1 is the ">" run.
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
23
    // Setext underline: a line made of three or more "=" or "-" (used by setext()).
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
24
    // Code fence: ``` at line start with an optional language name in group 1.
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
25
    // Horizontal rule: a line of three or more "_", "*" or "-" (used by rule()).
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
26
    // Table separator cell: three or more "-" with optional ":" alignment markers;
    // tableInternal() counts its matches on the line below the header row.
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
27
    // Ordered list marker: digits, a dot and a space at line start.
    const RE_MD_OL     = '/^\d+\. /';
28
29
    // Input lines filled by scan(); the stacks hold closing tags that flush() pops
    // and appends to the markup when a blank line is reached.
    protected $lines       = [];
30
    protected $stackList   = [];
31
    protected $stackBlock  = [];
32
    protected $stackTable  = [];
33
34
    // Index of the line being parsed; -1 until the block pass starts.
    protected $pointer     = -1;
35
    // Counters incremented by listt()/listInternal() and quote().
    protected $listLevel   = 0;
36
    protected $quoteLevel  = 0;
37
    // Leading-whitespace length of the current and the next line (set by init()).
    protected $indent      = 0;
38
    protected $nextIndent  = 0;
39
40
    // String that replaces each tab in scan(): two or four spaces.
    protected $indentStr       = '';
41
    // Current, previous and next line, each with a trimmed copy (set by init()).
    protected $line            = '';
42
    protected $trimmedLine     = '';
43
    protected $prevLine        = '';
44
    protected $trimmedPrevLine = '';
45
    protected $nextLine        = '';
46
    protected $trimmedNextLine = '';
47
    // Generated HTML, built up by the block pass and rewritten by the span pass.
    protected $markup          = '';
48
49
    // Parser state flags; all except lines/pointer/markup are reset by flush().
    protected $inList  = false;
50
    protected $inQuote = false;
51
    protected $inPara  = false;
52
    protected $inHtml  = false;
53
    protected $inTable = false;
54
55
    /**
     * Constructor.
     *
     * Picks the indent string used to replace tabs and, when markdown is
     * supplied, scans it into the line buffer right away.
     *
     * @param string|null $markdown    Markdown text to scan, or null to defer.
     * @param int         $indentWidth 2 selects a two-space indent; any other
     *                                 value selects four spaces.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        $width = $indentWidth == 2 ? 2 : 4;

        $this->indentStr = str_repeat(' ', $width);

        if ($markdown === null) {
            return;
        }

        $this->scan($markdown);
    }
68
69
    /**
     * Splits raw markdown into the line buffer consumed by the block parser.
     *
     * Whitespace-only input leaves the buffer untouched. Otherwise tabs become
     * the configured indent string, CRLF/CR line endings become LF, and the
     * resulting lines are padded with one empty line at each end.
     *
     * @param string $markdown
     *
     * @return void
     */
    protected function scan($markdown)
    {
        if (trim($markdown) === '') {
            return;
        }

        // Replacements are applied in order: tabs, then "\r\n", then a lone "\r".
        $normalized = str_replace(
            ["\t", "\r\n", "\r"],
            [$this->indentStr, "\n", "\n"],
            $markdown
        );

        $rows = explode("\n", $normalized);
        array_unshift($rows, '');
        $rows[] = '';

        $this->lines = $rows;
    }
81
82
    /**
     * Renders the scanned markdown when the object is used as a string.
     *
     * @return string Whatever parse() returns for the current input.
     */
    public function __toString()
    {
        $html = $this->parse();

        return $html;
    }
86
87
    /**
     * Parses markdown and returns the resulting HTML.
     *
     * @param string|null $markdown New markdown to parse. When null, the text
     *                              scanned earlier (if any) is used.
     *
     * @return string Generated markup, or '' when there are no lines to parse.
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);

            $this->scan($markdown);
        }

        if ([] === $this->lines) {
            return '';
        }

        // The pointer only leaves -1 once the block pass has run. Running the span
        // pass again over finished markup would re-match emphasis markers left
        // inside converted text (e.g. the "_" in "<em>a_b</em> ... <em>c_d</em>"),
        // so a repeated parse()/__toString() call returns the existing result.
        if (-1 !== $this->pointer) {
            return $this->markup;
        }

        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }
104
105
    /**
     * Walks the line buffer and appends block-level markup for every line.
     *
     * Each line is offered to the block handlers in a fixed order; the first
     * handler that reports success ends the processing of that line.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->init();

            if ($this->flush() || $this->raw()) {
                continue;
            }

            $this->quote();

            if ($this->atx() || $this->setext() || $this->code() || $this->rule() || $this->listt()) {
                continue;
            }

            if ($this->inList) {
                $this->markup .= $this->trimmedLine;

                continue;
            }

            // paragraph() returns nothing, so it must not be used as an operand of
            // `||` (static analysis flagged `table() || paragraph()`); branch instead.
            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }
129
130
    /**
     * Loads the line at the current pointer, plus its neighbours, into the
     * per-line state used by the block handlers.
     *
     * @return void
     */
    protected function init()
    {
        // Keep what was in the current-line slots as the "previous" line.
        $this->prevLine        = $this->line;
        $this->trimmedPrevLine = $this->trimmedLine;

        $current = $this->lines[$this->pointer];
        $ahead   = $this->pointer + 1;
        $next    = isset($this->lines[$ahead]) ? $this->lines[$ahead] : '';

        $this->line        = $current;
        $this->trimmedLine = trim($current);
        $this->indent      = strlen($current) - strlen(ltrim($current));

        $this->nextLine        = $next;
        $this->trimmedNextLine = trim($next);
        $this->nextIndent      = strlen($next) - strlen(ltrim($next));
    }
144
145
    /**
     * Runs the inline passes over the generated markup.
     *
     * @return void
     */
    protected function parseSpanElements()
    {
        // Passes run in this order: autolinks, images/anchors, emphasis/code spans.
        $this->links();
        $this->anchors();
        $this->spans();
    }
153
154
    /**
     * Converts <url> and <email> autolinks in the markup into anchor tags.
     *
     * @return void
     */
    protected function links()
    {
        // Pattern => replacement, applied in this order (URLs, then emails).
        $rules = [
            static::RE_URL   => '<a href="$1">$1</a>',
            static::RE_EMAIL => '<a href="mailto:$1">$1</a>',
        ];

        $this->markup = preg_replace(
            array_keys($rules),
            array_values($rules),
            $this->markup
        );
    }
170
171
    /**
     * Converts markdown images and links in the markup into <img> and <a> tags.
     *
     * Images are handled first so that the link pattern does not consume the
     * "[alt](src)" part of an image.
     *
     * @return void
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            // Group 3 (the quoted title) is absent from the match when not given.
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare against '' rather than testing truthiness, so that an alt
            // text of "0" is not silently dropped.
            $alt   = $img[1] !== '' ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }
188
189
    /**
     * Converts inline markers into <strong>, <del>, <em> and <code> elements.
     *
     * @return void
     */
    protected function spans()
    {
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            // Marker => tag; any marker not listed here (the backtick) means code.
            $tagFor = [
                '**' => 'strong',
                '__' => 'strong',
                '~~' => 'del',
                '*'  => 'em',
                '_'  => 'em',
            ];

            $marker = $em[1];
            $tag    = isset($tagFor[$marker]) ? $tagFor[$marker] : 'code';

            // Only code spans get their content HTML-escaped.
            $content = $tag === 'code' ? $this->escape($em[2]) : $em[2];

            return "<$tag>{$content}</$tag>";
        }, $this->markup);
    }
216
217
    /**
     * HTML-escapes a string using htmlspecialchars() with its default flags.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        $escaped = htmlspecialchars($input);

        return $escaped;
    }
221
222
    /**
     * Restores properties to the default values declared on this class.
     *
     * @param bool $all When truthy every property is restored; otherwise the
     *                  line buffer, the pointer and the markup are kept.
     *
     * @return void
     */
    protected function reset($all = false)
    {
        $keep = [];

        if (!$all) {
            $keep = ['lines' => true, 'pointer' => true, 'markup' => true];
        }

        foreach (get_class_vars(__CLASS__) as $prop => $value) {
            if (isset($keep[$prop])) {
                continue;
            }

            $this->{$prop} = $value;
        }
    }
231
232
    /**
     * On a blank line, emits all pending closing tags and resets parser state.
     *
     * @return bool True when the current line was blank and handled here.
     */
    protected function flush()
    {
        if ($this->trimmedLine !== '') {
            return false;
        }

        // Drain the stacks last-in-first-out: lists, then blocks, then tables.
        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while (!empty($this->{$stack})) {
                $this->markup .= array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }
256
257
    /**
     * Copies raw HTML lines into the markup unchanged.
     *
     * A line is raw when the parser is already in HTML mode or when the trimmed
     * line starts with an HTML tag. HTML mode is switched on when such a tag
     * line comes right after an empty line.
     *
     * @return true|null True when the line was consumed, null otherwise.
     */
    protected function raw()
    {
        $isRaw = $this->inHtml || preg_match(static::RE_RAW, $this->trimmedLine);

        if (!$isRaw) {
            return;
        }

        $this->markup .= "\n" . $this->line;

        if (!$this->inHtml) {
            // Enter HTML mode only if the line before this one is empty.
            $this->inHtml = empty($this->lines[$this->pointer - 1]);
        }

        return true;
    }
268
269
    /**
     * Strips a block quote prefix from the current line and opens a
     * <blockquote> when a quote starts or its ">" depth exceeds the level seen.
     *
     * @return true|null True when the line carried a quote prefix, null otherwise.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return;
        }

        list($prefix, $markers) = $quoteMatch;

        // Continue parsing with the text that follows the ">" prefix.
        $this->line        = substr($this->line, strlen($prefix));
        $this->trimmedLine = trim($this->line);

        $depth = strlen($markers);

        if (!$this->inQuote || $this->quoteLevel < $depth) {
            $this->markup      .= "\n<blockquote>";
            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        $this->inQuote = true;

        return true;
    }
286
287
    /**
     * Emits an <h1>..<h6> for a line that starts with one to six "#".
     *
     * @return true|null True when a heading was emitted, null otherwise.
     */
    protected function atx()
    {
        // Number of leading "#" characters; 0 when the line starts differently.
        $level = strspn($this->trimmedLine, '#');

        if ($level < 1 || $level > 6) {
            return;
        }

        $title = ltrim(ltrim($this->trimmedLine, '# '));

        $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

        return true;
    }
299
300
    /**
     * Emits a heading when the next line is a setext underline, and skips
     * that underline.
     *
     * @return true|null True when a heading was emitted, null otherwise.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return;
        }

        // An underline of only dashes (and spaces) gives h2; otherwise h1.
        $level = 1;

        if (trim($this->nextLine, '- ') === '') {
            $level = 2;
        }

        $this->markup .= "\n<h" . $level . '>' . $this->trimmedLine . '</h' . $level . '>';

        ++$this->pointer;

        return true;
    }
312
313
    /**
     * Emits a <pre><code> block for a fenced (```) or indented code block.
     *
     * An indented block is only recognised outside lists and quotes and needs
     * at least four columns of indentation.
     *
     * @return true|null True when a code block was emitted, null otherwise.
     */
    protected function code()
    {
        $codeBlock = (bool) preg_match(static::RE_MD_CODE, $this->line, $codeMatch);

        if (!$codeBlock && ($this->inList || $this->inQuote || $this->indent < 4)) {
            return;
        }

        // Group 1 is only present when the fence names a language.
        $lang = isset($codeMatch[1])
            ? ' class="language-' . $codeMatch[1] . '"'
            : '';

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$codeBlock) {
            // For an indented block the current line is already the first code line.
            $this->markup .= $this->escape(substr($this->line, 4));
        }

        $this->codeInternal($codeBlock);

        // Only a fenced block has a closing ``` line to step over. Stepping
        // unconditionally would swallow the first line after an indented block.
        if ($codeBlock) {
            ++$this->pointer;
        }

        $this->markup .= '</code></pre>';

        return true;
    }
337
338
    /**
     * Appends the following lines of a code block to the markup, escaped.
     *
     * Lines are consumed while they belong to the block: inside a fenced block
     * until a line starting with ```, or while a line starts with the indent
     * string. The pointer is left on the last consumed line.
     *
     * @param bool $codeBlock True for a fenced block, false for an indented one.
     *
     * @return void
     */
    public function codeInternal($codeBlock)
    {
        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $insideFence   = $codeBlock && substr(ltrim($this->line), 0, 3) !== '```';
            $stillIndented = substr($this->line, 0, 4) === $this->indentStr;

            if (!$insideFence && !$stillIndented) {
                // The next line is not part of the block. Without this exit the
                // pointer never advances and the loop never terminates.
                break;
            }

            $this->markup .= "\n"; // @todo: donot use \n for first line
            $this->markup .= $codeBlock ? $this->line : substr($this->line, 4);

            ++$this->pointer;
        }
    }
353
354
    /**
     * Emits <hr /> for a rule line that follows an empty line.
     *
     * @return true|null True when a rule was emitted, null otherwise.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine !== '') {
            return;
        }

        if (!preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
            return;
        }

        $this->markup .= "\n<hr />";

        return true;
    }
364
365
    /**
     * Emits a list item, opening the <ul>/<ol> wrapper on the first item.
     *
     * A line is a list item when it starts with "- ", "* " or "+ " (unordered)
     * or with digits, a dot and a space (ordered).
     *
     * @return true|null True when a list item was emitted, null otherwise.
     */
    protected function listt()
    {
        $isUl = in_array(substr($this->trimmedLine, 0, 2), ['- ', '* ', '+ ']);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            return;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup .= "\n<$wrapper>\n";
            $this->inList      = true;

            ++$this->listLevel;
        }

        // Remove exactly the matched marker. Trimming a character list instead
        // also eats item text that starts with "*", "-", "." or digits (e.g.
        // "- **bold**" or "- 2019 ...") and leaves a "+" marker in place.
        $item = $isUl
            ? substr($this->trimmedLine, 2)
            : preg_replace(static::RE_MD_OL, '', $this->trimmedLine, 1);

        $this->markup .= '<li>' . ltrim($item, ' ');

        $this->listInternal();

        return true;
    }
387
388
    /**
     * Decides, from the next line, how the list item just opened is closed:
     * plain close, nested list opening, or closing of outer levels on dedent.
     *
     * @return void
     */
    protected function listInternal()
    {
        $nextIsUl   = in_array(substr($this->trimmedNextLine, 0, 2), ['- ', '* ', '+ ']);
        $nextIsItem = $nextIsUl || preg_match(static::RE_MD_OL, $this->trimmedNextLine);

        if (!$nextIsItem) {
            $this->markup .= "</li>\n";

            return;
        }

        if ($this->nextIndent > $this->indent) {
            // Deeper item: keep this <li> open and start a nested list in it.
            $tag = $nextIsUl ? 'ul' : 'ol';

            array_push($this->stackList, "</li>\n", "</$tag>");
            $this->markup .= "\n<$tag>\n";

            ++$this->listLevel;

            return;
        }

        $this->markup .= "</li>\n";

        if ($this->nextIndent < $this->indent) {
            // Shallower item: pop closing tags, one step per four columns.
            $steps = intval(($this->indent - $this->nextIndent) / 4);

            for ($i = 0; $i < $steps; $i++) {
                $this->markup .= array_pop($this->stackList);

                if ($this->listLevel > 2) {
                    $this->markup .= array_pop($this->stackList);
                }
            }
        }
    }
419
420
    /**
     * Emits a table row, or tries to start a table when none is open.
     *
     * The header's separator count is kept in a function-static variable, so
     * the value persists between calls of this method.
     *
     * @return true|null True when the line was handled as part of a table;
     *                   null when no table could be started.
     */
    protected function table()
    {
        static $headerCount = 0;

        if (!$this->inTable) {
            // Count the "|" separators between the header cells.
            $headerCount = substr_count(trim($this->trimmedLine, '|'), '|');

            return $this->tableInternal($headerCount);
        }

        // Never emit more cells than the header has (separators + 1).
        $cells = array_slice(
            explode('|', trim($this->trimmedLine, '|')),
            0,
            $headerCount + 1
        );

        $row = "<tr>\n";

        foreach ($cells as $cell) {
            $row .= '<td>' . trim($cell) . "</td>\n";
        }

        $this->markup .= $row . "</tr>\n";

        $tableContinues = !empty($this->trimmedNextLine)
            && substr_count(trim($this->trimmedNextLine, '|'), '|');

        if (!$tableContinues) {
            $headerCount        = 0;
            $this->inTable      = false;
            $this->stackTable[] = "</tbody>\n</table>";
        }

        return true;
    }
453
454
    /**
     * Opens a table when the next line is a separator row with at least as many
     * columns as the header has separators, and emits the header row.
     *
     * @param int $headerCount Number of "|" separators found in the header line.
     *
     * @return true|null True when a table was opened, null otherwise.
     */
    protected function tableInternal($headerCount)
    {
        $separatorRow = trim($this->trimmedNextLine, '|');
        $columnCount  = preg_match_all(static::RE_MD_TCOL, $separatorRow);

        if (!($headerCount > 0 && $headerCount <= $columnCount)) {
            return;
        }

        // Step over the separator row.
        ++$this->pointer;

        $this->inTable     = true;
        $this->markup     .= "<table>\n<thead>\n<tr>\n";
        $this->trimmedLine = trim($this->trimmedLine, '|');

        $cells = '';

        foreach (explode('|', $this->trimmedLine) as $hdr) {
            $cells .= '<th>' . trim($hdr) . "</th>\n";
        }

        $this->markup .= $cells . "</tr>\n</thead>\n<tbody>\n";

        return true;
    }
474
475
    /**
     * Appends the current line as paragraph text.
     *
     * The first line opens a <p>; following lines are joined with <br />. The
     * paragraph is closed when the next line is blank.
     *
     * @return void
     */
    protected function paragraph()
    {
        $this->markup .= $this->inPara ? "\n<br />" : "\n<p>";
        $this->markup .= $this->trimmedLine;

        // Compare against '' instead of using empty(): a next line that is just
        // "0" is content and must not end the paragraph.
        if ('' === $this->trimmedNextLine) {
            $this->markup .= '</p>';
            $this->inPara = false;
        } else {
            $this->inPara = true;
        }
    }
487
}
488