Passed
Pull Request — master (#5)
by Jitendra
01:34
created

HtmlUp::paragraph()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 10
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 7
nc 4
nop 0
dl 0
loc 10
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
15
{
16
    // Autolink: an http(s) URL wrapped in angle brackets; group 1 is the URL.
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
17
    // Raw HTML: text that begins with an opening or closing tag.
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
18
    // Autolink: an email-like token (contains "@") wrapped in angle brackets; group 1 is the address.
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
19
    // Image: ![alt](src "title"); groups are alt, src and the optional title including its quotes.
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
20
    // Link: [text](href "title"); groups are text, href and the optional title including its quotes.
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
21
    // Inline styling: text wrapped in matching *, **, _, __, ` or ~~ delimiters; group 1 is the delimiter.
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
22
    // Blockquote prefix: one or more ">" followed by whitespace; group 1 holds the ">" run.
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
23
    // Setext underline: a line made of three or more "=" or three or more "-".
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
24
    // Fenced code opener: three backticks with an optional language name in group 1.
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
25
    // Horizontal rule: a line consisting only of three or more "_", "*" or "-".
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
26
    // Table divider cell: three or more dashes, optionally flanked by "|:" and ":|".
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
27
    // Ordered list marker: digits, a dot and one space at the start of the text.
    const RE_MD_OL     = '/^\d+\. /';
28
29
    // Markdown split into lines by scan(), padded with an empty line at both ends.
    protected $lines       = [];
30
    // Closing tags of open lists; flush() pops them into the markup.
    protected $stackList   = [];
31
    // Closing tags of open blockquotes; flush() pops them into the markup.
    protected $stackBlock  = [];
32
    // Closing tags of an open table; flush() pops them into the markup.
    protected $stackTable  = [];
33
34
    // Index into $lines of the line being processed; -1 before parsing starts.
    protected $pointer     = -1;
35
    // Incremented whenever a list or nested list is opened.
    protected $listLevel   = 0;
36
    // Incremented whenever a blockquote is opened.
    protected $quoteLevel  = 0;
37
    // Number of leading whitespace characters of the current line.
    protected $indent      = 0;
38
    // Number of leading whitespace characters of the next line.
    protected $nextIndent  = 0;
39
40
    // String that replaces each tab: two or four spaces, chosen in the constructor.
    protected $indentStr       = '';
41
    // Current line, and its trimmed form.
    protected $line            = '';
42
    protected $trimmedLine     = '';
43
    // Previously processed line, and its trimmed form.
    protected $prevLine        = '';
44
    protected $trimmedPrevLine = '';
45
    // Line following the current one ('' at the end), and its trimmed form.
    protected $nextLine        = '';
46
    protected $trimmedNextLine = '';
47
    // HTML accumulated so far.
    protected $markup          = '';
48
49
    // Flags describing which construct the parser is currently inside.
    protected $inList  = false;
50
    protected $inQuote = false;
51
    protected $inPara  = false;
52
    protected $inHtml  = false;
53
    protected $inTable = false;
54
55
    /**
     * Builds the parser and, when markdown is supplied, scans it right away.
     *
     * @param string|null $markdown    Markdown source; nothing is scanned when null.
     * @param int         $indentWidth Tabs expand to two spaces when this equals 2,
     *                                 and to four spaces for any other value.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        $width = $indentWidth == 2 ? 2 : 4;

        $this->indentStr = str_repeat(' ', $width);

        if ($markdown === null) {
            return;
        }

        $this->scan($markdown);
    }
68
69
    /**
     * Splits markdown into lines and stores them for the block parser.
     *
     * Whitespace-only input is ignored and leaves the line buffer untouched.
     * Tabs become the configured indent string, CRLF and CR line endings become
     * LF, and an empty line is added before the first and after the last line.
     *
     * @param string $markdown
     *
     * @return void
     */
    protected function scan($markdown)
    {
        if (trim($markdown) === '') {
            return;
        }

        // Replacements run in array order: tabs first, then CRLF, then lone CR.
        $normalized = str_replace(
            ["\t", "\r\n", "\r"],
            [$this->indentStr, "\n", "\n"],
            $markdown
        );

        $lines = explode("\n", $normalized);
        array_unshift($lines, '');
        $lines[] = '';

        $this->lines = $lines;
    }
81
82
    /**
     * Renders the scanned markdown when the object is used as a string.
     *
     * @return string Whatever parse() returns.
     */
    public function __toString()
    {
        $html = $this->parse();

        return $html;
    }
86
87
    /**
     * Converts markdown to HTML.
     *
     * Calling this again without new markdown returns the markup produced by the
     * earlier run instead of processing it a second time.
     *
     * @param string|null $markdown New markdown to parse; when null, the markdown
     *                              scanned earlier (e.g. by the constructor) is used.
     *
     * @return string The generated markup, or '' when there are no lines to parse.
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);

            $this->scan($markdown);
        }

        if ([] === $this->lines) {
            return '';
        }

        // The pointer only leaves -1 once the block pass has run (a full reset puts
        // it back). Re-running the span pass over finished HTML would rewrite it,
        // e.g. "*a*" inside an emitted <code> element would turn into <em>.
        if ($this->pointer > -1) {
            return $this->markup;
        }

        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }
104
105
    /**
     * Walks the scanned lines and appends the block-level markup for each one.
     *
     * Handlers are tried per line in a fixed order: blank-line flush and raw HTML,
     * then blockquote prefix stripping, then atx/setext headings, code, rules and
     * lists. A line none of them consumed is appended verbatim while inside a
     * list; otherwise it is tried as a table row and finally rendered as
     * paragraph text.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->init();

            if ($this->flush() || $this->raw()) {
                continue;
            }

            $this->quote();

            if ($this->atx() || $this->setext() || $this->code() || $this->rule() || $this->listt()) {
                continue;
            }

            if ($this->inList) {
                $this->markup .= $this->trimmedLine;

                continue;
            }

            // paragraph() returns nothing, so call it as a statement rather than
            // as the right-hand operand of a boolean expression.
            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }
129
130
    /**
     * Loads the line at the current pointer and derives the per-line state.
     *
     * The outgoing line is kept as the "previous" line; the line after the pointer
     * (or '' when there is none) becomes the "next" line. Trimmed copies and the
     * count of leading whitespace characters are computed for both.
     *
     * @return void
     */
    protected function init()
    {
        // Remember the outgoing line before it is overwritten.
        $this->prevLine        = $this->line;
        $this->trimmedPrevLine = $this->trimmedLine;

        $current  = $this->lines[$this->pointer];
        $upcoming = '';

        if (isset($this->lines[$this->pointer + 1])) {
            $upcoming = $this->lines[$this->pointer + 1];
        }

        $this->line        = $current;
        $this->trimmedLine = trim($current);
        $this->indent      = strlen($current) - strlen(ltrim($current));

        $this->nextLine        = $upcoming;
        $this->trimmedNextLine = trim($upcoming);
        $this->nextIndent      = strlen($upcoming) - strlen(ltrim($upcoming));
    }
144
145
146
    /**
     * Runs the inline passes over the accumulated markup, in order:
     * autolinks, then images/anchors, then emphasis/code/strike spans.
     *
     * @return void
     */
    protected function parseSpanElements()
    {
        foreach (['links', 'anchors', 'spans'] as $pass) {
            $this->{$pass}();
        }
    }
154
155
    /**
     * Turns angle-bracketed URLs and email addresses in the markup into anchors.
     *
     * URLs are handled first, then emails (which get a "mailto:" href).
     *
     * @return void
     */
    protected function links()
    {
        $autolinks = [
            static::RE_URL   => '<a href="$1">$1</a>',
            static::RE_EMAIL => '<a href="mailto:$1">$1</a>',
        ];

        foreach ($autolinks as $pattern => $replacement) {
            $this->markup = preg_replace($pattern, $replacement, $this->markup);
        }
    }
171
172
    /**
     * Converts markdown images and links in the markup to <img> and <a> elements.
     *
     * Images are replaced first so that the link pattern does not consume the
     * "[alt](src)" part of an image. The optional title is emitted exactly as
     * captured, i.e. including its surrounding double quotes.
     *
     * @return void
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare with '' explicitly: a truthiness test would drop alt text "0".
            $alt   = '' !== $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }
189
190
    /**
     * Converts inline delimiters in the markup to HTML elements.
     *
     * "**" and "__" become <strong>, "~~" becomes <del>, "*" and "_" become <em>.
     * Any other delimiter the pattern can capture (the backtick) becomes <code>,
     * with its content HTML-escaped.
     *
     * @return void
     */
    protected function spans()
    {
        // Delimiter => tag. A lookup keyed by the captured delimiter replaces the
        // earlier switch whose boolean case labels relied on loose comparison.
        $tags = [
            '**' => 'strong',
            '__' => 'strong',
            '~~' => 'del',
            '*'  => 'em',
            '_'  => 'em',
        ];

        // em/code/strong/del
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) use ($tags) {
            if (isset($tags[$em[1]])) {
                $tag  = $tags[$em[1]];
                $text = $em[2];
            } else {
                $tag  = 'code';
                $text = $this->escape($em[2]);
            }

            return "<$tag>{$text}</$tag>";
        }, $this->markup);
    }
217
218
    /**
     * HTML-escapes text using htmlspecialchars() with its default flags.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        $escaped = htmlspecialchars($input);

        return $escaped;
    }
222
223
    /**
     * Restores properties to the default values declared on this class.
     *
     * @param bool $all When truthy every property is restored; otherwise the
     *                  line buffer, the pointer and the markup are left alone.
     *
     * @return void
     */
    protected function reset($all = false)
    {
        $defaults = get_class_vars(__CLASS__);

        if (!$all) {
            // Parsing is still in progress: keep the input, position and output.
            unset($defaults['lines'], $defaults['pointer'], $defaults['markup']);
        }

        foreach ($defaults as $name => $default) {
            $this->{$name} = $default;
        }
    }
232
233
    /**
     * Closes every open construct when the current line is blank.
     *
     * Pending closing tags are appended last-pushed-first, lists before
     * blockquotes before tables, followed by a newline; the parser state is then
     * reset, keeping the line buffer, the pointer and the markup.
     *
     * @return bool True when the line was blank and has been handled, false otherwise.
     */
    protected function flush()
    {
        if ($this->trimmedLine !== '') {
            return false;
        }

        $this->markup .= implode('', array_reverse($this->stackList));
        $this->stackList = [];

        $this->markup .= implode('', array_reverse($this->stackBlock));
        $this->stackBlock = [];

        $this->markup .= implode('', array_reverse($this->stackTable));
        $this->stackTable = [];

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }
257
258
    /**
     * Passes raw HTML lines through to the markup untouched.
     *
     * A line qualifies when the parser is already in HTML mode or the trimmed
     * line starts with a tag. HTML mode is switched on when such a line follows
     * an empty line.
     *
     * @return bool|null True when the line was consumed, null otherwise.
     */
    protected function raw()
    {
        if (!$this->inHtml && !preg_match(static::RE_RAW, $this->trimmedLine)) {
            return;
        }

        $this->markup .= "\n$this->line";

        $followsEmptyLine = empty($this->lines[$this->pointer - 1]);

        if (!$this->inHtml && $followsEmptyLine) {
            $this->inHtml = true;
        }

        return true;
    }
269
270
    /**
     * Strips a blockquote prefix from the current line and opens a blockquote.
     *
     * A <blockquote> is opened when the parser is not yet inside a quote, or when
     * the prefix holds more ">" characters than the current quote level; its
     * closing tag is pushed onto the block stack and the level rises by one.
     *
     * @return bool|null True when the line carried a quote prefix, null otherwise.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return;
        }

        list($prefix, $markers) = $quoteMatch;

        // Continue parsing with the prefix removed.
        $this->line        = substr($this->line, strlen($prefix));
        $this->trimmedLine = trim($this->line);

        $isDeeper = $this->quoteLevel < strlen($markers);

        if (!$this->inQuote || $isDeeper) {
            $this->markup      .= "\n<blockquote>";
            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        $this->inQuote = true;

        return true;
    }
287
288
    /**
     * Renders an atx heading ("# Title" through "###### Title").
     *
     * The level is the number of leading "#" characters; lines with none, or with
     * seven or more, are not headings. Leading "#" and space characters are
     * removed from the title.
     *
     * @return bool|null True when a heading was emitted, null otherwise.
     */
    protected function atx()
    {
        $level = strspn($this->trimmedLine, '#');

        if ($level === 0 || $level >= 7) {
            return;
        }

        $title = ltrim(ltrim($this->trimmedLine, '# '));

        $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

        return true;
    }
300
301
    /**
     * Renders a setext heading: the current line underlined by the next one.
     *
     * An underline of dashes yields <h2>, any other matching underline yields
     * <h1>. The underline line is skipped by advancing the pointer.
     *
     * @return bool|null True when a heading was emitted, null otherwise.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return;
        }

        $level = 1;

        if (trim($this->nextLine, '- ') === '') {
            $level = 2;
        }

        $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

        // Consume the underline as well.
        ++$this->pointer;

        return true;
    }
313
314
    /**
     * Renders a code block, either fenced or indented.
     *
     * A block starts on a fence line or, outside lists and quotes, on a line
     * indented by at least four characters. A language named on the fence becomes
     * a "language-*" class. Following lines are collected by codeInternal(), and
     * the pointer is then advanced once more before the block is closed.
     *
     * @return bool|null True when a code block was emitted, null otherwise.
     */
    protected function code()
    {
        $codeBlock = (bool) preg_match(static::RE_MD_CODE, $this->line, $codeMatch);
        $indented  = !$this->inList && !$this->inQuote && $this->indent >= 4;

        if (!$codeBlock && !$indented) {
            return;
        }

        $lang = '';

        if (isset($codeMatch[1])) {
            $lang = ' class="language-' . $codeMatch[1] . '"';
        }

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$codeBlock) {
            // An indented block's first line is code itself; a fence line is not.
            $this->markup .= $this->escape(substr($this->line, 4));
        }

        $this->codeInternal($codeBlock);

        ++$this->pointer;

        $this->markup .= '</code></pre>';

        return true;
    }
338
339
    /**
     * Appends the lines that belong to the code block opened by code().
     *
     * Each following line is HTML-escaped. It belongs to the block when the block
     * is fenced and the line does not start with a fence, or when its first four
     * characters equal the indent string. Collection stops at the first line that
     * does not belong, leaving the pointer on the last collected line.
     *
     * @param bool $codeBlock True for a fenced block, false for an indented one.
     *
     * @return void
     */
    public function codeInternal($codeBlock)
    {
        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $isFencedBody   = $codeBlock && substr(ltrim($this->line), 0, 3) !== '```';
            $isIndentedBody = substr($this->line, 0, 4) === $this->indentStr;

            if (!$isFencedBody && !$isIndentedBody) {
                // Without this exit the pointer never advances and the loop
                // condition stays true forever (e.g. on a closing fence).
                break;
            }

            $this->markup .= "\n"; // @todo: do not use \n for first line
            $this->markup .= $codeBlock ? $this->line : substr($this->line, 4);

            ++$this->pointer;
        }
    }
354
355
    /**
     * Renders a horizontal rule for a rule line that follows a blank line.
     *
     * @return bool|null True when a rule was emitted, null otherwise.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine !== '') {
            return;
        }

        if (!preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
            return;
        }

        $this->markup .= "\n<hr />";

        return true;
    }
365
366
    /**
     * Renders a list item, opening the surrounding list when necessary.
     *
     * Lines starting with "- ", "* " or "+ " are unordered items; lines starting
     * with digits, a dot and a space are ordered items. When no list is open, the
     * wrapper is opened and its closing tag pushed onto the list stack. Closing
     * the item (or nesting) is decided by listInternal() from the next line.
     *
     * @return bool|null True when the line was a list item, null otherwise.
     */
    protected function listt()
    {
        $isUl = in_array(substr($this->trimmedLine, 0, 2), ['- ', '* ', '+ '], true);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            return;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup .= "\n<$wrapper>\n";
            $this->inList      = true;

            ++$this->listLevel;
        }

        // Remove only the marker itself. Stripping a character set dropped the
        // start of items such as "- 2015 report" or "- *text*" and left the "+"
        // marker in place.
        $item = $isUl
            ? substr($this->trimmedLine, 2)
            : preg_replace(static::RE_MD_OL, '', $this->trimmedLine);

        $this->markup .= '<li>' . ltrim($item);

        $this->listInternal();

        return true;
    }
388
389
    /**
     * Finishes the current list item based on what the next line looks like.
     *
     * - Next line is not a list item: the item is closed.
     * - Next line is a deeper-indented item: a nested list is opened inside the
     *   still-open item, and the closing tags for both are pushed onto the stack.
     * - Next line is an item at the same or a shallower indent: the item is
     *   closed, and for a shallower indent one stack entry is popped per four
     *   columns of difference (two entries once the list level exceeds 2).
     *
     * @return void
     */
    protected function listInternal()
    {
        $marker   = substr($this->trimmedNextLine, 0, 2);
        $bulleted = in_array($marker, ['- ', '* ', '+ ']);

        if (!$bulleted && !preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            $this->markup .= "</li>\n";

            return;
        }

        if ($this->nextIndent > $this->indent) {
            $tag = $bulleted ? 'ul' : 'ol';

            array_push($this->stackList, "</li>\n", "</$tag>");
            $this->markup .= "\n<$tag>\n";

            ++$this->listLevel;

            return;
        }

        $this->markup .= "</li>\n";

        if ($this->nextIndent < $this->indent) {
            $steps = intval(($this->indent - $this->nextIndent) / 4);

            for (; $steps > 0; --$steps) {
                $this->markup .= array_pop($this->stackList);

                if ($this->listLevel > 2) {
                    $this->markup .= array_pop($this->stackList);
                }
            }
        }
    }
420
421
    /**
     * Renders a table: header detection on the first call, body rows afterwards.
     *
     * Outside a table, the pipes in the current line are counted and
     * tableInternal() decides whether a table starts here. Inside a table, the
     * line is emitted as a row holding at most one more cell than that pipe
     * count. The table ends when the next line is empty or contains no inner
     * pipe; its closing tags are then pushed onto the table stack.
     *
     * The pipe count lives in a static variable, so it persists between calls.
     *
     * @return bool|null True when the line was consumed as part of a table,
     *                   null when no table starts at this line.
     */
    protected function table()
    {
        static $headerCount = 0;

        if (!$this->inTable) {
            $headerCount = substr_count(trim($this->trimmedLine, '|'), '|');

            return $this->tableInternal($headerCount);
        }

        $cells = explode('|', trim($this->trimmedLine, '|'));
        $row   = "<tr>\n";

        // Cells beyond the header's column count are dropped.
        foreach (array_slice($cells, 0, $headerCount + 1) as $cell) {
            $row .= '<td>' . trim($cell) . "</td>\n";
        }

        $this->markup .= $row . "</tr>\n";

        $isLastRow = empty($this->trimmedNextLine)
            || substr_count(trim($this->trimmedNextLine, '|'), '|') === 0;

        if ($isLastRow) {
            $headerCount        = 0;
            $this->inTable      = false;
            $this->stackTable[] = "</tbody>\n</table>";
        }

        return true;
    }
454
455
    /**
     * Opens a table when the current line is a header row followed by a divider.
     *
     * The next line must contain at least as many divider cells as the header
     * has inner pipes, and the header needs at least one inner pipe. On success
     * the divider line is skipped, the header cells are emitted inside <thead>,
     * and <tbody> is opened.
     *
     * @param int $headerCount Number of pipes inside the header line.
     *
     * @return bool|null True when a table was opened, null otherwise.
     */
    protected function tableInternal($headerCount)
    {
        $columnCount = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

        if ($headerCount <= 0 || $headerCount > $columnCount) {
            return;
        }

        // Skip the divider line.
        ++$this->pointer;

        $this->inTable     = true;
        $this->trimmedLine = trim($this->trimmedLine, '|');

        $header = "<table>\n<thead>\n<tr>\n";

        foreach (explode('|', $this->trimmedLine) as $cell) {
            $header .= '<th>' . trim($cell) . "</th>\n";
        }

        $this->markup .= $header . "</tr>\n</thead>\n<tbody>\n";

        return true;
    }
475
476
    /**
     * Appends the current line as paragraph text.
     *
     * The line opens a <p> unless a paragraph is already open, in which case it
     * is joined with a <br />. The paragraph is closed when the next line is
     * empty; otherwise it stays open for the following line.
     *
     * @return void
     */
    protected function paragraph()
    {
        $opening = $this->inPara ? "\n<br />" : "\n<p>";
        $isLast  = empty($this->trimmedNextLine);

        $this->markup .= $opening . $this->trimmedLine . ($isLast ? '</p>' : '');

        $this->inPara = !$isLast;
    }
488
}
489