Passed
Pull Request — master (#5)
by Jitendra
02:12
created

HtmlUp::parseBlockElements()   C

Complexity

Conditions 12
Paths 10

Size

Total Lines 32
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 12
eloc 19
nc 10
nop 0
dl 0
loc 32
rs 5.1612
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
15
{
16
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
17
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
18
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
19
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
20
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
21
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
22
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
23
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
24
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
25
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
26
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
27
    const RE_MD_OL     = '/^\d+\. /';
28
29
    protected $lines       = [];
30
    protected $stackList   = [];
31
    protected $stackBlock  = [];
32
    protected $stackTable  = [];
33
34
    protected $pointer     = -1;
35
    protected $listLevel   = 0;
36
    protected $quoteLevel  = 0;
37
    protected $indent      = 0;
38
    protected $nextIndent  = 0;
39
40
    protected $indentStr       = '';
41
    protected $line            = '';
42
    protected $trimmedLine     = '';
43
    protected $prevLine        = '';
44
    protected $trimmedPrevLine = '';
45
    protected $nextLine        = '';
46
    protected $trimmedNextLine = '';
47
    protected $markup          = '';
48
49
    protected $inList  = false;
50
    protected $inQuote = false;
51
    protected $inPara  = false;
52
    protected $inHtml  = false;
53
    protected $inTable = false;
54
55
    /**
     * Create a parser and optionally pre-load markdown text.
     *
     * @param string|null $markdown    Markdown source to scan immediately; nothing is scanned when null.
     * @param int         $indentWidth Indent width: 2 selects a two-space indent string,
     *                                 any other value selects four spaces.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        if ($indentWidth == 2) {
            $this->indentStr = '  ';
        } else {
            $this->indentStr = '    ';
        }

        if (null === $markdown) {
            return;
        }

        $this->scan($markdown);
    }
68
69
    /**
     * Normalize the markdown text and split it into the line buffer.
     *
     * Input that is empty after trimming leaves the line buffer untouched.
     *
     * @param string $markdown
     */
    protected function scan($markdown)
    {
        if (trim($markdown) === '') {
            return;
        }

        // Tabs become the configured indent string; CRLF and lone CR become LF.
        $normalized = str_replace(
            ["\t", "\r\n", "\r"],
            [$this->indentStr, "\n", "\n"],
            $markdown
        );

        // An empty sentinel line is placed before and after the real lines.
        $lines = explode("\n", $normalized);
        array_unshift($lines, '');
        $lines[] = '';

        $this->lines = $lines;
    }
81
82
    /**
     * String conversion runs the parser on the already scanned lines.
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->parse();

        return $html;
    }
86
87
    /**
     * Convert markdown to HTML.
     *
     * @param string|null $markdown New markdown source; when given, the parser state is
     *                              fully reset and the text is scanned before parsing.
     *                              When null, the previously scanned lines are used.
     *
     * @return string The accumulated markup, or '' when there are no lines to parse.
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);
            $this->scan($markdown);
        }

        if ([] !== $this->lines) {
            // Block pass first, then inline replacements over the whole buffer.
            $this->parseBlockElements();
            $this->parseSpanElements();

            return $this->markup;
        }

        return '';
    }
104
105
    /**
     * Walk the line buffer from the current pointer and append block-level HTML
     * to the markup buffer.
     *
     * For every line the block handlers are tried in a fixed order; the first
     * one that reports success ends processing of that line.
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            // Keep the previous line around before loading the current one.
            $this->prevLine        = $this->line;
            $this->trimmedPrevLine = $this->trimmedLine;

            $this->line        = $this->lines[$this->pointer];
            $this->trimmedLine = trim($this->line);

            if ($this->flush() || $this->raw()) {
                continue;
            }

            // Look ahead one line; past the end the next line is treated as empty.
            $this->nextLine = '';
            if (isset($this->lines[$this->pointer + 1])) {
                $this->nextLine = $this->lines[$this->pointer + 1];
            }

            $this->indent          = strlen($this->line) - strlen(ltrim($this->line));
            $this->trimmedNextLine = trim($this->nextLine);
            $this->nextIndent      = strlen($this->nextLine) - strlen(ltrim($this->nextLine));

            // quote() may rewrite the current line; its return value is not needed here.
            $this->quote();

            $handled = $this->atx()
                || $this->setext()
                || $this->code()
                || $this->rule()
                || $this->listt();

            if ($handled) {
                continue;
            }

            if ($this->inList) {
                // A non-marker line while a list is open is appended as plain text.
                $this->markup .= $this->trimmedLine;

                continue;
            }

            // paragraph() returns nothing, so it is called as a plain fallback.
            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }
139
140
    /**
     * Run the inline passes over the markup buffer, in order:
     * auto links, then images/anchors, then font spans.
     */
    protected function parseSpanElements()
    {
        foreach (['links', 'anchors', 'spans'] as $pass) {
            $this->{$pass}();
        }
    }
148
149
    /**
     * Turn angle-bracketed URLs and email addresses in the markup buffer into anchors.
     */
    protected function links()
    {
        // Applied in this order: URLs first, then emails.
        $rules = [
            static::RE_URL   => '<a href="$1">$1</a>',
            static::RE_EMAIL => '<a href="mailto:$1">$1</a>',
        ];

        foreach ($rules as $pattern => $replacement) {
            $this->markup = preg_replace($pattern, $replacement, $this->markup);
        }
    }
165
166
    /**
     * Replace markdown image and link syntax in the markup buffer with
     * <img> and <a> tags.
     *
     * Images are handled before links so that the link pattern does not
     * consume the bracket part of an image.
     */
    protected function anchors()
    {
        // Images: group 1 is the alt text, 2 the source, 3 the optional quoted title.
        $toImage = function ($m) {
            $html = '<img src="' . $m[2] . '"';

            if (isset($m[3])) {
                // The captured title already carries its own double quotes.
                $html .= ' title=' . $m[3] . ' ';
            }

            if ($m[1]) {
                $html .= ' alt="' . $m[1] . '" ';
            }

            return $html . '/>';
        };

        // Anchors: group 1 is the text, 2 the href, 3 the optional quoted title.
        $toAnchor = function ($m) {
            $html = '<a href="' . $m[2] . '"';

            if (isset($m[3])) {
                $html .= ' title=' . $m[3] . ' ';
            }

            return $html . '>' . $m[1] . '</a>';
        };

        $this->markup = preg_replace_callback(static::RE_MD_IMG, $toImage, $this->markup);
        $this->markup = preg_replace_callback(static::RE_MD_URL, $toAnchor, $this->markup);
    }
183
184
    /**
     * Replace inline font markers in the markup buffer with HTML tags:
     * `**`/`__` => strong, `~~` => del, `*`/`_` => em, anything else => code.
     *
     * The text inside a code span is HTML-escaped; other spans are left as is.
     */
    protected function spans()
    {
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            $marker = $em[1];
            $prefix = substr($marker, 0, 2);
            $text   = $em[2];

            // Explicit strict comparisons: the previous switch used boolean `case`
            // expressions, which only worked through loose string/bool comparison
            // against the switch subject.
            if ($prefix === '**' || $prefix === '__') {
                $tag = 'strong';
            } elseif ($prefix === '~~') {
                $tag = 'del';
            } elseif ($marker === '*' || $marker === '_') {
                $tag = 'em';
            } else {
                $tag  = 'code';
                $text = htmlspecialchars($text);
            }

            return "<$tag>{$text}</$tag>";
        }, $this->markup);
    }
211
212
    /**
     * HTML-escape a string using htmlspecialchars() with its default flags.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        $escaped = htmlspecialchars($input);

        return $escaped;
    }
216
217
    /**
     * Restore parser properties to their declared class defaults.
     *
     * @param bool $all When true, the line buffer, pointer and markup are reset too;
     *                  when false, those three are kept.
     *
     * The indent string is never reset: it is set once by the constructor and
     * is not part of the per-parse state. Resetting it to its declared default
     * ('') would break tab expansion and the indent comparison used for code blocks.
     */
    protected function reset($all = false)
    {
        $keep = ['indentStr' => true];

        if (!$all) {
            $keep['lines']   = true;
            $keep['pointer'] = true;
            $keep['markup']  = true;
        }

        // Reset every remaining property to the default declared on the class.
        foreach (get_class_vars(__CLASS__) as $prop => $default) {
            if (isset($keep[$prop])) {
                continue;
            }

            $this->{$prop} = $default;
        }
    }
226
227
    /**
     * On an empty line, emit all pending closing tags and reset the block state.
     *
     * Closers are drained last-in-first-out from the list stack, then the block
     * stack, then the table stack. A newline is appended afterwards and the
     * state is reset while keeping lines, pointer and markup.
     *
     * @return bool True when the current trimmed line was empty and a flush happened.
     */
    protected function flush()
    {
        if ($this->trimmedLine !== '') {
            return false;
        }

        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while (!empty($this->{$stack})) {
                $this->markup .= array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }
251
252
    /**
     * Pass a raw HTML line through to the markup unchanged.
     *
     * A line is treated as raw when raw-HTML mode is already active or when the
     * trimmed line matches the raw-tag pattern. Raw-HTML mode is switched on
     * when the entry before the current one in the line buffer is empty.
     *
     * @return bool|null True when the line was emitted as raw HTML, null otherwise.
     */
    protected function raw()
    {
        $isRaw = $this->inHtml || preg_match(static::RE_RAW, $this->trimmedLine);

        if (!$isRaw) {
            return null;
        }

        $this->markup .= "\n" . $this->line;

        if ($this->inHtml) {
            return true;
        }

        if (empty($this->lines[$this->pointer - 1])) {
            $this->inHtml = true;
        }

        return true;
    }
263
264
    /**
     * Detect a blockquote marker on the current line and strip it.
     *
     * When the line starts with one or more `>` followed by whitespace, the
     * marker is removed from the current line (and its trimmed copy). A new
     * <blockquote> is opened when no quote is open yet, or when the marker is
     * deeper than the current quote level; its closing tag is queued on the
     * block stack so that the next flush emits it.
     *
     * @return bool|null True when the line carried a quote marker, null otherwise.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return null;
        }

        $this->line        = substr($this->line, strlen($quoteMatch[0]));
        $this->trimmedLine = trim($this->line);

        // Must use the instance state here: a local $quoteLevel is undefined and
        // a local $stackBlock would be discarded, leaving the blockquote unclosed.
        if (!$this->inQuote || $this->quoteLevel < strlen($quoteMatch[1])) {
            $this->markup .= "\n<blockquote>";

            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        return $this->inQuote = true;
    }
281
282
    /**
     * Handle an ATX heading (a line starting with one to six `#`).
     *
     * @return bool|null True when a heading was emitted, null otherwise
     *                   (no leading `#`, or more than six of them).
     */
    protected function atx()
    {
        // Number of consecutive `#` characters at the start of the line.
        $level = strspn($this->trimmedLine, '#');

        if ($level < 1 || $level > 6) {
            return null;
        }

        // Drop the leading hashes/spaces, then any remaining leading whitespace.
        $title = ltrim(ltrim($this->trimmedLine, '# '));

        $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

        return true;
    }
294
295
    /**
     * Handle a setext heading: the current line is the title and the next line
     * is an underline of `=` or `-`.
     *
     * A dash-only underline yields <h2>, otherwise <h1>. The underline line is
     * consumed by advancing the pointer.
     *
     * @return bool|null True when a heading was emitted, null otherwise.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return null;
        }

        if (trim($this->nextLine, '- ') === '') {
            $level = 2;
        } else {
            $level = 1;
        }

        $this->markup .= "\n<h" . $level . '>' . $this->trimmedLine . '</h' . $level . '>';

        // Skip the underline.
        ++$this->pointer;

        return true;
    }
307
308
    /**
     * Handle a code block, either fenced (``` with optional language) or
     * indented by at least four spaces outside of lists and quotes.
     *
     * Following lines are HTML-escaped and appended until the block ends: for a
     * fenced block at the first line starting with ``` (after left trim), for an
     * indented block at the first line that does not start with the indent string.
     *
     * @return bool|null True when a code block was emitted, null otherwise.
     */
    protected function code()
    {
        $codeBlock = preg_match(static::RE_MD_CODE, $this->line, $codeMatch);

        $isIndented = empty($this->inList) && empty($this->inQuote) && $this->indent >= 4;

        if (!$codeBlock && !$isIndented) {
            return null;
        }

        $lang = isset($codeMatch[1])
            ? ' class="language-' . $codeMatch[1] . '"'
            : '';

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$codeBlock) {
            // Indented block: the current line is already the first code line.
            $this->markup .= $this->escape(substr($this->line, 4));
        }

        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $belongs = ($codeBlock && substr(ltrim($this->line), 0, 3) !== '```')
                || substr($this->line, 0, 4) === $this->indentStr;

            if (!$belongs) {
                // Without this break the loop would spin forever on the
                // terminating line, since the pointer is not advanced.
                break;
            }

            $this->markup .= "\n"; // @todo: do not use \n for first line
            $this->markup .= $codeBlock ? $this->line : substr($this->line, 4);

            ++$this->pointer;
        }

        // Consumes the line that ended the block (the closing fence for fenced blocks).
        // NOTE(review): for indented blocks this also skips the first non-indented
        // line, which is kept as before — confirm that this is intended.
        ++$this->pointer;

        $this->markup .= '</code></pre>';

        return true;
    }
343
344
    /**
     * Handle a horizontal rule: a line of three or more `_`, `*` or `-`
     * that follows an empty line.
     *
     * @return bool|null True when a rule was emitted, null otherwise.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine !== '') {
            return null;
        }

        if (!preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
            return null;
        }

        $this->markup .= "\n<hr />";

        return true;
    }
354
355
    /**
     * Handle a list item line (`- `, `* `, `+ ` for unordered, `N. ` for ordered).
     *
     * Opens the list wrapper on the first item, emits the item, and looks at the
     * next line to decide whether to close the item, open a nested list (deeper
     * indent) or unwind nested lists (shallower indent). Closing tags that are
     * not emitted here are queued on the list stack.
     *
     * @return bool|null True when the line was a list item, null otherwise.
     */
    protected function listt()
    {
        $ulMarkers = ['- ', '* ', '+ '];

        $isUl = in_array(substr($this->trimmedLine, 0, 2), $ulMarkers, true);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            return null;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup .= "\n<$wrapper>\n";
            $this->inList      = true;

            ++$this->listLevel;
        }

        // Strip exactly the list marker. A character-list ltrim would leave a
        // `+ ` marker in place and would also eat leading digits, asterisks,
        // dots or dashes that belong to the item text (e.g. `- **bold**`).
        $item = $isUl
            ? substr($this->trimmedLine, 2)
            : preg_replace(static::RE_MD_OL, '', $this->trimmedLine, 1);

        $this->markup .= '<li>' . ltrim($item, ' ');

        $isUl = in_array(substr($this->trimmedNextLine, 0, 2), $ulMarkers, true);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            // Next line is not a list item: just close the current one.
            $this->markup .= "</li>\n";

            return true;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if ($this->nextIndent > $this->indent) {
            // Deeper indent: keep the item open and start a nested list inside it.
            $this->stackList[] = "</li>\n";
            $this->stackList[] = "</$wrapper>";
            $this->markup .= "\n<$wrapper>\n";

            ++$this->listLevel;
        } else {
            $this->markup .= "</li>\n";
        }

        if ($this->nextIndent < $this->indent) {
            // Shallower indent: unwind one nesting step per four columns.
            $shift = intval(($this->indent - $this->nextIndent) / 4);

            while ($shift--) {
                $this->markup .= array_pop($this->stackList);

                // listLevel is the declared nesting counter; `nestLevel` is not
                // a property of this class.
                if ($this->listLevel > 2) {
                    $this->markup .= array_pop($this->stackList);
                }
            }
        }

        return true;
    }
402
403
    /**
     * Handle a table header or a table body row.
     *
     * Outside a table, the current line is treated as a header when it contains
     * pipes and the next line is a matching `---` divider row; the divider is
     * consumed. Inside a table, the current line is emitted as a body row, and
     * the table is ended when the next line is empty or has no inner pipes.
     *
     * The header pipe count is held in a function-level static variable.
     *
     * @return bool|null True when the line was consumed as part of a table, null otherwise.
     */
    protected function table()
    {
        static $hdrCt;

        if ($this->inTable) {
            $cells = explode('|', trim($this->trimmedLine, '|'));

            $this->markup .= "<tr>\n";

            foreach ($cells as $i => $col) {
                // Cells beyond the header's column range are dropped.
                if ($i > $hdrCt) {
                    break;
                }

                $this->markup .= '<td>' . trim($col) . "</td>\n";
            }

            $this->markup .= "</tr>\n";

            $nextIsRow = !empty($this->trimmedNextLine)
                && substr_count(trim($this->trimmedNextLine, '|'), '|');

            if (!$nextIsRow) {
                // Table ends here; the closing tags are emitted by the next flush.
                $hdrCt              = 0;
                $this->inTable      = false;
                $this->stackTable[] = "</tbody>\n</table>";
            }

            return true;
        }

        $hdrCt = substr_count(trim($this->trimmedLine, '|'), '|');
        $colCt = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

        if ($hdrCt <= 0 || $colCt <= 0 || $hdrCt > $colCt) {
            return null;
        }

        // Skip the divider row.
        ++$this->pointer;

        $this->inTable     = true;
        $this->markup .= "<table>\n<thead>\n<tr>\n";
        $this->trimmedLine = trim($this->trimmedLine, '|');

        foreach (explode('|', $this->trimmedLine) as $hdr) {
            $this->markup .= '<th>' . trim($hdr) . "</th>\n";
        }

        $this->markup .= "</tr>\n</thead>\n<tbody>\n";

        return true;
    }
448
449
    /**
     * Emit the current line as paragraph text.
     *
     * A new <p> is opened unless a paragraph is already open, in which case the
     * line is joined with a <br />. The paragraph is closed when the next line
     * is empty.
     */
    protected function paragraph()
    {
        $opener = $this->inPara ? "\n<br />" : "\n<p>";

        $this->markup .= $opener . $this->trimmedLine;

        // The paragraph stays open only while the next line has content.
        $this->inPara = !empty($this->trimmedNextLine);

        if (!$this->inPara) {
            $this->markup .= '</p>';
        }
    }
461
}
462