Passed
Pull Request — master (#5)
by Jitendra
01:38
created

HtmlUp::table()   D

Complexity

Conditions 10
Paths 9

Size

Total Lines 43
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 26
nc 9
nop 0
dl 0
loc 43
rs 4.8196
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace Ahc;
4
5
/**
6
 * HtmlUp - A **lightweight** && **fast** `markdown` to HTML Parser.
7
 *
8
 * Supports most of the markdown specs except deep nested elements.
9
 * Check readme.md for the details of its features && limitations.
10
 *
11
 * @author    adhocore | Jitendra Adhikari <[email protected]>
12
 * @copyright (c) 2014 Jitendra Adhikari
13
 */
14
class HtmlUp
{
    const RE_URL       = '~<(https?:[\/]{2}[^\s]+?)>~';
    const RE_RAW       = '/^<\/?\w.*?\/?>/';
    const RE_EMAIL     = '~<(\S+?@\S+?)>~';
    const RE_MD_IMG    = '~!\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_URL    = '~\[(.+?)\]\s*\((.+?)\s*(".+?")?\)~';
    const RE_MD_FONT   = '!(\*{1,2}|_{1,2}|`|~~)(.+?)\\1!';
    const RE_MD_QUOTE  = '~^\s*(>+)\s+~';
    const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';
    const RE_MD_CODE   = '/^```\s*([\w-]+)?/';
    const RE_MD_RULE   = '~^(_{3,}|\*{3,}|\-{3,})$~';
    const RE_MD_TCOL   = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';
    const RE_MD_OL     = '/^\d+\. /';

    // Input lines plus the closing tags that are still waiting to be emitted.
    protected $lines       = [];
    protected $stackList   = [];
    protected $stackBlock  = [];
    protected $stackTable  = [];

    protected $pointer     = -1;
    protected $listLevel   = 0;
    protected $quoteLevel  = 0;
    protected $indent      = 0;
    protected $nextIndent  = 0;

    // Number of "|" separators seen in the header row of the table being parsed.
    protected $tableHdrCt  = 0;

    protected $indentStr       = '';
    protected $line            = '';
    protected $trimmedLine     = '';
    protected $prevLine        = '';
    protected $trimmedPrevLine = '';
    protected $nextLine        = '';
    protected $trimmedNextLine = '';
    protected $markup          = '';

    protected $inList  = false;
    protected $inQuote = false;
    protected $inPara  = false;
    protected $inHtml  = false;
    protected $inTable = false;

    /**
     * Constructor.
     *
     * @param string|null $markdown    Markdown to scan right away; null defers to parse().
     * @param int         $indentWidth 2 selects a two-space indent string, anything else four spaces.
     */
    public function __construct($markdown = null, $indentWidth = 4)
    {
        $this->indentStr = $indentWidth == 2 ? '  ' : '    ';

        if (null !== $markdown) {
            $this->scan($markdown);
        }
    }

    /**
     * Normalizes whitespace and splits the markdown into the line buffer.
     *
     * Tabs become the indent string and all line endings become "\n". The buffer is
     * padded with one empty line at each end. Blank input leaves the buffer untouched.
     *
     * @param string $markdown
     *
     * @return void
     */
    protected function scan($markdown)
    {
        if ('' === trim($markdown)) {
            return;
        }

        // Normalize whitespaces
        $markdown = str_replace("\t", $this->indentStr, $markdown);
        $markdown = str_replace(["\r\n", "\r"], "\n", $markdown);

        $this->lines = array_merge([''], explode("\n", $markdown), ['']);
    }

    /**
     * Renders the scanned markdown.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->parse();
    }

    /**
     * Converts markdown to HTML.
     *
     * @param string|null $markdown When given, all parser state is reset and this text is
     *                              scanned; when null, the text already scanned is used.
     *
     * @return string The generated markup, or '' when there is nothing to parse.
     */
    public function parse($markdown = null)
    {
        if (null !== $markdown) {
            $this->reset(true);

            $this->scan($markdown);
        }

        if ([] === $this->lines) {
            return '';
        }

        $this->parseBlockElements();
        $this->parseSpanElements();

        return $this->markup;
    }

    /**
     * Walks the line buffer and appends block level markup for every line.
     *
     * @return void
     */
    protected function parseBlockElements()
    {
        while (isset($this->lines[++$this->pointer])) {
            $this->prevLine        = $this->line;
            $this->trimmedPrevLine = $this->trimmedLine;

            $this->line        = $this->lines[$this->pointer];
            $this->trimmedLine = trim($this->line);

            if ($this->flush() || $this->raw()) {
                continue;
            }

            $this->indent          = strlen($this->line) - strlen(ltrim($this->line));
            $this->nextLine        = isset($this->lines[$this->pointer + 1])
                ? $this->lines[$this->pointer + 1]
                : '';
            $this->trimmedNextLine = trim($this->nextLine);
            $this->nextIndent      = strlen($this->nextLine) - strlen(ltrim($this->nextLine));

            // May strip a quote prefix from the current line; never consumes the line itself.
            $this->quote();

            if ($this->atx() || $this->setext() || $this->code() || $this->rule() || $this->listt()) {
                continue;
            }

            if ($this->inList) {
                $this->markup .= $this->trimmedLine;

                continue;
            }

            // paragraph() has no return value, so it must not take part in a boolean chain.
            if (!$this->table()) {
                $this->paragraph();
            }
        }
    }

    /**
     * Applies the inline conversions to the accumulated markup: links, anchors, then fonts.
     *
     * @return void
     */
    protected function parseSpanElements()
    {
        $this->links();

        $this->anchors();

        $this->spans();
    }

    /**
     * Converts <http(s)://...> and <user@host> autolinks into anchors.
     *
     * @return void
     */
    protected function links()
    {
        // URLs.
        $this->markup = preg_replace(
            static::RE_URL,
            '<a href="$1">$1</a>',
            $this->markup
        );

        // Emails.
        $this->markup = preg_replace(
            static::RE_EMAIL,
            '<a href="mailto:$1">$1</a>',
            $this->markup
        );
    }

    /**
     * Converts markdown images and links into <img> and <a> tags.
     *
     * The optional title is captured together with its double quotes, so it is
     * emitted without adding any.
     *
     * @return void
     */
    protected function anchors()
    {
        // Images.
        $this->markup = preg_replace_callback(static::RE_MD_IMG, function ($img) {
            $title = isset($img[3]) ? " title={$img[3]} " : '';
            // Compare against '' so that an alt text of "0" is not dropped.
            $alt   = '' !== $img[1] ? " alt=\"{$img[1]}\" " : '';

            return "<img src=\"{$img[2]}\"{$title}{$alt}/>";
        }, $this->markup);

        // Anchors.
        $this->markup = preg_replace_callback(static::RE_MD_URL, function ($a) {
            $title = isset($a[3]) ? " title={$a[3]} " : '';

            return "<a href=\"{$a[2]}\"{$title}>{$a[1]}</a>";
        }, $this->markup);
    }

    /**
     * Converts inline font markers: ** or __ to strong, ~~ to del, * or _ to em,
     * and a backtick to code (whose content is HTML-escaped).
     *
     * @return void
     */
    protected function spans()
    {
        $this->markup = preg_replace_callback(static::RE_MD_FONT, function ($em) {
            $marker = $em[1];

            if ('**' === $marker || '__' === $marker) {
                $tag = 'strong';
            } elseif ('~~' === $marker) {
                $tag = 'del';
            } elseif ('*' === $marker || '_' === $marker) {
                $tag = 'em';
            } else {
                $tag   = 'code';
                $em[2] = $this->escape($em[2]);
            }

            return "<$tag>{$em[2]}</$tag>";
        }, $this->markup);
    }

    /**
     * Escapes HTML special characters.
     *
     * @param string $input
     *
     * @return string
     */
    protected function escape($input)
    {
        return htmlspecialchars($input);
    }

    /**
     * Restores properties to their declared defaults.
     *
     * The indent string is configuration coming from the constructor, not parser
     * state, so it is never reset. Resetting it used to disable tab normalization
     * and indented code continuation after the first blank line.
     *
     * @param bool $all When false, the line buffer, pointer and markup are kept as well.
     *
     * @return void
     */
    protected function reset($all = false)
    {
        $keep   = $all ? ['indentStr'] : ['lines', 'pointer', 'markup', 'indentStr'];
        $except = array_fill_keys($keep, true);

        // Reset all current values.
        foreach (get_class_vars(__CLASS__) as $prop => $value) {
            if (!isset($except[$prop])) {
                $this->{$prop} = $value;
            }
        }
    }

    /**
     * Handles a blank line: emits every pending closing tag and resets the block state.
     *
     * @return bool True when the current line was blank and has been handled.
     */
    protected function flush()
    {
        if ('' !== $this->trimmedLine) {
            return false;
        }

        if ($this->inTable) {
            // A header that was never followed by a row would otherwise stay unclosed.
            $this->stackTable[] = "</tbody>\n</table>";
        }

        foreach (['stackList', 'stackBlock', 'stackTable'] as $stack) {
            while (!empty($this->{$stack})) {
                $this->markup .= array_pop($this->{$stack});
            }
        }

        $this->markup .= "\n";

        $this->reset(false);

        return true;
    }

    /**
     * Passes raw HTML lines through untouched.
     *
     * Once a tag line directly follows an empty line, every following line is
     * treated as raw HTML until the state is reset by a blank line.
     *
     * @return bool True when the current line was emitted as raw HTML.
     */
    protected function raw()
    {
        if (!$this->inHtml && !preg_match(static::RE_RAW, $this->trimmedLine)) {
            return false;
        }

        $this->markup .= "\n$this->line";

        if (!$this->inHtml && empty($this->lines[$this->pointer - 1])) {
            $this->inHtml = true;
        }

        return true;
    }

    /**
     * Strips a leading quote marker from the current line and opens a blockquote
     * when a quote starts or gets deeper. The closing tag is queued on the block
     * stack and emitted by flush().
     *
     * @return bool True when the current line carried a quote marker.
     */
    protected function quote()
    {
        if (!preg_match(static::RE_MD_QUOTE, $this->line, $quoteMatch)) {
            return false;
        }

        $this->line        = substr($this->line, strlen($quoteMatch[0]));
        $this->trimmedLine = trim($this->line);

        if (!$this->inQuote || $this->quoteLevel < strlen($quoteMatch[1])) {
            $this->markup .= "\n<blockquote>";

            $this->stackBlock[] = "\n</blockquote>";

            ++$this->quoteLevel;
        }

        return $this->inQuote = true;
    }

    /**
     * Emits an ATX heading (one to six leading "#").
     *
     * @return bool True when the current line was a heading.
     */
    protected function atx()
    {
        if (isset($this->trimmedLine[0]) && $this->trimmedLine[0] === '#') {
            $level = strlen($this->trimmedLine) - strlen(ltrim($this->trimmedLine, '#'));

            if ($level < 7) {
                $this->markup .= "\n<h{$level}>" . ltrim(ltrim($this->trimmedLine, '# ')) . "</h{$level}>";

                return true;
            }
        }

        return false;
    }

    /**
     * Emits a setext heading when the next line is an underline of "=" (h1) or "-" (h2),
     * and consumes that underline.
     *
     * @return bool True when the current line was a heading.
     */
    protected function setext()
    {
        if (!preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
            return false;
        }

        $level = trim($this->nextLine, '- ') === '' ? 2 : 1;

        $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

        ++$this->pointer;

        return true;
    }

    /**
     * Emits a fenced (```) or four-space indented code block and consumes its lines.
     *
     * @return bool True when a code block was emitted.
     */
    protected function code()
    {
        $codeBlock = preg_match(static::RE_MD_CODE, $this->line, $codeMatch);
        $indented  = !$this->inList && !$this->inQuote && $this->indent >= 4;

        if (!$codeBlock && !$indented) {
            return false;
        }

        $lang = isset($codeMatch[1])
            ? ' class="language-' . $codeMatch[1] . '"'
            : '';

        $this->markup .= "\n<pre><code{$lang}>";

        if (!$codeBlock) {
            $this->markup .= $this->escape(substr($this->line, 4));
        }

        while (isset($this->lines[$this->pointer + 1])) {
            $this->line = $this->escape($this->lines[$this->pointer + 1]);

            $isBody = ($codeBlock && substr(ltrim($this->line), 0, 3) !== '```')
                || substr($this->line, 0, 4) === $this->indentStr;

            if (!$isBody) {
                // Without this the pointer never advances and the loop spins forever.
                break;
            }

            $this->markup .= "\n"; # @todo: donot use \n for first line
            $this->markup .= $codeBlock ? $this->line : substr($this->line, 4);

            ++$this->pointer;
        }

        // Step over the closing fence. After an indented block only a blank separator
        // is swallowed, so a content line directly below the block is not lost.
        $next = isset($this->lines[$this->pointer + 1]) ? $this->lines[$this->pointer + 1] : null;

        if ($codeBlock || (null !== $next && '' === trim($next))) {
            ++$this->pointer;
        }

        $this->markup .= '</code></pre>';

        return true;
    }

    /**
     * Emits a horizontal rule for a line of three or more "_", "*" or "-" that
     * follows an empty line.
     *
     * @return bool True when a rule was emitted.
     */
    protected function rule()
    {
        if ($this->trimmedPrevLine === ''
            && preg_match(static::RE_MD_RULE, $this->trimmedLine)
        ) {
            $this->markup .= "\n<hr />";

            return true;
        }

        return false;
    }

    /**
     * Emits a list item, opening the list wrapper or a nested list when needed.
     *
     * Closing tags are queued on the list stack: the outermost wrapper first, then
     * one "</li>" + wrapper pair per nested level. Whatever is still queued is
     * emitted by flush().
     *
     * @return bool True when the current line was a list item.
     */
    protected function listt()
    {
        $bullets = ['- ', '* ', '+ '];
        $isUl    = in_array(substr($this->trimmedLine, 0, 2), $bullets, true);

        if (!$isUl && !preg_match(static::RE_MD_OL, $this->trimmedLine)) {
            return false;
        }

        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";
            $this->inList      = true;

            ++$this->listLevel;
        }

        // Strip only the marker itself; a character-list ltrim would also eat a
        // leading digit or "*" of the item text and leaves the "+" bullet behind.
        $this->markup .= '<li>' . preg_replace('/^(?:[-*+]|\d+\.)\s+/', '', $this->trimmedLine);

        $nextIsUl = in_array(substr($this->trimmedNextLine, 0, 2), $bullets, true);

        if (!$nextIsUl && !preg_match(static::RE_MD_OL, $this->trimmedNextLine)) {
            $this->markup .= "</li>\n";

            return true;
        }

        $wrapper = $nextIsUl ? 'ul' : 'ol';

        if ($this->nextIndent > $this->indent) {
            // The next item is nested: keep this <li> open and start a child list.
            $this->stackList[] = "</li>\n";
            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";

            ++$this->listLevel;
        } else {
            $this->markup .= "</li>\n";
        }

        if ($this->nextIndent < $this->indent) {
            $shift = intval(($this->indent - $this->nextIndent) / 4);

            // Unwind one wrapper + parent "</li>" pair per level; the outermost
            // wrapper always stays queued for flush().
            while ($shift-- > 0 && count($this->stackList) > 1) {
                $this->markup .= array_pop($this->stackList);
                $this->markup .= array_pop($this->stackList);

                --$this->listLevel;
            }
        }

        return true;
    }

    /**
     * Emits table markup: the header when the next line is a column separator row,
     * or a body row while a table is open.
     *
     * @return bool True when the current line was consumed as part of a table.
     */
    protected function table()
    {
        if (!$this->inTable) {
            $hdrCt = substr_count(trim($this->trimmedLine, '|'), '|');
            $colCt = preg_match_all(static::RE_MD_TCOL, trim($this->trimmedNextLine, '|'));

            if ($hdrCt > 0 && $colCt > 0 && $hdrCt <= $colCt) {
                // Skip the separator row.
                ++$this->pointer;

                $this->tableHdrCt  = $hdrCt;
                $this->inTable     = true;
                $this->markup     .= "<table>\n<thead>\n<tr>\n";
                $this->trimmedLine = trim($this->trimmedLine, '|');

                foreach (explode('|', $this->trimmedLine) as $hdr) {
                    $this->markup .= '<th>' . trim($hdr) . "</th>\n";
                }

                $this->markup .= "</tr>\n</thead>\n<tbody>\n";

                return true;
            }

            return false;
        }

        $this->markup .= "<tr>\n";

        foreach (explode('|', trim($this->trimmedLine, '|')) as $i => $col) {
            // Cells beyond the header's column count are dropped.
            if ($i > $this->tableHdrCt) {
                break;
            }

            $col           = trim($col);
            $this->markup .= "<td>{$col}</td>\n";
        }

        $this->markup .= "</tr>\n";

        if (empty($this->trimmedNextLine) || !substr_count(trim($this->trimmedNextLine, '|'), '|')) {
            $this->tableHdrCt   = 0;
            $this->inTable      = false;
            $this->stackTable[] = "</tbody>\n</table>";
        }

        return true;
    }

    /**
     * Emits the current line as paragraph text: opens a <p> or continues the open
     * one with <br />, and closes it when the next line is empty.
     *
     * @return void
     */
    protected function paragraph()
    {
        $this->markup .= $this->inPara ? "\n<br />" : "\n<p>";
        $this->markup .= $this->trimmedLine;

        if (empty($this->trimmedNextLine)) {
            $this->markup .= '</p>';
            $this->inPara = false;
        } else {
            $this->inPara = true;
        }
    }
}
463