Passed
Pull Request — master (#806)
by Maxim
19:17
created

HTMLGrammar::parseGrammar()   C

Complexity

Conditions 17
Paths 44

Size

Total Lines 91
Code Lines 56

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 53
CRAP Score 17

Importance

Changes 0
Metric Value
cc 17
eloc 56
nc 44
nop 1
dl 0
loc 91
ccs 53
cts 53
cp 1
crap 17
rs 5.2166
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Spiral\Stempler\Lexer\Grammar;
6
7
use Spiral\Stempler\Lexer\Buffer;
8
use Spiral\Stempler\Lexer\Byte;
9
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
10
use Spiral\Stempler\Lexer\GrammarInterface;
11
use Spiral\Stempler\Lexer\Token;
12
13
/**
14
 * @see https://html.spec.whatwg.org/multipage/syntax.html
15
 */
16
final class HTMLGrammar implements GrammarInterface
17
{
18
    use TokenTrait;
0 ignored issues
show
introduced by
The trait Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait requires some properties which are not provided by Spiral\Stempler\Lexer\Grammar\HTMLGrammar: $char, $content
Loading history...
19
20
    // HTML grammar tokens
21
    public const TYPE_RAW         = 0;
22
    public const TYPE_KEYWORD     = 1;
23
    public const TYPE_OPEN        = 2;
24
    public const TYPE_OPEN_SHORT  = 3;
25
    public const TYPE_CLOSE       = 4;
26
    public const TYPE_CLOSE_SHORT = 5;
27
    public const TYPE_EQUAL       = 6;
28
    public const TYPE_ATTRIBUTE   = 7;
29
    public const TYPE_WHITESPACE  = 9;
30
    public const TYPE_VERBATIM    = 10;
31
32
    // Content within given tags must not be parsed
33
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
34
35
    // whitespace
36
    private const REGEXP_WHITESPACE = '/\\s/';
37
38
    // Allowed keyword characters.
39
    private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';
40
41
    private array $whitespace = [];
42
    /**
43
     * @var array<array-key, Byte|Token>|array{0: Byte}
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<array-key, Byte|Token>|array{0: Byte} at position 2 could not be parsed: Unknown type name 'array-key' at position 2 in array<array-key, Byte|Token>|array{0: Byte}.
Loading history...
44
     */
45
    private array $attribute = [];
46
    private array $keyword = [];
47
48 161
    /**
     * Walk the source buffer and replace every well-formed HTML tag with grammar tokens.
     *
     * Bytes and tokens that do not start a tag are yielded untouched. When a "<" byte is
     * met, a cloned grammar instance tries to read a complete tag; on failure the "<" is
     * yielded as is and the buffer is replayed from right after it. When the tag name is
     * listed in VERBATIM_TAGS, the stream that follows is handed to parseVerbatim().
     *
     * @param Buffer $src Stream to read from.
     *
     * @return \Generator Yields the pass-through items and the produced tag tokens.
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            // only a "<" byte can open a tag, everything else passes through
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: keep the "<" and re-read whatever the attempt consumed
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            yield from $tag;

            // todo: add support for custom tag list
            $tagName = $this->tagName($tag);
            if (\in_array($tagName, self::VERBATIM_TAGS, true)) {
                // content of such tags must not be parsed as HTML
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }
76
77
    /**
     * Translate a token type constant of this grammar into a readable label.
     *
     * @param int $token One of the TYPE_* constants.
     *
     * @return string The "HTML:*" label, or "HTML:UNDEFINED" for an unknown type.
     *
     * @codeCoverageIgnore
     */
    public static function tokenName(int $token): string
    {
        $labels = [
            self::TYPE_RAW         => 'HTML:RAW',
            self::TYPE_KEYWORD     => 'HTML:KEYWORD',
            self::TYPE_OPEN        => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT  => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE       => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL       => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE   => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE  => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM    => 'HTML:VERBATIM',
        ];

        return $labels[$token] ?? 'HTML:UNDEFINED';
    }
96
97 9
    /**
     * Read the raw body of a verbatim tag up to the tag that terminates it.
     *
     * Everything consumed is collected and emitted as a single TYPE_VERBATIM token,
     * followed by the tokens of the terminating tag. Quoted strings (", ', `) are consumed
     * as a whole, and inside "//" and "/*" comments a "<" only ends the block when it
     * starts a tag named $verbatim.
     *
     * NOTE(review): if the buffer is exhausted before a terminating tag is found, the
     * collected chunks are not emitted at all.
     *
     * @param Buffer $src      Stream positioned right after the opening verbatim tag.
     * @param string $verbatim Lower-cased name of the tag being read.
     *
     * @return \Generator Yields the verbatim token and then the terminating tag tokens.
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings: read up to the same quote
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    $opener = $src->lookaheadByte(1);
                    if ($opener !== '/' && $opener !== '*') {
                        // a lone slash, not a comment
                        break;
                    }

                    $multiline = $opener === '*';

                    // second character of the comment opener
                    $chunks[] = $src->next();

                    while ($nc = $src->next()) {
                        // tokens met inside a comment are skipped and not collected
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === '<') {
                            $tag = (clone $this)->parseGrammar($src);
                            if ($tag === null || $this->tagName($tag) !== $verbatim) {
                                // Not the terminating tag: the "<" is plain comment text.
                                // Resume right after it; replaying from the start of the
                                // comment would collect the same content a second time.
                                $chunks[] = $nc;
                                $src->replay($nc->offset);
                                continue;
                            }

                            // back to primary loop, which reads the "<" once more
                            $src->replay($nc->offset - 1);
                            break 2;
                        }

                        $chunks[] = $nc;

                        if ($multiline) {
                            if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                // consume the slash that closes the comment
                                $chunks[] = $src->next();
                                break;
                            }
                        } elseif ($nc->char === "\n") {
                            // single line comment ends with the line
                            break;
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }
190
191 120
    /**
     * Find the name of a tag in its token list.
     *
     * @param array $tag Tokens of a single tag as returned by parseGrammar().
     *
     * @return string Lower-cased content of the first TYPE_KEYWORD token, or an empty
     *                string when the list has no keyword token.
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $part) {
            if ($part->type !== self::TYPE_KEYWORD) {
                continue;
            }

            return \strtolower($part->content);
        }

        return '';
    }
201
202
    /**
     * Try to read one complete tag from the buffer, starting right after its "<".
     *
     * Tokens are accumulated in $this->tokens; pending keyword, whitespace and quoted
     * attribute bytes are kept in their own buffers and packed into tokens when the next
     * syntax element starts. parse() and parseVerbatim() always call this on a clone.
     *
     * TODO issue #767
     * @link https://github.com/spiral/framework/issues/767
     * @psalm-suppress UndefinedPropertyFetch
     *
     * @param Buffer $src Stream to read from.
     *
     * @return array|null Tokens of the tag, or null when the input is not a valid tag.
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $opening = new Token(self::TYPE_OPEN, $src->getOffset(), '<');
        $this->tokens = [$opening];

        if ($src->lookaheadByte() === '/') {
            // "</" opens a closing tag.
            // NOTE(review): static analysis reports that `char` does not exist on Token here;
            // presumably the lookahead above guarantees next() is a Byte - confirm.
            $opening->type = self::TYPE_OPEN_SHORT;
            $opening->content .= $src->next()->char;
        }

        while ($n = $src->next()) {
            // inside a quoted value: collect everything up to the matching quote
            if ($this->attribute !== []) {
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            // tokens found in the stream become part of the current keyword
            if ($n instanceof Token) {
                $this->keyword[] = $n;
                continue;
            }

            $char = $n->char;

            // a quote starts an attribute value
            if ($char === '"' || $char === "'" || $char === '`') {
                $this->flush();
                $this->attribute[] = $n;
                continue;
            }

            if ($char === '=') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_EQUAL, $n->offset, $char);
                continue;
            }

            if ($char === '/') {
                if ($src->lookaheadByte() !== '>') {
                    // unexpected "/"
                    return null;
                }

                // "/>" ends a self-closing tag
                $this->flush();
                $this->tokens[] = new Token(
                    self::TYPE_CLOSE_SHORT,
                    $n->offset,
                    $char . $src->next()->char
                );
                break;
            }

            if ($char === '>') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_CLOSE, $n->offset, $char);
                break;
            }

            if (\preg_match(self::REGEXP_WHITESPACE, $char)) {
                $this->flushKeyword();
                $this->whitespace[] = $n;
                continue;
            }

            $this->flushWhitespace();

            if (!\preg_match(self::REGEXP_KEYWORD, $char)) {
                // unexpected char
                return null;
            }

            $this->keyword[] = $n;
        }

        return $this->isValid() ? $this->tokens : null;
    }
299
300 127
    /**
     * Check that the collected tokens form a complete tag.
     *
     * A tag needs at least three tokens, must end with a TYPE_CLOSE or TYPE_CLOSE_SHORT
     * token, and its first token that is a keyword, attribute or equal sign must be
     * a keyword (the tag name).
     *
     * @return bool True when the token list describes a valid tag.
     */
    private function isValid(): bool
    {
        $total = \count($this->tokens);

        // tag is too short or does not have name keyword
        if ($total < 3) {
            return false;
        }

        $closing = $this->tokens[$total - 1]->type;
        if (!\in_array($closing, [self::TYPE_CLOSE, self::TYPE_CLOSE_SHORT], true)) {
            return false;
        }

        foreach ($this->tokens as $token) {
            $type = $token->type;

            if ($type === self::TYPE_KEYWORD) {
                return true;
            }

            // an attribute value or "=" before any name means this is not a tag
            if ($type === self::TYPE_ATTRIBUTE || $type === self::TYPE_EQUAL) {
                return false;
            }

            // whitespace and any other token type are ignored
        }

        return false;
    }
329
330
    /**
     * Pack any pending whitespace and then any pending keyword into tokens.
     *
     * Whitespace is flushed first, so it precedes the keyword in the token list
     * when both buffers hold content.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }
338
339
    /**
     * Flush whitespace content.
     *
     * Packs the buffered whitespace into one TYPE_WHITESPACE token and empties the
     * buffer; does nothing when no whitespace is pending.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace !== []) {
            $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
            $this->whitespace = [];
        }
    }
351
352
    /**
     * Flush keyword content.
     *
     * Packs the buffered keyword parts into one TYPE_KEYWORD token and empties the
     * buffer; does nothing when no keyword is pending.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword !== []) {
            $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
            $this->keyword = [];
        }
    }
364
365
    /**
     * Flush attribute content.
     *
     * Packs the buffered quoted value (quotes included) into one TYPE_ATTRIBUTE token
     * and empties the buffer; does nothing when no attribute is pending.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute !== []) {
            $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
            $this->attribute = [];
        }
    }
377
}
378