HTMLGrammar   F
last analyzed

Complexity

Total Complexity 68

Size/Duplication

Total Lines 360
Duplicated Lines 0 %

Test Coverage

Coverage 93.9%

Importance

Changes 0
Metric Value
wmc 68
eloc 187
dl 0
loc 360
ccs 154
cts 164
cp 0.939
rs 2.96
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A parse() 0 26 6
A flushWhitespace() 0 8 2
B isValid() 0 28 9
A flush() 0 4 1
D parseVerbatim() 0 90 25
A tokenName() 0 14 1
A tagName() 0 9 3
A flushKeyword() 0 8 2
A flushAttribute() 0 8 2
C parseGrammar() 0 91 17

How to fix   Complexity   

Complex Class

Complex classes like HTMLGrammar often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use HTMLGrammar, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Spiral\Stempler\Lexer\Grammar;
6
7
use Spiral\Stempler\Lexer\Buffer;
8
use Spiral\Stempler\Lexer\Byte;
9
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
10
use Spiral\Stempler\Lexer\GrammarInterface;
11
use Spiral\Stempler\Lexer\Token;
12
13
/**
14
 * @see https://html.spec.whatwg.org/multipage/syntax.html
15
 */
16
final class HTMLGrammar implements GrammarInterface
17
{
18
    use TokenTrait;
0 ignored issues
show
introduced by
The trait Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait requires some properties which are not provided by Spiral\Stempler\Lexer\Grammar\HTMLGrammar: $char, $content
Loading history...
19
20
    // HTML grammar tokens
21
    public const TYPE_RAW         = 0;
22
    public const TYPE_KEYWORD     = 1;
23
    public const TYPE_OPEN        = 2;
24
    public const TYPE_OPEN_SHORT  = 3;
25
    public const TYPE_CLOSE       = 4;
26
    public const TYPE_CLOSE_SHORT = 5;
27
    public const TYPE_EQUAL       = 6;
28
    public const TYPE_ATTRIBUTE   = 7;
29
    public const TYPE_WHITESPACE  = 9;
30
    public const TYPE_VERBATIM    = 10;
31
32
    // Content within given tags must not be parsed
33
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
34
35
    // whitespace
36
    private const REGEXP_WHITESPACE = '/\\s/';
37
38
    // Allowed keyword characters.
39
    private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';
40
41
    private array $whitespace = [];
42
    /**
43
     * @var array<array-key, Byte|Token>|array{0: Byte}
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<array-key, Byte|Token>|array{0: Byte} at position 2 could not be parsed: Unknown type name 'array-key' at position 2 in array<array-key, Byte|Token>|array{0: Byte}.
Loading history...
44
     */
45
    private array $attribute = [];
46
    private array $keyword = [];
47
48 180
    /**
     * Tokenize HTML tags found in the given buffer.
     *
     * Every buffer item that is not a "<" byte is passed through unchanged. On "<" a tag
     * is parsed on a clone of this grammar, so a failed attempt leaves no partial state
     * behind. When the attempt fails, the "<" byte itself is yielded and the buffer is
     * replayed to its offset, so the following bytes are read again as plain content.
     *
     * After a tag listed in VERBATIM_TAGS, the buffer is handed to parseVerbatim(), which
     * consumes the content up to the next tag with the same name.
     *
     * @param Buffer $src Source stream of bytes and already recognized tokens.
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: emit "<" as is and re-read whatever follows it
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            yield from $tag;

            // todo: add support for custom tag list
            $tagName = $this->tagName($tag);
            if (\in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }
76
77
    /**
     * Human readable name of the given HTML token type.
     *
     * Types that are not declared by this grammar are reported as "HTML:UNDEFINED".
     *
     * @codeCoverageIgnore
     */
    public static function tokenName(int $token): string
    {
        $names = [
            self::TYPE_RAW         => 'HTML:RAW',
            self::TYPE_KEYWORD     => 'HTML:KEYWORD',
            self::TYPE_OPEN        => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT  => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE       => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL       => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE   => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE  => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM    => 'HTML:VERBATIM',
        ];

        return $names[$token] ?? 'HTML:UNDEFINED';
    }
96
97 9
    /**
     * Collect the content of a verbatim tag until a tag with the same name is met.
     *
     * Quoted strings and "//" or "/* ... *\/" comments are consumed as a unit, so quote
     * characters inside them are not treated as string delimiters by the primary loop.
     * Once a tag named $verbatim is parsed, everything collected so far is yielded as a
     * single TYPE_VERBATIM token, followed by the tokens of that tag.
     *
     * Nothing is yielded when the buffer ends before such a tag is found.
     *
     * @param Buffer $src      Source stream, positioned right after the opening verbatim tag.
     * @param string $verbatim Tag name as returned by tagName() (lower-cased).
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings: read up to the same quote char
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    $following = $src->lookaheadByte(1);
                    if ($following !== '/' && $following !== '*') {
                        // lone slash, not a comment
                        break;
                    }

                    $multiline = $following === '*';
                    $chunks[] = $src->next();

                    while ($nc = $src->next()) {
                        // NOTE(review): tokens met inside a comment are skipped, not kept
                        // in the verbatim content - confirm this is intended.
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === '<') {
                            $tag = (clone $this)->parseGrammar($src);
                            if ($tag !== null && $this->tagName($tag) === $verbatim) {
                                // back to primary loop, which reads this "<" again
                                // ("break 2" leaves the comment loop and the switch)
                                $src->replay($nc->offset - 1);
                                break 2;
                            }

                            // Not the closing tag: "<" is ordinary comment content. Resume
                            // right after it; rewinding to the start of the comment would
                            // make the primary loop collect the same bytes twice.
                            $src->replay($nc->offset);
                        }

                        $chunks[] = $nc;

                        if ($multiline) {
                            if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                $chunks[] = $src->next();
                                break;
                            }
                        } elseif ($nc->char === "\n") {
                            break;
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    // leave both the switch and the primary loop
                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }
190
191 127
    /**
     * Lower-cased content of the first keyword token of the tag.
     *
     * @param array $tag Tokens of a single tag.
     *
     * @return string Empty string when the tag has no keyword token.
     */
    private function tagName(array $tag): string
    {
        $name = '';

        foreach ($tag as $candidate) {
            if ($candidate->type !== self::TYPE_KEYWORD) {
                continue;
            }

            $name = \strtolower($candidate->content);
            break;
        }

        return $name;
    }
201
202
    /**
     * Parse one tag; the leading "<" has already been read from the buffer.
     *
     * Reads the buffer until ">" or "/>" and splits the content into keyword, whitespace,
     * equal sign and quoted attribute tokens. Meant to be called on a clone of the grammar,
     * since it accumulates its state in the instance properties.
     *
     * TODO issue #767
     * @link https://github.com/spiral/framework/issues/767
     * @psalm-suppress UndefinedPropertyFetch
     *
     * @return array|null Tokens of the tag, or null when the input is not a valid tag
     *                    (unexpected character, lone "/", or isValid() rejects the result).
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $opening = new Token(self::TYPE_OPEN, $src->getOffset(), '<');
        $this->tokens = [$opening];

        if ($src->lookaheadByte() === '/') {
            // "</": the slash becomes part of the opening token
            $opening->type = self::TYPE_OPEN_SHORT;
            $opening->content .= $src->next()->char;
        }

        while ($item = $src->next()) {
            // inside a quoted value: collect everything up to the same quote char
            if ($this->attribute !== []) {
                $this->attribute[] = $item;

                $closesQuote = $item instanceof Byte && $item->char === $this->attribute[0]->char;
                if ($closesQuote) {
                    $this->flushAttribute();
                }

                continue;
            }

            // tokens met outside of quotes are collected as part of the current keyword
            if ($item instanceof Token) {
                $this->keyword[] = $item;
                continue;
            }

            $char = $item->char;

            if ($char === '"' || $char === "'" || $char === '`') {
                $this->flush();
                $this->attribute[] = $item;
                continue;
            }

            if ($char === '=') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_EQUAL, $item->offset, $char);
                continue;
            }

            if ($char === '/') {
                if ($src->lookaheadByte() !== '>') {
                    // unexpected "/"
                    return null;
                }

                $this->flush();
                $this->tokens[] = new Token(
                    self::TYPE_CLOSE_SHORT,
                    $item->offset,
                    $char . $src->next()->char
                );

                break;
            }

            if ($char === '>') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_CLOSE, $item->offset, $char);

                break;
            }

            if (\preg_match(self::REGEXP_WHITESPACE, $char)) {
                $this->flushKeyword();
                $this->whitespace[] = $item;
                continue;
            }

            $this->flushWhitespace();

            if (!\preg_match(self::REGEXP_KEYWORD, $char)) {
                // unexpected char
                return null;
            }

            $this->keyword[] = $item;
        }

        return $this->isValid() ? $this->tokens : null;
    }
299
300 134
    /**
     * Check that the collected tokens form a complete tag.
     *
     * A tag is accepted when it has at least three tokens, ends with a close token, and
     * its first keyword, attribute or equal sign token is a keyword (the tag name).
     */
    private function isValid(): bool
    {
        $total = \count($this->tokens);

        // tag is too short or does not have name keyword
        if ($total < 3) {
            return false;
        }

        $closingType = $this->tokens[$total - 1]->type;
        if (!\in_array($closingType, [self::TYPE_CLOSE, self::TYPE_CLOSE_SHORT], true)) {
            return false;
        }

        foreach ($this->tokens as $token) {
            if ($token->type === self::TYPE_KEYWORD) {
                return true;
            }

            if ($token->type === self::TYPE_ATTRIBUTE || $token->type === self::TYPE_EQUAL) {
                return false;
            }

            // whitespace and any other token type: keep looking
        }

        return false;
    }
329
330
    /**
     * Flush both pending buffers: whitespace first, then keyword.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }
338
339
    /**
     * Flush whitespace content.
     *
     * Packs the pending whitespace into a single TYPE_WHITESPACE token and resets the
     * buffer. Does nothing when no whitespace is pending.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace !== []) {
            $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
            $this->whitespace = [];
        }
    }
351
352
    /**
     * Flush keyword content.
     *
     * Packs the pending keyword items into a single TYPE_KEYWORD token and resets the
     * buffer. Does nothing when no keyword is pending.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword !== []) {
            $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
            $this->keyword = [];
        }
    }
364
365
    /**
     * Flush attribute content.
     *
     * Packs the pending quoted value (quotes included) into a single TYPE_ATTRIBUTE token
     * and resets the buffer. Does nothing when no attribute is pending.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute !== []) {
            $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
            $this->attribute = [];
        }
    }
377
}
378