HTMLGrammar::tagName()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 3.072

Importance

Changes 0
Metric Value
cc 3
eloc 4
nc 3
nop 1
dl 0
loc 9
ccs 4
cts 5
cp 0.8
crap 3.072
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Spiral\Stempler\Lexer\Grammar;
6
7
use Spiral\Stempler\Lexer\Buffer;
8
use Spiral\Stempler\Lexer\Byte;
9
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
10
use Spiral\Stempler\Lexer\GrammarInterface;
11
use Spiral\Stempler\Lexer\Token;
12
13
/**
14
 * @see https://html.spec.whatwg.org/multipage/syntax.html
15
 */
16
final class HTMLGrammar implements GrammarInterface
17
{
18
    use TokenTrait;
0 ignored issues
show
introduced by
The trait Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait requires some properties which are not provided by Spiral\Stempler\Lexer\Grammar\HTMLGrammar: $char, $content
Loading history...
19
20
    // HTML grammar tokens
    public const TYPE_RAW         = 0;
    public const TYPE_KEYWORD     = 1;
    public const TYPE_OPEN        = 2;
    public const TYPE_OPEN_SHORT  = 3;
    public const TYPE_CLOSE       = 4;
    public const TYPE_CLOSE_SHORT = 5;
    public const TYPE_EQUAL       = 6;
    public const TYPE_ATTRIBUTE   = 7;
    public const TYPE_WHITESPACE  = 9;
    public const TYPE_VERBATIM    = 10;

    // Content within given tags must not be parsed
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];

    // whitespace
    private const REGEXP_WHITESPACE = '/\\s/';

    // Allowed keyword characters.
    private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';

    /**
     * Whitespace bytes collected since the last flush.
     *
     * @var array<int, Byte>
     */
    private array $whitespace = [];

    /**
     * Content of the quoted attribute value currently being read; the first
     * element is the opening quote byte. Empty when no quoted value is open.
     *
     * @var array<array-key, Byte|Token>|array{0: Byte}
     */
    private array $attribute = [];

    /**
     * Bytes and foreign tokens collected for the current keyword.
     *
     * @var array<int, Byte|Token>
     */
    private array $keyword = [];
49
50
    /**
     * Resolve the display name of an HTML grammar token type.
     *
     * @param int $token One of the TYPE_* constants of this grammar.
     * @return string The token name, or "HTML:UNDEFINED" for an unknown type.
     *
     * @codeCoverageIgnore
     */
    public static function tokenName(int $token): string
    {
        $names = [
            self::TYPE_RAW         => 'HTML:RAW',
            self::TYPE_KEYWORD     => 'HTML:KEYWORD',
            self::TYPE_OPEN        => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT  => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE       => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL       => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE   => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE  => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM    => 'HTML:VERBATIM',
        ];

        return $names[$token] ?? 'HTML:UNDEFINED';
    }
69
70 181
    /**
     * Tokenize HTML tags found in the given source stream.
     *
     * Every stream item that is not a "<" byte is yielded untouched. On "<" an
     * isolated clone of the grammar tries to read a complete tag: on failure the
     * "<" byte is yielded as is and the stream is replayed from its offset, on
     * success the tag tokens are yielded. When the tag name is one of
     * VERBATIM_TAGS, the content up to the matching tag is delegated to
     * parseVerbatim().
     *
     * @param Buffer $src Source stream to read from.
     * @return \Generator Yields the untouched stream items and the tokens of recognised tags.
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: emit "<" as is and continue right after it
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            // resolve the name before the tokens are handed over to the consumer
            $tagName = $this->tagName($tag);

            yield from $tag;

            // todo: add support for custom tag list
            // strict comparison: tag names are always strings, no type juggling wanted
            if (\in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }
98
99 9
    /**
     * Read the content of a verbatim tag (see VERBATIM_TAGS) without parsing it as HTML.
     *
     * Stream items are collected until a tag whose name equals $verbatim is
     * met; at that point the collected items are packed into one TYPE_VERBATIM
     * token, which is yielded followed by the tokens of that tag. Quoted strings
     * and "//" or "/*" comments are consumed as a whole so that a "<" inside a
     * string does not terminate the block.
     *
     * NOTE(review): when the stream ends before a matching tag is found, nothing
     * is yielded and the collected content is not emitted - confirm this is intended.
     *
     * @param Buffer $src      Source stream, positioned right after the opening verbatim tag.
     * @param string $verbatim Lower-cased name of the verbatim tag being read.
     * @return \Generator Yields the TYPE_VERBATIM token and the tokens of the matching tag.
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            // foreign tokens are kept as a part of the verbatim content
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            $char = $n->char;

            // quoted string: consume everything up to the matching quote
            if ($char === '"' || $char === "'" || $char === '`') {
                $chunks[] = $n;

                // language inclusions allow nested strings
                while ($inner = $src->next()) {
                    $chunks[] = $inner;
                    if (!$inner instanceof Token && $inner->char === $char) {
                        break;
                    }
                }

                continue;
            }

            // possible comment start
            if ($char === '/') {
                $chunks[] = $n;

                $ahead = $src->lookaheadByte(1);
                if ($ahead !== '/' && $ahead !== '*') {
                    // plain slash, not a comment
                    continue;
                }

                $multiline = $ahead === '*';

                // second character of the comment opener
                $chunks[] = $src->next();

                while ($inner = $src->next()) {
                    // tokens met inside a comment are skipped and not collected
                    if ($inner instanceof Token) {
                        continue;
                    }

                    if ($inner->char === '<') {
                        $tag = (clone $this)->parseGrammar($src);
                        $closing = $tag !== null && $this->tagName($tag) === $verbatim;

                        // A matching tag is handed back to the primary loop by
                        // replaying right before "<".
                        // NOTE(review): otherwise the stream is replayed to the
                        // offset of the opening "/", not of "<" - confirm against
                        // Buffer::replay() that the comment body is not read twice.
                        $src->replay($closing ? $inner->offset - 1 : $n->offset);
                        break;
                    }

                    $chunks[] = $inner;

                    if ($multiline) {
                        if ($inner->char === '*' && $src->lookaheadByte(1) === '/') {
                            // closing "*/"
                            $chunks[] = $src->next();
                            break;
                        }
                    } elseif ($inner->char === "\n") {
                        // a single-line comment ends with the line
                        break;
                    }
                }

                continue;
            }

            // tag beginning?
            if ($char === '<') {
                $tag = (clone $this)->parseGrammar($src);
                if ($tag !== null && $this->tagName($tag) === $verbatim) {
                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    break;
                }

                // some other "<": keep it as content and continue right after it
                $chunks[] = $n;
                $src->replay($n->offset);

                continue;
            }

            $chunks[] = $n;
        }
    }
192
193 127
    /**
     * Find the name of a parsed tag.
     *
     * @param array $tag Tag tokens as returned by parseGrammar().
     * @return string Lower-cased content of the first TYPE_KEYWORD token, or an
     *                empty string when the tag has no keyword token.
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $candidate) {
            if ($candidate->type !== self::TYPE_KEYWORD) {
                continue;
            }

            return \strtolower((string) $candidate->content);
        }

        return '';
    }
203
204
    /**
     * Read a single tag from the stream, starting right after its "<" byte.
     *
     * The method fills the grammar's own buffers and is therefore invoked on a
     * clone by its callers. Reading stops at ">" or "/>"; a stray "/" or a
     * character that is neither whitespace nor matched by REGEXP_KEYWORD aborts
     * the attempt.
     *
     * TODO issue #767
     * @link https://github.com/spiral/framework/issues/767
     * @psalm-suppress UndefinedPropertyFetch
     *
     * @param Buffer $src Source stream to read from.
     * @return array|null Tokens of the tag, or null when the input does not form a valid tag.
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $open = new Token(self::TYPE_OPEN, $src->getOffset(), '<');
        $this->tokens = [$open];

        // "</" turns the opening token into its short form
        if ($src->lookaheadByte() === '/') {
            $open->type = self::TYPE_OPEN_SHORT;
            $open->content .= $src->next()->char;
        }

        while ($n = $src->next()) {
            if ($this->attribute !== []) {
                // inside a quoted value: collect everything up to the matching quote
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            // foreign tokens become a part of the current keyword
            if ($n instanceof Token) {
                $this->keyword[] = $n;
                continue;
            }

            $char = $n->char;

            // opening quote of an attribute value
            if ($char === '"' || $char === "'" || $char === '`') {
                $this->flush();
                $this->attribute[] = $n;
                continue;
            }

            if ($char === '=') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_EQUAL, $n->offset, $char);
                continue;
            }

            if ($char === '/') {
                if ($src->lookaheadByte() !== '>') {
                    // unexpected "/"
                    return null;
                }

                $this->flush();
                $this->tokens[] = new Token(
                    self::TYPE_CLOSE_SHORT,
                    $n->offset,
                    $char . $src->next()->char,
                );

                break;
            }

            if ($char === '>') {
                $this->flush();
                $this->tokens[] = new Token(self::TYPE_CLOSE, $n->offset, $char);

                break;
            }

            if (\preg_match(self::REGEXP_WHITESPACE, $char)) {
                $this->flushKeyword();
                $this->whitespace[] = $n;
                continue;
            }

            $this->flushWhitespace();

            if (!\preg_match(self::REGEXP_KEYWORD, $char)) {
                // unexpected char
                return null;
            }

            $this->keyword[] = $n;
        }

        return $this->isValid() ? $this->tokens : null;
    }
301
302 134
    /**
     * Check that the collected tokens form a complete tag.
     *
     * A tag is accepted when it has at least three tokens, ends with a
     * TYPE_CLOSE or TYPE_CLOSE_SHORT token, and a keyword (the tag name) is met
     * before any attribute or equal sign; whitespace and other token types in
     * front of it are ignored.
     */
    private function isValid(): bool
    {
        $total = \count($this->tokens);

        // tag is too short or does not have name keyword
        if ($total < 3) {
            return false;
        }

        $closingType = $this->tokens[$total - 1]->type;
        if (!\in_array($closingType, [self::TYPE_CLOSE, self::TYPE_CLOSE_SHORT], true)) {
            return false;
        }

        foreach ($this->tokens as $token) {
            // null means "undecided yet, inspect the next token"
            $verdict = match ($token->type) {
                self::TYPE_KEYWORD => true,
                self::TYPE_ATTRIBUTE, self::TYPE_EQUAL => false,
                default => null,
            };

            if ($verdict !== null) {
                return $verdict;
            }
        }

        return false;
    }
331
332
    /**
     * Flush both pending buffers into the token list: whitespace first, then
     * the keyword.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }
340
341
    /**
     * Flush whitespace content.
     *
     * Packs the collected whitespace into one TYPE_WHITESPACE token, appends it
     * to the token list and resets the buffer. Does nothing when no whitespace
     * is pending.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace !== []) {
            $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
            $this->whitespace = [];
        }
    }
353
354
    /**
     * Flush keyword content.
     *
     * Packs the collected keyword items into one TYPE_KEYWORD token, appends it
     * to the token list and resets the buffer. Does nothing when no keyword is
     * pending.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword !== []) {
            $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
            $this->keyword = [];
        }
    }
366
367
    /**
     * Flush attribute content.
     *
     * Packs the collected quoted value (quotes included) into one
     * TYPE_ATTRIBUTE token, appends it to the token list and resets the buffer.
     * Does nothing when no attribute value is pending.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute !== []) {
            $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
            $this->attribute = [];
        }
    }
379
}
380