HTMLGrammar::tokenName() - Code Metrics - spiral/framework - Measure and Improve Code Quality continuously with Scrutinizer

HTMLGrammar::tokenName() A
last analyzed 2025-12-14 11:38 UTC

↳ Parent: HTMLGrammar

Complexity

Conditions	1
Paths	1

Size

Total Lines	14
Code Lines	12

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	0
CRAP Score	2

Importance

Changes

Metric	Value
eloc	12
c	0
b	0
f	0
dl	0
loc	14
ccs	0
cts	0
cp	0
rs	9.8666
cc	1
nc	1
nop	1
crap	2

<?php

declare(strict_types=1);

namespace Spiral\Stempler\Lexer\Grammar;

use Spiral\Stempler\Lexer\Buffer;
use Spiral\Stempler\Lexer\Byte;
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
use Spiral\Stempler\Lexer\GrammarInterface;
use Spiral\Stempler\Lexer\Token;

/**
 * Lexer grammar that recognises HTML tags in the source buffer and splits each of them
 * into tokens: tag open/close markers, keywords, whitespace, `=` and quoted attribute
 * values. Everything between the opening and the closing tag of one of the
 * {@see self::VERBATIM_TAGS} is packed into a single {@see self::TYPE_VERBATIM} token.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html
 */
final class HTMLGrammar implements GrammarInterface
{
    use TokenTrait;

    // HTML grammar tokens (value 8 is not assigned to any token type of this grammar)
    public const TYPE_RAW         = 0;
    public const TYPE_KEYWORD     = 1;
    public const TYPE_OPEN        = 2;
    public const TYPE_OPEN_SHORT  = 3;
    public const TYPE_CLOSE       = 4;
    public const TYPE_CLOSE_SHORT = 5;
    public const TYPE_EQUAL       = 6;
    public const TYPE_ATTRIBUTE   = 7;
    public const TYPE_WHITESPACE  = 9;
    public const TYPE_VERBATIM    = 10;

    // Content within given tags must not be parsed
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];

    // whitespace
    private const REGEXP_WHITESPACE = '/\\s/';

    // Allowed keyword characters.
    private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';

    /**
     * Whitespace bytes of the tag being parsed which are not packed into a token yet.
     */
    private array $whitespace = [];

    /**
     * Content of the quoted attribute value being parsed. The first element is always
     * the opening quote byte; the value is closed by a byte with the same character.
     *
     * @var array<array-key, Byte|Token>|array{0: Byte}
     */
    private array $attribute = [];

    /**
     * Keyword bytes (and foreign tokens met outside of a quoted value) of the tag being
     * parsed which are not packed into a token yet.
     */
    private array $keyword = [];

    /**
     * Get the human-readable name of a token type.
     *
     * @param int $token One of the TYPE_* constants.
     * @return string Token name, or "HTML:UNDEFINED" when the type is not known to this grammar.
     *
     * @codeCoverageIgnore
     */
    public static function tokenName(int $token): string
    {
        return match ($token) {
            self::TYPE_RAW => 'HTML:RAW',
            self::TYPE_KEYWORD => 'HTML:KEYWORD',
            self::TYPE_OPEN => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM => 'HTML:VERBATIM',
            default => 'HTML:UNDEFINED',
        };
    }

    /**
     * Read the buffer and yield its content, replacing every recognised HTML tag with the
     * tokens it consists of. Anything which is not a part of a valid tag is yielded as is.
     *
     * @param Buffer $src Source buffer to read from.
     * @return \Generator Stream of untouched buffer elements and produced tag tokens.
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            // only the "<" byte can start a tag, everything else passes through
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: emit "<" unchanged and replay the input from its position
                // (presumably hands the bytes consumed by the failed attempt back - confirm
                // against Buffer::replay)
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            $tagName = $this->tagName($tag);

            yield from $tag;

            // todo: add support for custom tag list
            // strict comparison: the tag name is always a string, loose matching is never wanted
            if (\in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }

    /**
     * Consume the content of a verbatim tag (see VERBATIM_TAGS) up to its closing tag.
     *
     * Quoted strings and `//` or `/* *​/` comments are tracked so that their content is
     * collected without being checked for string delimiters. Once a tag named $verbatim is
     * found, the collected content is yielded as one TYPE_VERBATIM token followed by the
     * tokens of that tag.
     *
     * NOTE(review): when the buffer ends before a matching tag is found the collected
     * chunks are never yielded - confirm whether dropping that content is intended.
     *
     * @param Buffer $src      Source buffer, positioned right after the opening verbatim tag.
     * @param string $verbatim Lower-cased name of the verbatim tag being processed.
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings:
                    // collect everything up to the byte equal to the opening quote
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    // "//" starts a single line comment, "/*" starts a multiline one
                    $multiline = false;
                    if ($src->lookaheadByte(1) === '/' || $src->lookaheadByte(1) === '*') {
                        if ($src->lookaheadByte(1) === '*') {
                            $multiline = true;
                        }

                        // second character of the comment opening sequence
                        $chunks[] = $src->next();

                        // language inclusions allow nested strings
                        while ($nc = $src->next()) {
                            // tokens met inside of a comment are skipped and not collected
                            if ($nc instanceof Token) {
                                continue;
                            }

                            if ($nc->char === '<') {
                                $tag = (clone $this)->parseGrammar($src);
                                if ($tag === null || $this->tagName($tag) !== $verbatim) {
                                    // NOTE(review): replays from the "/" which opened the comment,
                                    // not from "<"; bytes already collected above may be read again
                                    // - confirm against Buffer::replay
                                    $src->replay($n->offset);
                                    break;
                                }
                                // back to primary loop: leave the comment loop and the switch,
                                // the "<" is handed back to the buffer for the main loop
                                $src->replay($nc->offset - 1);
                                break 2;
                            }

                            $chunks[] = $nc;

                            if ($multiline) {
                                // multiline comment ends with "*/"
                                if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                    $chunks[] = $src->next();
                                    break;
                                }
                            } elseif ($nc->char === "\n") {
                                // single line comment ends with the line
                                break;
                            }
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        // some other tag or not a tag at all: keep "<" as content
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    // leave both the switch and the reading loop
                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }

    /**
     * Find the name of a parsed tag.
     *
     * @param array $tag Tokens of the tag as returned by parseGrammar().
     * @return string Lower-cased content of the first TYPE_KEYWORD token, or an empty
     *                string when the tag has no keyword token.
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $token) {
            if ($token->type === self::TYPE_KEYWORD) {
                return \strtolower((string) $token->content);
            }
        }

        return '';
    }

    /**
     * Try to read one tag from the buffer; the leading "<" is expected to be consumed by
     * the caller already. The method mutates the state of the grammar, so callers invoke
     * it on a clone. The buffer is not rewound on failure; callers replay it themselves.
     *
     * TODO issue #767
     * @link https://github.com/spiral/framework/issues/767
     * @psalm-suppress UndefinedPropertyFetch
     *
     * @param Buffer $src Source buffer positioned right after "<".
     * @return array|null Tokens of the tag, or null when the input is not a valid tag.
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $this->tokens = [
            new Token(self::TYPE_OPEN, $src->getOffset(), '<'),
        ];

        // "</" opens a closing tag
        if ($src->lookaheadByte() === '/') {
            $this->tokens[0]->type = self::TYPE_OPEN_SHORT;
            $this->tokens[0]->content .= $src->next()->char;
        }

        while ($n = $src->next()) {
            if ($this->attribute !== []) {
                // inside of a quoted value: collect everything until the same quote is met
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            // tokens met outside of a quoted value become a part of the current keyword
            if ($n instanceof Token) {
                $this->keyword[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    // beginning of a quoted value
                    $this->flush();
                    $this->attribute[] = $n;
                    break;

                case '=':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_EQUAL,
                        $n->offset,
                        $n->char,
                    );
                    break;

                case '/':
                    // "/>" ends a self-closing tag
                    if ($src->lookaheadByte() === '>') {
                        $this->flush();
                        $this->tokens[] = new Token(
                            self::TYPE_CLOSE_SHORT,
                            $n->offset,
                            $n->char . $src->next()->char,
                        );

                        // tag is complete: leave both the switch and the reading loop
                        break 2;
                    }

                    // unexpected "/"
                    return null;

                case '>':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_CLOSE,
                        $n->offset,
                        $n->char,
                    );

                    // tag is complete: leave both the switch and the reading loop
                    break 2;

                default:
                    if (\preg_match(self::REGEXP_WHITESPACE, $n->char)) {
                        // whitespace terminates the keyword collected so far
                        $this->flushKeyword();
                        $this->whitespace[] = $n;
                        break;
                    }
                    $this->flushWhitespace();

                    if (!\preg_match(self::REGEXP_KEYWORD, $n->char)) {
                        // unexpected char
                        return null;
                    }

                    $this->keyword[] = $n;
            }
        }

        if (!$this->isValid()) {
            return null;
        }

        return $this->tokens;
    }

    /**
     * Check that the collected tokens form a tag: at least three tokens, the last one
     * closes the tag, and the first token after the opening one (whitespace ignored) is
     * a keyword, i.e. the tag name.
     */
    private function isValid(): bool
    {
        // tag is too short or does not have name keyword
        if (\count($this->tokens) < 3) {
            return false;
        }

        $last = $this->tokens[\count($this->tokens) - 1];
        if ($last->type !== self::TYPE_CLOSE && $last->type !== self::TYPE_CLOSE_SHORT) {
            return false;
        }

        foreach ($this->tokens as $token) {
            switch ($token->type) {
                case self::TYPE_WHITESPACE:
                    // ignore
                    continue 2;

                case self::TYPE_ATTRIBUTE:
                case self::TYPE_EQUAL:
                    // value or "=" before the tag name
                    return false;

                case self::TYPE_KEYWORD:
                    return true;
            }
        }

        return false;
    }

    /**
     * Flush whitespace or keyword tokens.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }

    /**
     * Flush whitespace content: pack pending whitespace into a TYPE_WHITESPACE token.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
        $this->whitespace = [];
    }

    /**
     * Flush keyword content: pack pending keyword parts into a TYPE_KEYWORD token.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
        $this->keyword = [];
    }

    /**
     * Flush attribute content: pack the collected quoted value into a TYPE_ATTRIBUTE token.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
        $this->attribute = [];
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		namespace Spiral\Stempler\Lexer\Grammar;
6
7		use Spiral\Stempler\Lexer\Buffer;
8		use Spiral\Stempler\Lexer\Byte;
9		use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
10		use Spiral\Stempler\Lexer\GrammarInterface;
11		use Spiral\Stempler\Lexer\Token;
12
13		/**
14		* @see https://html.spec.whatwg.org/multipage/syntax.htm
15		*/
16		final class HTMLGrammar implements GrammarInterface
17		{
18		use TokenTrait;
		0 ignored issues – show introduced 2020-09-18 20:16 UTC by Report Bug Copy Issue Report The trait `Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait` requires some properties which are not provided by `Spiral\Stempler\Lexer\Grammar\HTMLGrammar`: `$char`, `$content` Loading history...
19
20		// HTML grammar tokens
21		public const TYPE_RAW = 0;
22		public const TYPE_KEYWORD = 1;
23		public const TYPE_OPEN = 2;
24		public const TYPE_OPEN_SHORT = 3;
25		public const TYPE_CLOSE = 4;
26		public const TYPE_CLOSE_SHORT = 5;
27		public const TYPE_EQUAL = 6;
28		public const TYPE_ATTRIBUTE = 7;
29		public const TYPE_WHITESPACE = 9;
30		public const TYPE_VERBATIM = 10;
31
32		// Content within given tags must not be parsed
33		private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
34
35		// whitespace
36		private const REGEXP_WHITESPACE = '/\\s/';
37
38		// Allowed keyword characters.
39		private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';
40
41		private array $whitespace = [];
42
43		/**
44		* @var array<array-key, Byte\|Token>\|array{0: Byte}
		0 ignored issues – show Documentation Bug introduced 2022-09-13 07:04 UTC by Report Bug Copy Issue Report The doc comment `array<array-key, Byte\|Token>\|array{0: Byte}` at position `2` could not be parsed: Unknown type name 'array-key' at position 2 in array<array-key, Byte\|Token>\|array{0: Byte}. Loading history...
45		*/
46		private array $attribute = [];
47
48		private array $keyword = [];
49
50		/**
51		* @codeCoverageIgnore
52		*/
53		public static function tokenName(int $token): string
54		{
55		return match ($token) {
56		self::TYPE_RAW => 'HTML:RAW',
57		self::TYPE_KEYWORD => 'HTML:KEYWORD',
58		self::TYPE_OPEN => 'HTML:OPEN_TAG',
59		self::TYPE_OPEN_SHORT => 'HTML:OPEN_SHORT_TAG',
60		self::TYPE_CLOSE => 'HTML:CLOSE_TAG',
61		self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
62		self::TYPE_EQUAL => 'HTML:EQUAL',
63		self::TYPE_ATTRIBUTE => 'HTML:ATTRIBUTE',
64		self::TYPE_WHITESPACE => 'HTML:WHITESPACE',
65		self::TYPE_VERBATIM => 'HTML:VERBATIM',
66		default => 'HTML:UNDEFINED',
67		};
68		}
69
70	181	public function parse(Buffer $src): \Generator
71		{
72	181	while ($n = $src->next()) {
73	181	if (!$n instanceof Byte \|\| $n->char !== '<') {
74	135	yield $n;
75	135	continue;
76		}
77
78		// work with isolated token stream!
79	137	$tag = (clone $this)->parseGrammar($src);
80	137	if ($tag === null) {
81	15	yield $n;
82	15	$src->replay($n->offset);
83	15	continue;
84		}
85
86	127	$tagName = $this->tagName($tag);
87
88		// todo: add support for custom tag list
89	127	if (\in_array($tagName, self::VERBATIM_TAGS)) {
90	9	yield from $tag;
91	9	yield from $this->parseVerbatim($src, $tagName);
92	9	continue;
93		}
94
95	118	yield from $tag;
96		}
97		}
98
99	9	private function parseVerbatim(Buffer $src, string $verbatim): \Generator
100		{
101	9	$chunks = [];
102
103	9	while ($n = $src->next()) {
104	9	if ($n instanceof Token) {
105	2	$chunks[] = $n;
106	2	continue;
107		}
108
109	9	switch ($n->char) {
110	9	case '"':
111	9	case "'":
112	9	case '`':
113	7	$chunks[] = $n;
114
115		// language inclusions allow nested strings
116	7	while ($nc = $src->next()) {
117	7	$chunks[] = $nc;
118	7	if ($nc instanceof Token) {
119	1	continue;
120		}
121
122	7	if ($nc->char === $n->char) {
123	7	break;
124		}
125		}
126
127	7	break;
128
129	9	case '/':
130	3	$chunks[] = $n;
131
132	3	$multiline = false;
133	3	if ($src->lookaheadByte(1) === '/' \|\| $src->lookaheadByte(1) === '*') {
134	3	if ($src->lookaheadByte(1) === '*') {
135	1	$multiline = true;
136		}
137
138	3	$chunks[] = $src->next();
139
140		// language inclusions allow nested strings
141	3	while ($nc = $src->next()) {
142	3	if ($nc instanceof Token) {
143		continue;
144		}
145
146	3	if ($nc->char === '<') {
147	2	$tag = (clone $this)->parseGrammar($src);
148	2	if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
149		$src->replay($n->offset);
150		break;
151		}
152		// back to primary loop
153	2	$src->replay($nc->offset - 1);
154	2	break 2;
155		}
156
157	3	$chunks[] = $nc;
158
159	3	if ($multiline) {
160	1	if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
161	1	$chunks[] = $src->next();
162	1	break;
163		}
164	2	} elseif ($nc->char === "\n") {
165		break;
166		}
167		}
168		}
169
170	1	break;
171
172	9	case '<':
173		// tag beginning?
174	9	$tag = (clone $this)->parseGrammar($src);
175	9	if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
176		$chunks[] = $n;
177		$src->replay($n->offset);
178		break;
179		}
180
181		// found closing verbatim tag
182	9	yield $this->packToken($chunks, self::TYPE_VERBATIM);
183	9	yield from $tag;
184
185	9	break 2;
186
187		default:
188	9	$chunks[] = $n;
189		}
190		}
191		}
192
193	127	private function tagName(array $tag): string
194		{
195	127	foreach ($tag as $token) {
196	127	if ($token->type === self::TYPE_KEYWORD) {
197	127	return \strtolower((string) $token->content);
198		}
199		}
200
201		return '';
202		}
203
204		/**
205		* TODO issue #767
206		* @link https://github.com/spiral/framework/issues/767
207		* @psalm-suppress UndefinedPropertyFetch
208		*/
209	137	private function parseGrammar(Buffer $src): ?array
210		{
211	137	$this->tokens = [
212	137	new Token(self::TYPE_OPEN, $src->getOffset(), '<'),
213	137	];
214
215	137	if ($src->lookaheadByte() === '/') {
216	103	$this->tokens[0]->type = self::TYPE_OPEN_SHORT;
217	103	$this->tokens[0]->content .= $src->next()->char;
		0 ignored issues – show Bug introduced 2020-09-18 20:16 UTC by Report Bug Copy Issue Report The property `char` does not seem to exist on `Spiral\Stempler\Lexer\Token`. Loading history...
218		}
219
220	137	while ($n = $src->next()) {
221	137	if ($this->attribute !== []) {
222	94	$this->attribute[] = $n;
223
224	94	if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
225	93	$this->flushAttribute();
226		}
227
228	94	continue;
229		}
230
231	137	if ($n instanceof Token) {
232	5	$this->keyword[] = $n;
233	5	continue;
234		}
235
236	137	switch ($n->char) {
237	137	case '"':
238	137	case "'":
239	137	case '`':
240	94	$this->flush();
241	94	$this->attribute[] = $n;
242	94	break;
243
244	137	case '=':
245	93	$this->flush();
246	93	$this->tokens[] = new Token(
247	93	self::TYPE_EQUAL,
248	93	$n->offset,
249	93	$n->char,
250	93	);
251	93	break;
252
253	137	case '/':
254	70	if ($src->lookaheadByte() === '>') {
255	69	$this->flush();
256	69	$this->tokens[] = new Token(
257	69	self::TYPE_CLOSE_SHORT,
258	69	$n->offset,
259	69	$n->char . $src->next()->char,
260	69	);
261
262	69	break 2;
263		}
264
265		// unexpected "/"
266	1	return null;
267
268	137	case '>':
269	121	$this->flush();
270	121	$this->tokens[] = new Token(
271	121	self::TYPE_CLOSE,
272	121	$n->offset,
273	121	$n->char,
274	121	);
275	121	break 2;
276
277		default:
278	132	if (\preg_match(self::REGEXP_WHITESPACE, $n->char)) {
279	96	$this->flushKeyword();
280	96	$this->whitespace[] = $n;
281	96	break;
282		}
283	132	$this->flushWhitespace();
284
285
286	132	if (!\preg_match(self::REGEXP_KEYWORD, $n->char)) {
287		// unexpected char
288	8	return null;
289		}
290
291	130	$this->keyword[] = $n;
292		}
293		}
294
295	134	if (!$this->isValid()) {
296	7	return null;
297		}
298
299	127	return $this->tokens;
300		}
301
302	134	private function isValid(): bool
303		{
304		// tag is too short or does not have name keyword
305	134	if (\count($this->tokens) < 3) {
306	2	return false;
307		}
308
309	132	$last = $this->tokens[\count($this->tokens) - 1];
310	132	if ($last->type !== self::TYPE_CLOSE && $last->type !== self::TYPE_CLOSE_SHORT) {
311	2	return false;
312		}
313
314	130	foreach ($this->tokens as $token) {
315	130	switch ($token->type) {
316		case self::TYPE_WHITESPACE:
317		// ignore
318	3	continue 2;
319
320		case self::TYPE_ATTRIBUTE:
321		case self::TYPE_EQUAL:
322	3	return false;
323
324		case self::TYPE_KEYWORD:
325	127	return true;
326		}
327		}
328
329		return false;
330		}
331
332		/**
333		* Flush whitespace or keyword tokens.
334		*/
335	135	private function flush(): void
336		{
337	135	$this->flushWhitespace();
338	135	$this->flushKeyword();
339		}
340
341		/**
342		* Flush keyword content.
343		*/
344	137	private function flushWhitespace(): void
345		{
346	137	if ($this->whitespace === []) {
347	137	return;
348		}
349
350	96	$this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
351	96	$this->whitespace = [];
352		}
353
354		/**
355		* Flush keyword content.
356		*/
357	135	private function flushKeyword(): void
358		{
359	135	if ($this->keyword === []) {
360	98	return;
361		}
362
363	132	$this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
364	132	$this->keyword = [];
365		}
366
367		/**
368		* Flush attribute content.
369		*/
370	93	private function flushAttribute(): void
371		{
372	93	if ($this->attribute === []) {
373		return;
374		}
375
376	93	$this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
377	93	$this->attribute = [];
378		}
379		}
380

spiral / framework

HTMLGrammar::tokenName() A last analyzed 2025-12-14 11:38 UTC

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like

HTMLGrammar::tokenName() A
last analyzed 2025-12-14 11:38 UTC