HTMLGrammar::parseGrammar() - Code Metrics - Inspection of "Merging bugfixes from 3.0" - spiral/framework - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#806)

by Maxim

created 2022-09-29 14:16 UTC

HTMLGrammar::parseGrammar() C

↳ Parent: HTMLGrammar

Complexity

Conditions	17
Paths	44

Size

Total Lines	91
Code Lines	56

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	53
CRAP Score	17

Importance

Changes

Metric	Value
cc	17
eloc	56
nc	44
nop	1
dl	0
loc	91
ccs	53
cts	53
cp	1
crap	17
rs	5.2166
c	0
b	0
f	0

How to fix Long Method Complexity

<?php

declare(strict_types=1);

namespace Spiral\Stempler\Lexer\Grammar;

use Spiral\Stempler\Lexer\Buffer;
use Spiral\Stempler\Lexer\Byte;
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
use Spiral\Stempler\Lexer\GrammarInterface;
use Spiral\Stempler\Lexer\Token;

/**
 * Lexer grammar that recognizes HTML tags inside a byte/token stream.
 *
 * Every "<" byte is treated as a possible tag start. A tag is tokenized on a clone of the grammar so
 * that a failed attempt leaves no partial state behind; the content of the tags listed in
 * VERBATIM_TAGS is emitted as one TYPE_VERBATIM token instead of being parsed.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html
 */
final class HTMLGrammar implements GrammarInterface
{
    use TokenTrait;

    // HTML grammar tokens
    public const TYPE_RAW         = 0;
    public const TYPE_KEYWORD     = 1;
    public const TYPE_OPEN        = 2;
    public const TYPE_OPEN_SHORT  = 3;
    public const TYPE_CLOSE       = 4;
    public const TYPE_CLOSE_SHORT = 5;
    public const TYPE_EQUAL       = 6;
    public const TYPE_ATTRIBUTE   = 7;
    public const TYPE_WHITESPACE  = 9;
    public const TYPE_VERBATIM    = 10;

    // Content within given tags must not be parsed
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];

    // whitespace
    private const REGEXP_WHITESPACE = '/\\s/';

    // Allowed keyword characters.
    private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';

    /** Pending whitespace bytes, not yet packed into a TYPE_WHITESPACE token. */
    private array $whitespace = [];

    /**
     * Pending quoted attribute value; element 0 is the opening quote byte.
     *
     * @var array<array-key, Byte|Token>|array{0: Byte}
     */
    private array $attribute = [];

    /** Pending keyword bytes/tokens, not yet packed into a TYPE_KEYWORD token. */
    private array $keyword = [];

    /**
     * Tokenize the stream, yielding tag tokens and passing everything else through unchanged.
     *
     * @return \Generator yields Byte and Token instances in stream order
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // Not a tag: emit "<" as is and rewind whatever the failed attempt consumed.
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            $tagName = $this->tagName($tag);

            // todo: add support for custom tag list
            if (\in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $tag;
                yield from $this->parseVerbatim($src, $tagName);
                continue;
            }

            yield from $tag;
        }
    }

    /**
     * Human-readable name of a token type, 'HTML:UNDEFINED' for unknown types.
     *
     * @codeCoverageIgnore
     */
    public static function tokenName(int $token): string
    {
        return match ($token) {
            self::TYPE_RAW => 'HTML:RAW',
            self::TYPE_KEYWORD => 'HTML:KEYWORD',
            self::TYPE_OPEN => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM => 'HTML:VERBATIM',
            default => 'HTML:UNDEFINED',
        };
    }

    /**
     * Collect the content of a verbatim tag up to its closing tag.
     *
     * Quoted strings and "//" or "/*" comments are scanned as units so that a "<" inside a string
     * is not mistaken for a tag. Once a tag named $verbatim is found, the collected content is
     * yielded as one TYPE_VERBATIM token followed by the tokens of that tag.
     *
     * NOTE(review): when the stream ends before such a tag is found, nothing is yielded and the
     * collected content is dropped - confirm this is intended.
     *
     * @param string $verbatim lower-cased name of the tag whose content is being collected
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;
                        if ($nc instanceof Token) {
                            continue;
                        }

                        // the string ends on the same quote character that opened it
                        if ($nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    $multiline = false;
                    if ($src->lookaheadByte(1) === '/' || $src->lookaheadByte(1) === '*') {
                        if ($src->lookaheadByte(1) === '*') {
                            $multiline = true;
                        }

                        // second character of the comment opener
                        $chunks[] = $src->next();

                        // language inclusions allow nested strings
                        while ($nc = $src->next()) {
                            // NOTE(review): tokens inside comments are discarded, unlike tokens
                            // inside strings - confirm this is intended.
                            if ($nc instanceof Token) {
                                continue;
                            }

                            if ($nc->char === '<') {
                                $tag = (clone $this)->parseGrammar($src);
                                if ($tag === null || $this->tagName($tag) !== $verbatim) {
                                    // Not the closing verbatim tag: keep "<" as comment content and
                                    // resume right after it, as the top-level "<" case does.
                                    // Rewinding to the comment opener instead would append the
                                    // already collected comment text a second time.
                                    $chunks[] = $nc;
                                    $src->replay($nc->offset);
                                    continue;
                                }

                                // back to primary loop, which re-reads "<" and emits the closing tag
                                $src->replay($nc->offset - 1);
                                break 2;
                            }

                            $chunks[] = $nc;

                            if ($multiline) {
                                if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                    $chunks[] = $src->next();
                                    break;
                                }
                            } elseif ($nc->char === "\n") {
                                // single-line comment ends with the line
                                break;
                            }
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }

    /**
     * Lower-cased content of the first TYPE_KEYWORD token of a tag, or '' when there is none.
     *
     * @param Token[] $tag tokens produced by parseGrammar()
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $token) {
            if ($token->type === self::TYPE_KEYWORD) {
                return \strtolower($token->content);
            }
        }

        return '';
    }

    /**
     * Tokenize one tag; the caller has already consumed its leading "<".
     *
     * Accumulates state in $this, so it is meant to be called on a fresh clone of the grammar.
     *
     * TODO issue #767
     * @link https://github.com/spiral/framework/issues/767
     * @psalm-suppress UndefinedPropertyFetch
     *
     * @return Token[]|null tag tokens, or null when the input is not a valid tag
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $this->tokens = [
            new Token(self::TYPE_OPEN, $src->getOffset(), '<'),
        ];

        // "</" turns the opening token into TYPE_OPEN_SHORT
        if ($src->lookaheadByte() === '/') {
            $this->tokens[0]->type = self::TYPE_OPEN_SHORT;
            $this->tokens[0]->content .= $src->next()->char;
        }

        while ($n = $src->next()) {
            // Inside a quoted attribute value: collect everything up to the matching quote byte.
            if ($this->attribute !== []) {
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            // Tokens found outside of attribute values become part of the current keyword.
            if ($n instanceof Token) {
                $this->keyword[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $this->flush();
                    $this->attribute[] = $n;
                    break;

                case '=':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_EQUAL,
                        $n->offset,
                        $n->char
                    );
                    break;

                case '/':
                    if ($src->lookaheadByte() === '>') {
                        $this->flush();
                        $this->tokens[] = new Token(
                            self::TYPE_CLOSE_SHORT,
                            $n->offset,
                            $n->char . $src->next()->char
                        );

                        break 2;
                    }

                    // unexpected "/"
                    return null;

                case '>':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_CLOSE,
                        $n->offset,
                        $n->char
                    );
                    break 2;

                default:
                    if (\preg_match(self::REGEXP_WHITESPACE, $n->char)) {
                        $this->flushKeyword();
                        $this->whitespace[] = $n;
                        break;
                    }
                    $this->flushWhitespace();

                    if (!\preg_match(self::REGEXP_KEYWORD, $n->char)) {
                        // unexpected char
                        return null;
                    }

                    $this->keyword[] = $n;
            }
        }

        if (!$this->isValid()) {
            return null;
        }

        return $this->tokens;
    }

    /**
     * Check that the collected tokens form a complete tag.
     *
     * The tag needs at least three tokens, must end with a closing token, and a keyword (the tag
     * name) must appear before any attribute or "=" token.
     */
    private function isValid(): bool
    {
        // tag is too short or does not have name keyword
        if (\count($this->tokens) < 3) {
            return false;
        }

        $last = $this->tokens[\count($this->tokens) - 1];
        if ($last->type !== self::TYPE_CLOSE && $last->type !== self::TYPE_CLOSE_SHORT) {
            return false;
        }

        foreach ($this->tokens as $token) {
            switch ($token->type) {
                case self::TYPE_WHITESPACE:
                    // ignore
                    continue 2;

                case self::TYPE_ATTRIBUTE:
                case self::TYPE_EQUAL:
                    return false;

                case self::TYPE_KEYWORD:
                    return true;
            }
        }

        return false;
    }

    /**
     * Flush whitespace or keyword tokens.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }

    /**
     * Flush whitespace content.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
        $this->whitespace = [];
    }

    /**
     * Flush keyword content.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
        $this->keyword = [];
    }

    /**
     * Flush attribute content.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
        $this->attribute = [];
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		namespace Spiral\Stempler\Lexer\Grammar;
6
7		use Spiral\Stempler\Lexer\Buffer;
8		use Spiral\Stempler\Lexer\Byte;
9		use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
10		use Spiral\Stempler\Lexer\GrammarInterface;
11		use Spiral\Stempler\Lexer\Token;
12
13		/**
14		* @see https://html.spec.whatwg.org/multipage/syntax.htm
15		*/
16		final class HTMLGrammar implements GrammarInterface
17		{
18		use TokenTrait;
		Issue (introduced 2020-09-18 20:16 UTC; 0 ignored issues): The trait `Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait` requires some properties which are not provided by `Spiral\Stempler\Lexer\Grammar\HTMLGrammar`: `$char`, `$content`.
19
20		// HTML grammar tokens
21		public const TYPE_RAW = 0;
22		public const TYPE_KEYWORD = 1;
23		public const TYPE_OPEN = 2;
24		public const TYPE_OPEN_SHORT = 3;
25		public const TYPE_CLOSE = 4;
26		public const TYPE_CLOSE_SHORT = 5;
27		public const TYPE_EQUAL = 6;
28		public const TYPE_ATTRIBUTE = 7;
29		public const TYPE_WHITESPACE = 9;
30		public const TYPE_VERBATIM = 10;
31
32		// Content within given tags must not be parsed
33		private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
34
35		// whitespace
36		private const REGEXP_WHITESPACE = '/\\s/';
37
38		// Allowed keyword characters.
39		private const REGEXP_KEYWORD = '/[a-z0-9_\\-:\\.]/ui';
40
41		private array $whitespace = [];
42		/**
43		* @var array<array-key, Byte\|Token>\|array{0: Byte}
		Issue (Documentation Bug, introduced 2022-09-13 07:04 UTC; 0 ignored issues): The doc comment `array<array-key, Byte|Token>|array{0: Byte}` at position `2` could not be parsed: unknown type name 'array-key' at position 2 in `array<array-key, Byte|Token>|array{0: Byte}`.
44		*/
45		private array $attribute = [];
46		private array $keyword = [];
47
48	161	public function parse(Buffer $src): \Generator
49		{
50	161	while ($n = $src->next()) {
51	161	if (!$n instanceof Byte \|\| $n->char !== '<') {
52	116	yield $n;
53	116	continue;
54		}
55
56		// work with isolated token stream!
57	130	$tag = (clone $this)->parseGrammar($src);
58	130	if ($tag === null) {
59	15	yield $n;
60	15	$src->replay($n->offset);
61	15	continue;
62		}
63
64	120	$tagName = $this->tagName($tag);
65
66		// todo: add support for custom tag list
67	120	if (\in_array($tagName, self::VERBATIM_TAGS)) {
68	9	yield from $tag;
69	9	yield from $this->parseVerbatim($src, $tagName);
70	9	continue;
71		}
72
73	111	yield from $tag;
74		}
75		}
76
77		/**
78		* @codeCoverageIgnore
79		*/
80		public static function tokenName(int $token): string
81		{
82		return match ($token) {
83		self::TYPE_RAW => 'HTML:RAW',
84		self::TYPE_KEYWORD => 'HTML:KEYWORD',
85		self::TYPE_OPEN => 'HTML:OPEN_TAG',
86		self::TYPE_OPEN_SHORT => 'HTML:OPEN_SHORT_TAG',
87		self::TYPE_CLOSE => 'HTML:CLOSE_TAG',
88		self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
89		self::TYPE_EQUAL => 'HTML:EQUAL',
90		self::TYPE_ATTRIBUTE => 'HTML:ATTRIBUTE',
91		self::TYPE_WHITESPACE => 'HTML:WHITESPACE',
92		self::TYPE_VERBATIM => 'HTML:VERBATIM',
93		default => 'HTML:UNDEFINED',
94		};
95		}
96
97	9	private function parseVerbatim(Buffer $src, string $verbatim): \Generator
98		{
99	9	$chunks = [];
100
101	9	while ($n = $src->next()) {
102	9	if ($n instanceof Token) {
103	2	$chunks[] = $n;
104	2	continue;
105		}
106
107	9	switch ($n->char) {
108	9	case '"':
109	9	case "'":
110	9	case '`':
111	7	$chunks[] = $n;
112
113		// language inclusions allow nested strings
114	7	while ($nc = $src->next()) {
115	7	$chunks[] = $nc;
116	7	if ($nc instanceof Token) {
117	1	continue;
118		}
119
120	7	if ($nc->char === $n->char) {
121	7	break;
122		}
123		}
124
125	7	break;
126
127	9	case '/':
128	3	$chunks[] = $n;
129
130	3	$multiline = false;
131	3	if ($src->lookaheadByte(1) === '/' \|\| $src->lookaheadByte(1) === '*') {
132	3	if ($src->lookaheadByte(1) === '*') {
133	1	$multiline = true;
134		}
135
136	3	$chunks[] = $src->next();
137
138		// language inclusions allow nested strings
139	3	while ($nc = $src->next()) {
140	3	if ($nc instanceof Token) {
141		continue;
142		}
143
144	3	if ($nc->char === '<') {
145	2	$tag = (clone $this)->parseGrammar($src);
146	2	if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
147		$src->replay($n->offset);
148		break;
149		}
150		// back to primary loop
151	2	$src->replay($nc->offset - 1);
152	2	break 2;
153		}
154
155	3	$chunks[] = $nc;
156
157	3	if ($multiline) {
158	1	if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
159	1	$chunks[] = $src->next();
160	1	break;
161		}
162	2	} elseif ($nc->char === "\n") {
163		break;
164		}
165		}
166		}
167
168	1	break;
169
170	9	case '<':
171		// tag beginning?
172	9	$tag = (clone $this)->parseGrammar($src);
173	9	if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
174		$chunks[] = $n;
175		$src->replay($n->offset);
176		break;
177		}
178
179		// found closing verbatim tag
180	9	yield $this->packToken($chunks, self::TYPE_VERBATIM);
181	9	yield from $tag;
182
183	9	break 2;
184
185		default:
186	9	$chunks[] = $n;
187		}
188		}
189		}
190
191	120	private function tagName(array $tag): string
192		{
193	120	foreach ($tag as $token) {
194	120	if ($token->type === self::TYPE_KEYWORD) {
195	120	return \strtolower($token->content);
196		}
197		}
198
199		return '';
200		}
201
202		/**
203		* TODO issue #767
204		* @link https://github.com/spiral/framework/issues/767
205		* @psalm-suppress UndefinedPropertyFetch
206		*/
207	130	private function parseGrammar(Buffer $src): ?array
208		{
209	130	$this->tokens = [
210	130	new Token(self::TYPE_OPEN, $src->getOffset(), '<'),
211		];
212
213	130	if ($src->lookaheadByte() === '/') {
214	98	$this->tokens[0]->type = self::TYPE_OPEN_SHORT;
215	98	$this->tokens[0]->content .= $src->next()->char;
		Issue (Bug, introduced 2020-09-18 20:16 UTC; 0 ignored issues): The property `char` does not seem to exist on `Spiral\Stempler\Lexer\Token`.
216		}
217
218	130	while ($n = $src->next()) {
219	130	if ($this->attribute !== []) {
220	92	$this->attribute[] = $n;
221
222	92	if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
223	91	$this->flushAttribute();
224		}
225
226	92	continue;
227		}
228
229	130	if ($n instanceof Token) {
230	5	$this->keyword[] = $n;
231	5	continue;
232		}
233
234	130	switch ($n->char) {
235	130	case '"':
236	130	case "'":
237	130	case '`':
238	92	$this->flush();
239	92	$this->attribute[] = $n;
240	92	break;
241
242	130	case '=':
243	91	$this->flush();
244	91	$this->tokens[] = new Token(
245		self::TYPE_EQUAL,
246	91	$n->offset,
247	91	$n->char
248		);
249	91	break;
250
251	130	case '/':
252	69	if ($src->lookaheadByte() === '>') {
253	68	$this->flush();
254	68	$this->tokens[] = new Token(
255		self::TYPE_CLOSE_SHORT,
256	68	$n->offset,
257	68	$n->char . $src->next()->char
258		);
259
260	68	break 2;
261		}
262
263		// unexpected "/"
264	1	return null;
265
266	130	case '>':
267	115	$this->flush();
268	115	$this->tokens[] = new Token(
269		self::TYPE_CLOSE,
270	115	$n->offset,
271	115	$n->char
272		);
273	115	break 2;
274
275		default:
276	125	if (\preg_match(self::REGEXP_WHITESPACE, $n->char)) {
277	94	$this->flushKeyword();
278	94	$this->whitespace[] = $n;
279	94	break;
280		}
281	125	$this->flushWhitespace();
282
283
284	125	if (!\preg_match(self::REGEXP_KEYWORD, $n->char)) {
285		// unexpected char
286	8	return null;
287		}
288
289	123	$this->keyword[] = $n;
290		}
291		}
292
293	127	if (!$this->isValid()) {
294	7	return null;
295		}
296
297	120	return $this->tokens;
298		}
299
300	127	private function isValid(): bool
301		{
302		// tag is too short or does not have name keyword
303	127	if (\count($this->tokens) < 3) {
304	2	return false;
305		}
306
307	125	$last = $this->tokens[\count($this->tokens) - 1];
308	125	if ($last->type !== self::TYPE_CLOSE && $last->type !== self::TYPE_CLOSE_SHORT) {
309	2	return false;
310		}
311
312	123	foreach ($this->tokens as $token) {
313	123	switch ($token->type) {
314		case self::TYPE_WHITESPACE:
315		// ignore
316	3	continue 2;
317
318		case self::TYPE_ATTRIBUTE:
319		case self::TYPE_EQUAL:
320	3	return false;
321
322		case self::TYPE_KEYWORD:
323	120	return true;
324		}
325		}
326
327		return false;
328		}
329
330		/**
331		* Flush whitespace or keyword tokens.
332		*/
333	128	private function flush(): void
334		{
335	128	$this->flushWhitespace();
336	128	$this->flushKeyword();
337		}
338
339		/**
340		* Flush keyword content.
341		*/
342	130	private function flushWhitespace(): void
343		{
344	130	if ($this->whitespace === []) {
345	130	return;
346		}
347
348	94	$this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
349	94	$this->whitespace = [];
350		}
351
352		/**
353		* Flush keyword content.
354		*/
355	128	private function flushKeyword(): void
356		{
357	128	if ($this->keyword === []) {
358	96	return;
359		}
360
361	125	$this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
362	125	$this->keyword = [];
363		}
364
365		/**
366		* Flush attribute content.
367		*/
368	91	private function flushAttribute(): void
369		{
370	91	if ($this->attribute === []) {
371		return;
372		}
373
374	91	$this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
375	91	$this->attribute = [];
376		}
377		}
378

spiral / framework

Pull Request — master (#806)

HTMLGrammar::parseGrammar() C

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Duplication Side-by-Side

Filter issues like