HTMLGrammar::flushKeyword() - Code Metrics - Inspection of "Remove unused dev-autoload namespace" - spiral/framework - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( c776c7...570285 )

by Kirill

created 2020-09-18 20:12 UTC

HTMLGrammar::flushKeyword() A

↳ Parent: HTMLGrammar

Complexity

Conditions	2
Paths	2

Size

Total Lines	8
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	2
eloc	4
c	1
b	0
f	0
nc	2
nop	0
dl	0
loc	8
rs	10

<?php

/**
 * Spiral Framework.
 *
 * @license   MIT
 * @author    Anton Titov (Wolfy-J)
 */

declare(strict_types=1);

namespace Spiral\Stempler\Lexer\Grammar;

use Spiral\Stempler\Lexer\Buffer;
use Spiral\Stempler\Lexer\Byte;
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
use Spiral\Stempler\Lexer\GrammarInterface;
use Spiral\Stempler\Lexer\Token;

/**
 * @see https://html.spec.whatwg.org/multipage/syntax.html
 */
final class HTMLGrammar implements GrammarInterface
{
    use TokenTrait;


    // HTML grammar tokens.
    // Integer identifiers of the token types known to this grammar; tokenName()
    // maps each of them to a readable label. Note that the value 8 is not assigned.
    public const TYPE_RAW         = 0;
    public const TYPE_KEYWORD     = 1;
    public const TYPE_OPEN        = 2;
    public const TYPE_OPEN_SHORT  = 3;
    public const TYPE_CLOSE       = 4;
    public const TYPE_CLOSE_SHORT = 5;
    public const TYPE_EQUAL       = 6;
    public const TYPE_ATTRIBUTE   = 7;
    public const TYPE_WHITESPACE  = 9;
    public const TYPE_VERBATIM    = 10;

    // Content within given tags must not be parsed. parse() compares these names
    // with the lower-cased tag name and hands the tag body over to parseVerbatim().
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];

    // Matches a whitespace character; parseGrammar() applies it to one char at a time.
    private const REGEXP_WHITESPACE = '/\s/';

    // Allowed keyword characters: a-z (matched case-insensitively), digits,
    // "_", "-", ":" and "."; parseGrammar() applies it to one char at a time.
    private const REGEXP_KEYWORD = '/[a-z0-9_\-:\.]/ui';

    /**
     * Whitespace bytes collected by parseGrammar() that have not been packed
     * into a TYPE_WHITESPACE token yet (see flushWhitespace()).
     *
     * @var array
     */
    private $whitespace = [];

    /**
     * Content of the quoted attribute value currently being read by parseGrammar();
     * the first element is the opening quote byte. Empty when no quoted value is open.
     *
     * @var array
     */
    private $attribute = [];

    /**
     * Bytes and foreign tokens collected by parseGrammar() that have not been
     * packed into a TYPE_KEYWORD token yet (see flushKeyword()).
     *
     * @var array
     */
    private $keyword = [];

    /**
     * @inheritDoc
     *
     * Everything that is not a "<" byte is passed through unchanged. On "<" the
     * method tries to read a complete tag; when that succeeds the tag tokens are
     * yielded, and for tags listed in VERBATIM_TAGS the following content is
     * delegated to parseVerbatim() instead of being parsed as HTML.
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream! parseGrammar() mutates the token and
            // buffer properties, so it runs on a clone to keep this instance clean
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: emit "<" as is and rewind the buffer so the bytes consumed
                // by the failed attempt are read again
                // (presumably replay() restarts right after the given offset - confirm against Buffer)
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            // resolve the name before handing the tokens to the consumer
            $tagName = $this->tagName($tag);

            yield from $tag;

            // todo: add support for custom tag list
            // strict comparison: both sides are always strings
            if (in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }

    /**
     * Translate a token type identifier of this grammar into its readable label.
     * Identifiers that are not known to the grammar resolve to "HTML:UNDEFINED".
     *
     * @codeCoverageIgnore
     * @inheritDoc
     */
    public static function tokenName(int $token): string
    {
        $labels = [
            self::TYPE_RAW         => 'HTML:RAW',
            self::TYPE_KEYWORD     => 'HTML:KEYWORD',
            self::TYPE_OPEN        => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT  => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE       => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL       => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE   => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE  => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM    => 'HTML:VERBATIM',
        ];

        return $labels[$token] ?? 'HTML:UNDEFINED';
    }

    /**
     * Collect the raw content that follows a verbatim tag (see VERBATIM_TAGS) until
     * a tag with the same name is found, then yield the collected content as one
     * TYPE_VERBATIM token followed by the tokens of that tag.
     *
     * Quoted strings and line/block comments are tracked separately from the rest
     * of the content. The terminating check only compares tag names; it does not
     * distinguish opening from closing tags.
     *
     * NOTE(review): when the source ends before a tag named $verbatim is found, the
     * loop finishes without yielding the collected chunks - confirm this is intended.
     *
     * @param Buffer $src      Source buffer; parse() calls this right after yielding the opening tag.
     * @param string $verbatim Lower-cased tag name as returned by tagName().
     * @return \Generator
     */
    private function parseVerbatim(Buffer $src, string $verbatim)
    {
        // bytes and tokens which make up the verbatim content
        $chunks = [];

        while ($n = $src->next()) {
            // tokens found outside of strings and comments are kept as part of the content
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings
                    // copy everything up to and including the next byte equal to the opening
                    // quote; backslash escapes are not interpreted here
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;
                        if ($nc instanceof Token) {
                            continue;
                        }

                        if ($nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    // a second "/" starts a line comment, "*" starts a block comment
                    $multiline = false;
                    if ($src->lookaheadByte(1) === '/' || $src->lookaheadByte(1) === '*') {
                        if ($src->lookaheadByte(1) === '*') {
                            $multiline = true;
                        }

                        // consume the second comment character
                        $chunks[] = $src->next();

                        // language inclusions allow nested strings
                        while ($nc = $src->next()) {
                            // NOTE(review): tokens found inside a comment are skipped without being
                            // added to $chunks - confirm they are meant to be dropped
                            if ($nc instanceof Token) {
                                continue;
                            }

                            if ($nc->char === '<') {
                                $tag = (clone $this)->parseGrammar($src);
                                if ($tag === null || $this->tagName($tag) !== $verbatim) {
                                    // NOTE(review): this rewinds relative to the "/" which opened the
                                    // comment ($n) and leaves the comment loop, while the '<' case below
                                    // rewinds relative to the "<" itself - verify against Buffer::replay()
                                    $src->replay($n->offset);
                                    break;
                                }
                                // back to primary loop
                                // "break 2" leaves the comment loop and the switch; the rewind presumably
                                // makes the outer loop read "<" again so the '<' case emits the content
                                // (confirm against Buffer::replay())
                                $src->replay($nc->offset - 1);
                                break 2;
                            }

                            $chunks[] = $nc;

                            if ($multiline) {
                                // block comment ends at "*" immediately followed by "/"
                                if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                    $chunks[] = $src->next();
                                    break;
                                }
                            } elseif ($nc->char === "\n") {
                                // line comment ends at the line break
                                break;
                            }
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        // some other tag or a plain "<": keep it as content and rewind the bytes
                        // consumed by the parsing attempt
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    // leaves both the switch and the outer loop, ending the generator
                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }

    /**
     * Resolve the tag name: the lower-cased content of the first TYPE_KEYWORD
     * token in the given list, or an empty string when the list has no keyword.
     *
     * @param array $tag Tokens of a single tag as produced by parseGrammar().
     * @return string
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $candidate) {
            if ($candidate->type !== self::TYPE_KEYWORD) {
                continue;
            }

            return strtolower($candidate->content);
        }

        return '';
    }

    /**
     * Try to read one tag from the buffer and split it into tokens.
     *
     * The method is invoked right after a "<" byte has been consumed. It mutates
     * $this->tokens and the whitespace/keyword/attribute buffers, which is why the
     * callers in this class run it on a clone of the grammar.
     *
     * Reading stops at ">" or "/>". Null is returned when an unexpected character
     * is met or when the collected tokens do not form a valid tag (see isValid());
     * the caller is responsible for rewinding the buffer in that case.
     *
     * @param Buffer $src
     * @return array|null Tokens of the tag or null when the input is not a tag.
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $this->tokens = [
            new Token(self::TYPE_OPEN, $src->getOffset(), '<')
        ];

        // "</" opens a closing tag
        if ($src->lookaheadByte() === '/') {
            $this->tokens[0]->type = self::TYPE_OPEN_SHORT;
            $this->tokens[0]->content .= $src->next()->char;
        }

        while ($n = $src->next()) {
            // inside of a quoted value: collect everything (bytes and tokens) until
            // the byte equal to the opening quote is found
            if ($this->attribute !== []) {
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            // tokens which are not bytes become part of the current keyword
            if ($n instanceof Token) {
                // emit pending whitespace first (same as for keyword bytes below), otherwise
                // it would be flushed after this keyword and the tokens would leave their
                // source order
                $this->flushWhitespace();
                $this->keyword[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    // opening quote of an attribute value
                    $this->flush();
                    $this->attribute[] = $n;
                    break;

                case '=':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_EQUAL,
                        $n->offset,
                        $n->char
                    );
                    break;

                case '/':
                    // only valid as a part of the self-closing "/>"
                    if ($src->lookaheadByte() === '>') {
                        $this->flush();
                        $this->tokens[] = new Token(
                            self::TYPE_CLOSE_SHORT,
                            $n->offset,
                            $n->char . $src->next()->char
                        );

                        // leave the switch and the reading loop
                        break 2;
                    }

                    // unexpected "/"
                    return null;

                case '>':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_CLOSE,
                        $n->offset,
                        $n->char
                    );

                    // leave the switch and the reading loop
                    break 2;

                default:
                    if (preg_match(self::REGEXP_WHITESPACE, $n->char)) {
                        // whitespace terminates the current keyword
                        $this->flushKeyword();
                        $this->whitespace[] = $n;
                        break;
                    }

                    // a non-whitespace character terminates the current whitespace run
                    $this->flushWhitespace();

                    if (!preg_match(self::REGEXP_KEYWORD, $n->char)) {
                        // unexpected char
                        return null;
                    }

                    $this->keyword[] = $n;
            }
        }

        // also covers the end of the source being reached without a closing ">"
        if (!$this->isValid()) {
            return null;
        }

        return $this->tokens;
    }

    /**
     * Check that the collected tokens form a complete tag: there are at least
     * three tokens, the last one closes the tag, and a keyword (the tag name)
     * shows up before any attribute value or equal sign.
     *
     * @return bool
     */
    private function isValid(): bool
    {
        $total = count($this->tokens);

        // tag is too short or does not have name keyword
        if ($total < 3) {
            return false;
        }

        $closing = $this->tokens[$total - 1]->type;
        if (!in_array($closing, [self::TYPE_CLOSE, self::TYPE_CLOSE_SHORT], true)) {
            return false;
        }

        foreach ($this->tokens as $item) {
            if ($item->type === self::TYPE_KEYWORD) {
                return true;
            }

            if ($item->type === self::TYPE_ATTRIBUTE || $item->type === self::TYPE_EQUAL) {
                return false;
            }

            // whitespace and any other token type: keep looking
        }

        return false;
    }

    /**
     * Flush whitespace or keyword tokens.
     *
     * Packs whatever is pending in the whitespace and keyword buffers into
     * tokens and appends them to $this->tokens; an empty buffer is skipped.
     * Pending whitespace is emitted before a pending keyword.
     */
    private function flush(): void
    {
        $this->flushWhitespace();
        $this->flushKeyword();
    }

    /**
     * Flush whitespace content: pack the pending whitespace bytes into a single
     * TYPE_WHITESPACE token, append it to the token list and reset the buffer.
     * Does nothing when no whitespace is pending.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace !== []) {
            $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
            $this->whitespace = [];
        }
    }

    /**
     * Flush keyword content: pack the pending keyword parts into a single
     * TYPE_KEYWORD token, append it to the token list and reset the buffer.
     * Does nothing when no keyword is pending.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword !== []) {
            $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
            $this->keyword = [];
        }
    }

    /**
     * Flush attribute content: pack the collected quoted value into a single
     * TYPE_ATTRIBUTE token, append it to the token list and reset the buffer.
     * Does nothing when no attribute value is pending.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute !== []) {
            $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
            $this->attribute = [];
        }
    }
}


1			<?php
2
3			/**
4			* Spiral Framework.
5			*
6			* @license MIT
7			* @author Anton Titov (Wolfy-J)
8			*/
9
10			declare(strict_types=1);
11
12			namespace Spiral\Stempler\Lexer\Grammar;
13
14			use Spiral\Stempler\Lexer\Buffer;
15			use Spiral\Stempler\Lexer\Byte;
16			use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
17			use Spiral\Stempler\Lexer\GrammarInterface;
18			use Spiral\Stempler\Lexer\Token;
19
20			/**
21			* @see https://html.spec.whatwg.org/multipage/syntax.htm
22			*/
23			final class HTMLGrammar implements GrammarInterface
24			{
25			use TokenTrait;
			0 ignored issues – show introduced 2020-09-18 20:16 UTC by Report Bug Copy Issue Report The trait `Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait` requires some properties which are not provided by `Spiral\Stempler\Lexer\Grammar\HTMLGrammar`: `$char`, `$content` Loading history...
26
27			// HTML grammar tokens
28			public const TYPE_RAW = 0;
29			public const TYPE_KEYWORD = 1;
30			public const TYPE_OPEN = 2;
31			public const TYPE_OPEN_SHORT = 3;
32			public const TYPE_CLOSE = 4;
33			public const TYPE_CLOSE_SHORT = 5;
34			public const TYPE_EQUAL = 6;
35			public const TYPE_ATTRIBUTE = 7;
36			public const TYPE_WHITESPACE = 9;
37			public const TYPE_VERBATIM = 10;
38
39			// Content within given tags must not be parsed
40			private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
41
42			// whitespace
43			private const REGEXP_WHITESPACE = '/\s/';
44
45			// Allowed keyword characters.
46			private const REGEXP_KEYWORD = '/[a-z0-9_\-:\.]/ui';
47
48			/** @var array */
49			private $whitespace = [];
50
51			/** @var array */
52			private $attribute = [];
53
54			/** @var array */
55			private $keyword = [];
56
57			/**
58			* @inheritDoc
59			*/
60			public function parse(Buffer $src): \Generator
61			{
62			while ($n = $src->next()) {
63			if (!$n instanceof Byte \|\| $n->char !== '<') {
64			yield $n;
65			continue;
66			}
67
68			// work with isolated token stream!
69			$tag = (clone $this)->parseGrammar($src);
70			if ($tag === null) {
71			yield $n;
72			$src->replay($n->offset);
73			continue;
74			}
75
76			$tagName = $this->tagName($tag);
77
78			// todo: add support for custom tag list
79			if (in_array($tagName, self::VERBATIM_TAGS)) {
80			yield from $tag;
81			yield from $this->parseVerbatim($src, $tagName);
82			continue;
83			}
84
85			yield from $tag;
86			}
87			}
88
89			/**
90			* @codeCoverageIgnore
91			* @inheritDoc
92			*/
93			public static function tokenName(int $token): string
94			{
95			switch ($token) {
96			case self::TYPE_RAW:
97			return 'HTML:RAW';
98			case self::TYPE_KEYWORD:
99			return 'HTML:KEYWORD';
100			case self::TYPE_OPEN:
101			return 'HTML:OPEN_TAG';
102			case self::TYPE_OPEN_SHORT:
103			return 'HTML:OPEN_SHORT_TAG';
104			case self::TYPE_CLOSE:
105			return 'HTML:CLOSE_TAG';
106			case self::TYPE_CLOSE_SHORT:
107			return 'HTML:CLOSE_SHORT_TAG';
108			case self::TYPE_EQUAL:
109			return 'HTML:EQUAL';
110			case self::TYPE_ATTRIBUTE:
111			return 'HTML:ATTRIBUTE';
112			case self::TYPE_WHITESPACE:
113			return 'HTML:WHITESPACE';
114			case self::TYPE_VERBATIM:
115			return 'HTML:VERBATIM';
116			default:
117			return 'HTML:UNDEFINED';
118			}
119			}
120
121			/**
122			* @param Buffer $src
123			* @param string $verbatim
124			* @return \Generator
125			*/
126			private function parseVerbatim(Buffer $src, string $verbatim)
127			{
128			$chunks = [];
129
130			while ($n = $src->next()) {
131			if ($n instanceof Token) {
132			$chunks[] = $n;
133			continue;
134			}
135
136			switch ($n->char) {
137			case '"':
138			case "'":
139			case '`':
140			$chunks[] = $n;
141
142			// language inclusions allow nested strings
143			while ($nc = $src->next()) {
144			$chunks[] = $nc;
145			if ($nc instanceof Token) {
146			continue;
147			}
148
149			if ($nc->char === $n->char) {
150			break;
151			}
152			}
153
154			break;
155
156			case '/':
157			$chunks[] = $n;
158
159			$multiline = false;
160			if ($src->lookaheadByte(1) === '/' \|\| $src->lookaheadByte(1) === '*') {
161			if ($src->lookaheadByte(1) === '*') {
162			$multiline = true;
163			}
164
165			$chunks[] = $src->next();
166
167			// language inclusions allow nested strings
168			while ($nc = $src->next()) {
169			if ($nc instanceof Token) {
170			continue;
171			}
172
173			if ($nc->char === '<') {
174			$tag = (clone $this)->parseGrammar($src);
175			if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
176			$src->replay($n->offset);
177			break;
178			}
179			// back to primary loop
180			$src->replay($nc->offset - 1);
181			break 2;
182			}
183
184			$chunks[] = $nc;
185
186			if ($multiline) {
187			if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
188			$chunks[] = $src->next();
189			break;
190			}
191			} elseif ($nc->char === "\n") {
192			break;
193			}
194			}
195			}
196
197			break;
198
199			case '<':
200			// tag beginning?
201			$tag = (clone $this)->parseGrammar($src);
202			if ($tag === null \|\| $this->tagName($tag) !== $verbatim) {
203			$chunks[] = $n;
204			$src->replay($n->offset);
205			break;
206			}
207
208			// found closing verbatim tag
209			yield $this->packToken($chunks, self::TYPE_VERBATIM);
210			yield from $tag;
211
212			break 2;
213
214			default:
215			$chunks[] = $n;
216			}
217			}
218			}
219
220			/**
221			* @param array $tag
222			* @return string
223			*/
224			private function tagName(array $tag): string
225			{
226			foreach ($tag as $token) {
227			if ($token->type === self::TYPE_KEYWORD) {
228			return strtolower($token->content);
229			}
230			}
231
232			return '';
233			}
234
235			/**
236			* @param Buffer $src
237			* @return array\|null
238			*/
239			private function parseGrammar(Buffer $src): ?array
240			{
241			$this->tokens = [
242			new Token(self::TYPE_OPEN, $src->getOffset(), '<')
243			];
244
245			if ($src->lookaheadByte() === '/') {
246			$this->tokens[0]->type = self::TYPE_OPEN_SHORT;
247			$this->tokens[0]->content .= $src->next()->char;
			0 ignored issues – show Bug introduced 2020-09-18 20:16 UTC by Report Bug Copy Issue Report The property `char` does not seem to exist on `Spiral\Stempler\Lexer\Token`. Loading history...
248			}
249
250			while ($n = $src->next()) {
251			if ($this->attribute !== []) {
252			$this->attribute[] = $n;
253
254			if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
255			$this->flushAttribute();
256			}
257
258			continue;
259			}
260
261			if ($n instanceof Token) {
262			$this->keyword[] = $n;
263			continue;
264			}
265
266			switch ($n->char) {
267			case '"':
268			case "'":
269			case '`':
270			$this->flush();
271			$this->attribute[] = $n;
272			break;
273
274			case '=':
275			$this->flush();
276			$this->tokens[] = new Token(
277			self::TYPE_EQUAL,
278			$n->offset,
279			$n->char
280			);
281			break;
282
283			case '/':
284			if ($src->lookaheadByte() === '>') {
285			$this->flush();
286			$this->tokens[] = new Token(
287			self::TYPE_CLOSE_SHORT,
288			$n->offset,
289			$n->char . $src->next()->char
290			);
291
292			break 2;
293			}
294
295			// unexpected "/"
296			return null;
297
298			case '>':
299			$this->flush();
300			$this->tokens[] = new Token(
301			self::TYPE_CLOSE,
302			$n->offset,
303			$n->char
304			);
305			break 2;
306
307			default:
308			if (preg_match(self::REGEXP_WHITESPACE, $n->char)) {
309			$this->flushKeyword();
310			$this->whitespace[] = $n;
311			break;
312			}
313			$this->flushWhitespace();
314
315
316			if (!preg_match(self::REGEXP_KEYWORD, $n->char)) {
317			// unexpected char
318			return null;
319			}
320
321			$this->keyword[] = $n;
322			}
323			}
324
325			if (!$this->isValid()) {
326			return null;
327			}
328
329			return $this->tokens;
330			}
331
332			/**
333			* @return bool
334			*/
335			private function isValid(): bool
336			{
337			// tag is too short or does not have name keyword
338			if (count($this->tokens) < 3) {
339			return false;
340			}
341
342			$last = $this->tokens[count($this->tokens) - 1];
343			if ($last->type !== self::TYPE_CLOSE && $last->type !== self::TYPE_CLOSE_SHORT) {
344			return false;
345			}
346
347			foreach ($this->tokens as $token) {
348			switch ($token->type) {
349			case self::TYPE_WHITESPACE:
350			// ignore
351			continue 2;
352
353			case self::TYPE_ATTRIBUTE:
354			case self::TYPE_EQUAL:
355			return false;
356
357			case self::TYPE_KEYWORD:
358			return true;
359			}
360			}
361
362			return false;
363			}
364
365			/**
366			* Flush whitespace or keyword tokens.
367			*/
368			private function flush(): void
369			{
370			$this->flushWhitespace();
371			$this->flushKeyword();
372			}
373
374			/**
375			* Flush keyword content.
376			*/
377			private function flushWhitespace(): void
378			{
379			if ($this->whitespace === []) {
380			return;
381			}
382
383			$this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
384			$this->whitespace = [];
385			}
386
387			/**
388			* Flush keyword content.
389			*/
390			private function flushKeyword(): void
391			{
392			if ($this->keyword === []) {
393			return;
394			}
395
396			$this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
397			$this->keyword = [];
398			}
399
400			/**
401			* Flush attribute content.
402			*/
403			private function flushAttribute(): void
404			{
405			if ($this->attribute === []) {
406			return;
407			}
408
409			$this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
410			$this->attribute = [];
411			}
412			}
413

spiral / framework

Push — master ( c776c7...570285 )

HTMLGrammar::flushKeyword() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like