Passed
Push — master ( c776c7...570285 )
by Kirill
04:05
created

HTMLGrammar::tokenName()   B

Complexity

Conditions 11
Paths 11

Size

Total Lines 25
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 23
c 1
b 0
f 0
nc 11
nop 1
dl 0
loc 25
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the body into its own well-named method.

1
<?php
2
3
/**
4
 * Spiral Framework.
5
 *
6
 * @license   MIT
7
 * @author    Anton Titov (Wolfy-J)
8
 */
9
10
declare(strict_types=1);
11
12
namespace Spiral\Stempler\Lexer\Grammar;
13
14
use Spiral\Stempler\Lexer\Buffer;
15
use Spiral\Stempler\Lexer\Byte;
16
use Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait;
17
use Spiral\Stempler\Lexer\GrammarInterface;
18
use Spiral\Stempler\Lexer\Token;
19
20
/**
21
 * @see https://html.spec.whatwg.org/multipage/syntax.html
22
 */
23
final class HTMLGrammar implements GrammarInterface
24
{
25
    use TokenTrait;
0 ignored issues
show
introduced by
The trait Spiral\Stempler\Lexer\Grammar\Traits\TokenTrait requires some properties which are not provided by Spiral\Stempler\Lexer\Grammar\HTMLGrammar: $char, $content
Loading history...
26
27
    // HTML grammar tokens
28
    public const TYPE_RAW         = 0;
29
    public const TYPE_KEYWORD     = 1;
30
    public const TYPE_OPEN        = 2;
31
    public const TYPE_OPEN_SHORT  = 3;
32
    public const TYPE_CLOSE       = 4;
33
    public const TYPE_CLOSE_SHORT = 5;
34
    public const TYPE_EQUAL       = 6;
35
    public const TYPE_ATTRIBUTE   = 7;
36
    public const TYPE_WHITESPACE  = 9;
37
    public const TYPE_VERBATIM    = 10;
38
39
    // Content within given tags must not be parsed
40
    private const VERBATIM_TAGS = ['script', 'canvas', 'style'];
41
42
    // whitespace
43
    private const REGEXP_WHITESPACE = '/\s/';
44
45
    // Allowed keyword characters.
46
    private const REGEXP_KEYWORD = '/[a-z0-9_\-:\.]/ui';
47
48
    /** @var array */
49
    private $whitespace = [];
50
51
    /** @var array */
52
    private $attribute = [];
53
54
    /** @var array */
55
    private $keyword = [];
56
57
    /**
     * Turn "<...>" byte runs of the source buffer into HTML tag tokens.
     *
     * Every element that is not a "<" byte is yielded untouched. On "<" a tag is
     * parsed using a clone of this grammar, so the partial state of a failed attempt
     * never leaks into this instance. When the tag is one of VERBATIM_TAGS, the
     * content up to its closing tag is emitted through parseVerbatim() instead of
     * being parsed as HTML.
     *
     * @inheritDoc
     */
    public function parse(Buffer $src): \Generator
    {
        while ($n = $src->next()) {
            if (!$n instanceof Byte || $n->char !== '<') {
                yield $n;
                continue;
            }

            // work with isolated token stream!
            $tag = (clone $this)->parseGrammar($src);
            if ($tag === null) {
                // not a tag: emit "<" as is and replay the buffer from its offset
                yield $n;
                $src->replay($n->offset);
                continue;
            }

            // resolve the name before handing the tokens over to the consumer
            $tagName = $this->tagName($tag);

            yield from $tag;

            // todo: add support for custom tag list
            if (in_array($tagName, self::VERBATIM_TAGS, true)) {
                yield from $this->parseVerbatim($src, $tagName);
            }
        }
    }
88
89
    /**
     * Map a token type constant of this grammar to its printable name.
     *
     * Types which do not belong to this grammar resolve to "HTML:UNDEFINED".
     *
     * @codeCoverageIgnore
     * @inheritDoc
     */
    public static function tokenName(int $token): string
    {
        $names = [
            self::TYPE_RAW         => 'HTML:RAW',
            self::TYPE_KEYWORD     => 'HTML:KEYWORD',
            self::TYPE_OPEN        => 'HTML:OPEN_TAG',
            self::TYPE_OPEN_SHORT  => 'HTML:OPEN_SHORT_TAG',
            self::TYPE_CLOSE       => 'HTML:CLOSE_TAG',
            self::TYPE_CLOSE_SHORT => 'HTML:CLOSE_SHORT_TAG',
            self::TYPE_EQUAL       => 'HTML:EQUAL',
            self::TYPE_ATTRIBUTE   => 'HTML:ATTRIBUTE',
            self::TYPE_WHITESPACE  => 'HTML:WHITESPACE',
            self::TYPE_VERBATIM    => 'HTML:VERBATIM',
        ];

        return $names[$token] ?? 'HTML:UNDEFINED';
    }
120
121
    /**
     * Collect the body of a verbatim tag up to the tag which closes it.
     *
     * Elements read from the buffer are accumulated until a tag named $verbatim is
     * parsed; the accumulated content is then yielded as a single TYPE_VERBATIM
     * token, followed by the tokens of that tag. Quoted strings (", ' and `) are
     * copied up to the next byte equal to the opening quote, so a "<" inside a string
     * is never treated as a tag. "//" and "/*" comments are copied as well, but a tag
     * named $verbatim found inside a comment still ends the verbatim block.
     *
     * NOTE(review): when the buffer ends before such a tag is found, nothing is
     * yielded and the collected content is discarded - confirm this is intended.
     *
     * @param Buffer $src      Source positioned right after the opening verbatim tag.
     * @param string $verbatim Lower-cased name of the tag which ends the block.
     * @return \Generator
     */
    private function parseVerbatim(Buffer $src, string $verbatim): \Generator
    {
        $chunks = [];

        while ($n = $src->next()) {
            if ($n instanceof Token) {
                $chunks[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    $chunks[] = $n;

                    // language inclusions allow nested strings; only the quote byte is
                    // compared, escaped quotes are not recognised
                    while ($nc = $src->next()) {
                        $chunks[] = $nc;

                        if (!$nc instanceof Token && $nc->char === $n->char) {
                            break;
                        }
                    }

                    break;

                case '/':
                    $chunks[] = $n;

                    $ahead = $src->lookaheadByte(1);
                    if ($ahead !== '/' && $ahead !== '*') {
                        // a lone slash, not a comment
                        break;
                    }

                    $multiline = $ahead === '*';

                    // second byte of the comment opening
                    $chunks[] = $src->next();

                    while ($nc = $src->next()) {
                        if ($nc instanceof Token) {
                            // tokens met inside of a comment are not collected
                            continue;
                        }

                        if ($nc->char === '<') {
                            $tag = (clone $this)->parseGrammar($src);
                            if ($tag === null || $this->tagName($tag) !== $verbatim) {
                                // NOTE(review): this replays from the opening "/" of the
                                // comment while the "<" case below replays from the "<"
                                // itself; bytes already collected look like they are read
                                // a second time - confirm against Buffer::replay()
                                $src->replay($n->offset);
                                break;
                            }

                            // back to primary loop, which meets the same "<" again
                            $src->replay($nc->offset - 1);
                            break 2;
                        }

                        $chunks[] = $nc;

                        if ($multiline) {
                            if ($nc->char === '*' && $src->lookaheadByte(1) === '/') {
                                // closing slash of the block comment
                                $chunks[] = $src->next();
                                break;
                            }
                        } elseif ($nc->char === "\n") {
                            // line comment ends with the line
                            break;
                        }
                    }

                    break;

                case '<':
                    // tag beginning?
                    $tag = (clone $this)->parseGrammar($src);
                    if ($tag === null || $this->tagName($tag) !== $verbatim) {
                        // some other markup: keep "<" as content and replay from its offset
                        $chunks[] = $n;
                        $src->replay($n->offset);
                        break;
                    }

                    // found closing verbatim tag
                    yield $this->packToken($chunks, self::TYPE_VERBATIM);
                    yield from $tag;

                    // leave both the switch and the read loop
                    break 2;

                default:
                    $chunks[] = $n;
            }
        }
    }
219
220
    /**
     * Resolve the lower-cased name of a parsed tag.
     *
     * @param array $tag Tokens of a single tag as returned by parseGrammar().
     * @return string Content of the first TYPE_KEYWORD token, or an empty string
     *                when the tag carries no keyword.
     */
    private function tagName(array $tag): string
    {
        foreach ($tag as $candidate) {
            if ($candidate->type !== self::TYPE_KEYWORD) {
                continue;
            }

            // the first keyword met is taken as the name
            return strtolower($candidate->content);
        }

        return '';
    }
234
235
    /**
     * Parse a single tag; the caller has already read its opening "<".
     *
     * The collected tokens are kept in $this->tokens, which is why callers invoke
     * this method on a clone of the grammar. Parsing stops at ">" or "/>".
     *
     * @param Buffer $src
     * @return array|null Tokens of the tag, or null when the input is not a valid
     *                    tag: a "/" which is not followed by ">", a byte which is
     *                    neither whitespace nor an allowed keyword character, or a
     *                    token sequence rejected by isValid().
     */
    private function parseGrammar(Buffer $src): ?array
    {
        $this->tokens = [
            new Token(self::TYPE_OPEN, $src->getOffset(), '<'),
        ];

        if ($src->lookaheadByte() === '/') {
            $this->tokens[0]->type = self::TYPE_OPEN_SHORT;

            // the lookahead promised a byte; the check keeps the property access valid
            // for every type next() can return
            $slash = $src->next();
            if ($slash instanceof Byte) {
                $this->tokens[0]->content .= $slash->char;
            }
        }

        while ($n = $src->next()) {
            if ($this->attribute !== []) {
                // inside of a quoted value: collect everything up to the matching quote
                $this->attribute[] = $n;

                if ($n instanceof Byte && $n->char === $this->attribute[0]->char) {
                    $this->flushAttribute();
                }

                continue;
            }

            if ($n instanceof Token) {
                // tokens met outside of quotes become part of the current keyword
                $this->keyword[] = $n;
                continue;
            }

            switch ($n->char) {
                case '"':
                case "'":
                case '`':
                    // opening quote of an attribute value
                    $this->flush();
                    $this->attribute[] = $n;
                    break;

                case '=':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_EQUAL,
                        $n->offset,
                        $n->char
                    );
                    break;

                case '/':
                    if ($src->lookaheadByte() !== '>') {
                        // unexpected "/"
                        return null;
                    }

                    $this->flush();

                    // consume the ">" which completes "/>"
                    $closing = $src->next();
                    $this->tokens[] = new Token(
                        self::TYPE_CLOSE_SHORT,
                        $n->offset,
                        $n->char . ($closing instanceof Byte ? $closing->char : '')
                    );

                    // leave both the switch and the read loop
                    break 2;

                case '>':
                    $this->flush();
                    $this->tokens[] = new Token(
                        self::TYPE_CLOSE,
                        $n->offset,
                        $n->char
                    );

                    // leave both the switch and the read loop
                    break 2;

                default:
                    if (preg_match(self::REGEXP_WHITESPACE, $n->char)) {
                        $this->flushKeyword();
                        $this->whitespace[] = $n;
                        break;
                    }

                    $this->flushWhitespace();

                    if (!preg_match(self::REGEXP_KEYWORD, $n->char)) {
                        // unexpected char
                        return null;
                    }

                    $this->keyword[] = $n;
            }
        }

        // also covers a buffer which ended before the tag was closed
        if (!$this->isValid()) {
            return null;
        }

        return $this->tokens;
    }
331
332
    /**
     * Check that the collected tokens form a complete tag.
     *
     * A tag is accepted when it has at least three tokens, ends with a closing token
     * and a keyword (the tag name) is met before any attribute or equal sign.
     *
     * @return bool
     */
    private function isValid(): bool
    {
        $total = count($this->tokens);

        // tag is too short or does not have name keyword
        if ($total < 3) {
            return false;
        }

        $closing = $this->tokens[$total - 1]->type;
        if (!in_array($closing, [self::TYPE_CLOSE, self::TYPE_CLOSE_SHORT], true)) {
            return false;
        }

        foreach ($this->tokens as $token) {
            if ($token->type === self::TYPE_KEYWORD) {
                return true;
            }

            if ($token->type === self::TYPE_ATTRIBUTE || $token->type === self::TYPE_EQUAL) {
                // a value or "=" before the name
                return false;
            }

            // whitespace and any other token type: keep looking
        }

        return false;
    }
364
365
    /**
     * Emit the pending whitespace buffer and then the pending keyword buffer as
     * tokens; empty buffers are skipped by the respective flush methods.
     */
    private function flush(): void
    {
        // whitespace first, keyword second
        $this->flushWhitespace();
        $this->flushKeyword();
    }
373
374
    /**
     * Flush whitespace content.
     *
     * Packs the collected whitespace into a single TYPE_WHITESPACE token, appends it
     * to the token list and resets the buffer. Does nothing when the buffer is empty.
     */
    private function flushWhitespace(): void
    {
        if ($this->whitespace === []) {
            return;
        }

        $this->tokens[] = $this->packToken($this->whitespace, self::TYPE_WHITESPACE);
        $this->whitespace = [];
    }
386
387
    /**
     * Pack the collected keyword elements into a single TYPE_KEYWORD token, append
     * it to the token list and reset the buffer. An empty buffer produces no token.
     */
    private function flushKeyword(): void
    {
        if ($this->keyword !== []) {
            $this->tokens[] = $this->packToken($this->keyword, self::TYPE_KEYWORD);
            $this->keyword = [];
        }
    }
399
400
    /**
     * Pack the collected attribute elements into a single TYPE_ATTRIBUTE token,
     * append it to the token list and reset the buffer. An empty buffer produces
     * no token.
     */
    private function flushAttribute(): void
    {
        if ($this->attribute !== []) {
            $this->tokens[] = $this->packToken($this->attribute, self::TYPE_ATTRIBUTE);
            $this->attribute = [];
        }
    }
412
}
413