Passed
Push — master ( 0853ff...bcb7ae )
by Nikita
04:55
created

Lexer::detectToken()   D

Complexity

Conditions 23
Paths 42

Size

Total Lines 80
Code Lines 56

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 23
eloc 56
nc 42
nop 0
dl 0
loc 80
rs 4.9926
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
/**
3
 * This file is part of PHP-Yacc package.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
declare(strict_types=1);
9
10
namespace PhpYacc\Yacc;
11
12
use PhpYacc\Exception\LexingException;
13
use PhpYacc\Exception\ParseException;
14
use PhpYacc\Support\Utils;
15
16
/**
17
 * Lexer for yacc grammar source: splits an input buffer into Token objects.
18
 */
19
class Lexer
{
    /**
     * Token types that getToken() silently skips.
     */
    private const SPACE_TOKENS = [
        Token::SPACE,
        Token::COMMENT,
        Token::NEWLINE,
    ];

    /**
     * Maps "%"-prefixed tags to their token types.
     */
    private const TAG_MAP = [
        '%%'            => Token::MARK,
        '%{'            => Token::BEGININC,
        '%}'            => Token::ENDINC,
        '%token'        => Token::TOKEN,
        '%term'         => Token::TOKEN,
        '%left'         => Token::LEFT,
        '%right'        => Token::RIGHT,
        '%nonassoc'     => Token::NONASSOC,
        '%prec'         => Token::PRECTOK,
        '%type'         => Token::TYPE,
        '%union'        => Token::UNION,
        '%start'        => Token::START,
        '%expect'       => Token::EXPECT,
        '%pure_parser'  => Token::PURE_PARSER,
    ];

    /**
     * Maps single-character punctuation to its token type.
     */
    private const PUNCTUATION_MAP = [
        ':' => Token::COLON,
        ';' => Token::SEMICOLON,
        '$' => Token::DOLLAR,
    ];

    /**
     * @var string Source text being lexed (set by startLexing()).
     */
    protected $buffer;

    /**
     * @var string File name attached to every produced token.
     */
    protected $filename;

    /**
     * @var int Current line number; starts at 1 and grows with every "\n" read from the buffer.
     */
    protected $line;

    /**
     * @var int Byte offset of the next unread character in the buffer.
     */
    protected $offset;

    /**
     * @var Token|null Token most recently handed out; unset until the first token is read.
     */
    protected $currentToken;

    /**
     * @var Token|null Token pushed back by ungetToken(), or null when none is pending.
     */
    protected $backToken;

    /**
     * @var string|null Character pushed back by ungetChar(), or null when none is pending.
     */
    protected $backChar;

    /**
     * @var bool True after the first "$" of a "$$" pair has been emitted.
     */
    protected $prevIsDollar;

    /**
     * @var string Character currently being examined.
     */
    protected $char;

    /**
     * @var string Text accumulated for the token being built.
     */
    protected $value;

    /**
     * Loads a new input and rewinds the lexer state.
     *
     * @param string $code     Source text to tokenize.
     * @param string $filename Name reported in the produced tokens.
     */
    public function startLexing(string $code, string $filename = '')
    {
        $this->buffer = $code;
        $this->filename = $filename;

        $this->reset();
    }

    /**
     * Rewinds position, line counter and all push-back state.
     *
     * @return void
     */
    protected function reset()
    {
        $this->line = 1;
        $this->offset = 0;
        $this->backChar = null;
        $this->backToken = null;
        $this->prevIsDollar = false;
    }

    /**
     * Returns the next token that is not whitespace, a comment or a newline.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function getToken(): Token
    {
        do {
            $this->currentToken = $this->getRawToken();
            // Strict comparison: token types are never meant to be type-juggled.
        } while (\in_array($this->currentToken->getType(), self::SPACE_TOKENS, true));

        return $this->currentToken;
    }

    /**
     * Pushes the current token back so the next read returns it again.
     *
     * Only one token of push-back is supported.
     *
     * @throws LexingException When a token is already pushed back.
     */
    public function ungetToken()
    {
        if ($this->backToken !== null) {
            throw new LexingException('Too many ungetToken calls');
        }

        $this->backToken = $this->currentToken;
    }

    /**
     * Returns the next significant token without consuming it.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function peek(): Token
    {
        $result = $this->getToken();
        $this->ungetToken();

        return $result;
    }

    /**
     * Returns the next token, including whitespace, newline and comment tokens.
     *
     * A token pushed back with ungetToken() is returned first.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function getRawToken()
    {
        if ($this->backToken !== null) {
            $this->currentToken = $this->backToken;
            $this->backToken = null;

            return $this->currentToken;
        }

        $this->char = $this->getChar();
        $this->value = '';

        // The predicates below consume input and fill $this->value when they
        // match, so their order matters and must not be changed.
        if ($this->isWhitespace()) {
            return $this->token(Token::SPACE, $this->value);
        }

        if ($this->isNewline()) {
            return $this->token(Token::NEWLINE, $this->value);
        }

        if ($this->isComment()) {
            return $this->token(Token::COMMENT, $this->value);
        }

        if ($this->isEof()) {
            return $this->token(Token::EOF, $this->value);
        }

        $tag = $this->detectToken();

        // Known tags and punctuation override the generic type from detectToken().
        $type = self::TAG_MAP[$this->value]
            ?? self::PUNCTUATION_MAP[$this->value]
            ?? $tag;

        return $this->token($type, $this->value);
    }

    /**
     * Consumes a run of whitespace starting at the current character.
     *
     * @throws LexingException
     *
     * @return bool True when whitespace was consumed into $this->value.
     */
    protected function isWhitespace(): bool
    {
        if (!Utils::isWhite($this->char)) {
            return false;
        }

        do {
            $this->value .= $this->char;
            $this->char = $this->getChar();
        } while (Utils::isWhite($this->char));

        // The first non-whitespace character belongs to the next token.
        $this->ungetChar($this->char);

        return true;
    }

    /**
     * Checks whether the current character is a line feed.
     *
     * @return bool
     */
    protected function isNewline(): bool
    {
        if ($this->char !== "\n") {
            return false;
        }

        $this->value = $this->char;

        return true;
    }

    /**
     * Consumes a block or line comment starting at the current character.
     *
     * When the "/" does not start a comment, the look-ahead character is
     * pushed back and the current character is left as "/".
     *
     * @throws LexingException
     * @throws ParseException When a block comment is not closed before end of input.
     *
     * @return bool True when a comment was consumed into $this->value.
     */
    protected function isComment(): bool
    {
        if ($this->char !== '/') {
            return false;
        }

        $this->char = $this->getChar();

        if ($this->char === '*') {
            $this->readBlockComment();

            return true;
        }

        if ($this->char === '/') {
            $this->readLineComment();

            return true;
        }

        // Not a comment: restore the state as if only "/" had been read.
        $this->ungetChar($this->char);
        $this->char = '/';

        return false;
    }

    /**
     * Reads the remainder of a block comment whose opening marker was already consumed.
     *
     * @throws LexingException
     * @throws ParseException When end of input is reached before the closing marker.
     */
    private function readBlockComment(): void
    {
        $this->value = '/*';

        while (true) {
            $this->char = $this->getChar();

            if ($this->char === '*') {
                $after = $this->getChar();

                if ($after === '/') {
                    break;
                }

                // A lone "*": keep it as comment text and re-read the
                // following character on the next iteration.
                $this->ungetChar($after);
            }

            if ($this->char === "\0") {
                throw ParseException::unexpected($this->token(Token::EOF, "\0"), '*/');
            }

            $this->value .= $this->char;
        }

        $this->value .= '*/';
    }

    /**
     * Reads the remainder of a "//" comment up to and including the line feed.
     */
    private function readLineComment(): void
    {
        $this->value = '//';

        do {
            $this->char = $this->getChar();

            // The end-of-input sentinel is not part of the comment text.
            if ($this->char !== "\0") {
                $this->value .= $this->char;
            }
        } while ($this->char !== "\n" && $this->char !== "\0");
    }

    /**
     * Checks whether the current character is the end-of-input sentinel.
     *
     * @return bool
     */
    protected function isEof(): bool
    {
        if ($this->char !== "\0") {
            return false;
        }

        $this->value = $this->char;

        return true;
    }

    /**
     * Reads one token starting at the current character into $this->value.
     *
     * @throws LexingException
     * @throws ParseException When a quoted string is not closed on its line.
     *
     * @return int Token::NAME, Token::NUMBER, Token::STRING or Token::UNKNOW.
     */
    protected function detectToken()
    {
        if ($this->char === '%') {
            $this->consumeTagPrefix();
        }

        if ($this->char === '$') {
            return $this->scanDollar();
        }

        if (Utils::isSymChar($this->char)) {
            return $this->scanSymbol();
        }

        if ($this->char === '\'' || $this->char === '"') {
            return $this->scanString($this->char);
        }

        // Any other single character is returned as-is.
        $this->value .= $this->char;

        return Token::UNKNOW;
    }

    /**
     * Handles a leading "%".
     *
     * When it starts a tag, "%" is stored in $this->value and the current
     * character advances; otherwise the look-ahead is pushed back and the
     * current character stays "%".
     *
     * @throws LexingException
     */
    private function consumeTagPrefix(): void
    {
        $next = $this->getChar();

        if ($next === '%' || $next === '{' || $next === '}' || Utils::isSymChar($next)) {
            $this->value .= '%';
            $this->char = $next;

            return;
        }

        $this->ungetChar($next);
    }

    /**
     * Reads a token starting with "$": a lone "$", one half of "$$", or "$name".
     *
     * @throws LexingException
     *
     * @return int Token::NAME for "$name", Token::UNKNOW otherwise.
     */
    private function scanDollar(): int
    {
        $this->value .= '$';

        if ($this->prevIsDollar) {
            // Second "$" of a "$$" pair.
            $this->prevIsDollar = false;

            return Token::UNKNOW;
        }

        $this->char = $this->getChar();

        if ($this->char === '$') {
            // First "$" of a "$$" pair: the second one is emitted by the next call.
            $this->ungetChar($this->char);
            $this->prevIsDollar = true;

            return Token::UNKNOW;
        }

        if (\ctype_digit($this->char) || !Utils::isSymChar($this->char)) {
            $this->ungetChar($this->char);

            return Token::UNKNOW;
        }

        do {
            $this->value .= $this->char;
            $this->char = $this->getChar();
        } while ($this->char !== "\0" && Utils::isSymChar($this->char));

        $this->ungetChar($this->char);

        return Token::NAME;
    }

    /**
     * Reads a run of symbol characters.
     *
     * @throws LexingException
     *
     * @return int Token::NUMBER when the text is all digits, Token::NAME otherwise.
     */
    private function scanSymbol(): int
    {
        do {
            $this->value .= $this->char;
            $this->char = $this->getChar();
        } while ($this->char !== "\0" && Utils::isSymChar($this->char));

        $this->ungetChar($this->char);

        return \ctype_digit($this->value) ? Token::NUMBER : Token::NAME;
    }

    /**
     * Reads a quoted string, including both quote characters, into $this->value.
     *
     * @param string $quote Opening quote character; the same character closes the string.
     *
     * @throws ParseException When end of input or an unescaped line feed precedes the closing quote.
     *
     * @return int Token::STRING
     */
    private function scanString(string $quote): int
    {
        $this->value .= $quote;

        while (($this->char = $this->getChar()) !== $quote) {
            if ($this->char === "\0") {
                throw ParseException::unexpected($this->token(Token::EOF, "\0"), $quote);
            }

            if ($this->char === "\n") {
                throw ParseException::unexpected($this->token(Token::NEWLINE, "\n"), $quote);
            }

            $this->value .= $this->char;

            if ($this->char === '\\') {
                $this->char = $this->getChar();

                // NOTE(review): end of input right after a backslash ends the
                // string without an error, unlike the unescaped case above.
                // Behavior kept as-is; confirm whether this is intended.
                if ($this->char === "\0") {
                    break;
                }

                // Backslash followed by a line feed: the line feed is dropped.
                if ($this->char === "\n") {
                    continue;
                }

                $this->value .= $this->char;
            }
        }

        $this->value .= $this->char;

        return Token::STRING;
    }

    /**
     * Returns the next character, or "\0" once the buffer is exhausted.
     *
     * A character pushed back with ungetChar() is returned first. The line
     * counter is incremented only when a line feed is read from the buffer,
     * not when it is re-read from the push-back slot.
     *
     * @return string
     */
    protected function getChar(): string
    {
        if ($this->backChar !== null) {
            $result = $this->backChar;
            $this->backChar = null;

            return $result;
        }

        // The buffer is indexed by byte below, so the bound must be the byte
        // length; a character count would cut multibyte input short.
        if ($this->offset >= \strlen($this->buffer)) {
            return "\0";
        }

        $char = $this->buffer[$this->offset++];

        if ($char === "\n") {
            $this->line++;
        }

        return $char;
    }

    /**
     * Pushes one character back so the next getChar() returns it.
     *
     * The end-of-input sentinel "\0" is ignored because getChar() keeps
     * returning it anyway.
     *
     * @param string $char
     *
     * @throws LexingException When a character is already pushed back.
     */
    protected function ungetChar(string $char)
    {
        if ($char === "\0") {
            return;
        }

        if ($this->backChar !== null) {
            throw new LexingException('Too many ungetChar calls');
        }

        $this->backChar = $char;
    }

    /**
     * Builds a token stamped with the current line and file name.
     *
     * @param int    $type
     * @param string $value
     *
     * @return Token
     */
    protected function token(int $type, string $value): Token
    {
        return new Token($type, $value, $this->line, $this->filename);
    }
}
445