Lexer::getToken()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 10
rs 9.9332
c 0
b 0
f 0
cc 2
nc 2
nop 0
1
<?php
2
/**
3
 * This file is part of PHP-Yacc package.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
declare(strict_types=1);
9
10
namespace PhpYacc\Yacc;
11
12
use PhpYacc\Exception\LexingException;
13
use PhpYacc\Exception\ParseException;
14
use PhpYacc\Support\Utils;
15
16
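/**
 * Character-level lexer for yacc grammar sources.
 *
 * Walks the buffer handed to startLexing() one character at a time and groups
 * the characters into Token instances. A single token and a single character
 * of push-back are supported through ungetToken() and ungetChar().
 */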
19
class Lexer
20
{
21
    /**
     * Token types that getToken() silently skips: blanks, comments and line breaks.
     */
    private const SPACE_TOKENS = [
        Token::T_SPACE,
        Token::T_COMMENT,
        Token::T_NEWLINE,
    ];

    /**
     * Lexemes with a fixed meaning, keyed by their exact text.
     *
     * When the text collected for a token matches a key, getRawToken() reports
     * the mapped type instead of the generically detected one.
     */
    private const TAG_MAP = [
        // Punctuation.
        ':' => Token::T_COLON,
        ';' => Token::T_SEMICOLON,
        '$' => Token::T_DOLLAR,

        // Section and inline-code markers.
        '%%' => Token::T_MARK,
        '%{' => Token::T_BEGIN_INC,
        '%}' => Token::T_END_INC,

        // Directives ("%term" maps to the same type as "%token").
        '%token' => Token::T_TOKEN,
        '%term' => Token::T_TOKEN,
        '%left' => Token::T_LEFT,
        '%right' => Token::T_RIGHT,
        '%nonassoc' => Token::T_NON_ASSOC,
        '%prec' => Token::T_PRECTOK,
        '%type' => Token::T_TYPE,
        '%union' => Token::T_UNION,
        '%start' => Token::T_START,
        '%expect' => Token::T_EXPECT,
        '%pure_parser' => Token::T_PURE_PARSER,
    ];
52
53
    /**
     * Source text being lexed; getChar() reads it one offset at a time.
     *
     * Defaults to an empty string so that a lexer which has not been started
     * yet simply reports end of input.
     *
     * @var string
     */
    protected $buffer = '';

    /**
     * File name attached to every produced token.
     *
     * @var string
     */
    protected $fileName = '';

    /**
     * Line counter, incremented by getChar() for every "\n" it reads.
     *
     * @var int
     */
    protected $line = 1;

    /**
     * Offset of the next character to read from the buffer.
     *
     * @var int
     */
    protected $offset = 0;

    /**
     * Token most recently handed out; null until the first token is read.
     *
     * @var Token|null
     */
    protected $currentToken;

    /**
     * Token pushed back by ungetToken(), or null when nothing is pending.
     *
     * @var Token|null
     */
    protected $backToken;

    /**
     * Character pushed back by ungetChar(), or null when nothing is pending.
     *
     * @var string|null
     */
    protected $backChar;

    /**
     * Set after the first "$" of a "$$" pair has been emitted, so that the
     * second "$" is emitted as a token of its own.
     *
     * @var bool
     */
    protected $prevIsDollar = false;

    /**
     * Character currently under inspection.
     *
     * @var string
     */
    protected $char = '';

    /**
     * Text collected so far for the token being built.
     *
     * @var string
     */
    protected $value = '';
102
103
    /**
     * Load a new source text and rewind the lexer to its beginning.
     *
     * @param string $code     Text to split into tokens.
     * @param string $filename Name recorded on every produced token.
     */
    public function startLexing(string $code, string $filename = '')
    {
        $this->fileName = $filename;
        $this->buffer = $code;

        // Position, line counter and push-back slots all start from scratch.
        $this->reset();
    }
114
115
    /**
     * Rewind the lexer: first line, first offset, nothing pushed back.
     *
     * The last handed-out token is forgotten as well, so that ungetToken()
     * cannot replay a token that belongs to a previously lexed buffer.
     *
     * @return void
     */
    protected function reset()
    {
        $this->line = 1;
        $this->offset = 0;
        $this->backChar = null;
        $this->backToken = null;
        $this->currentToken = null;
        $this->prevIsDollar = false;
    }
126
127
    /**
     * Return the next significant token.
     *
     * Raw tokens are read until one is found whose type is not listed in
     * SPACE_TOKENS; the skipped tokens are discarded.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function getToken(): Token
    {
        do {
            $this->currentToken = $this->getRawToken();
            // Strict comparison: token types are integers, no juggling wanted.
        } while (\in_array($this->currentToken->getType(), self::SPACE_TOKENS, true));

        return $this->currentToken;
    }
143
144
    /**
     * Push the current token back so that the next read returns it again.
     *
     * Only one token can be pending at a time.
     *
     * @throws LexingException When a token is already pending.
     */
    public function ungetToken()
    {
        if (null === $this->backToken) {
            $this->backToken = $this->currentToken;

            return;
        }

        throw new LexingException('Too many ungetToken calls');
    }
155
156
    /**
     * Look at the next significant token without consuming it.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function peek(): Token
    {
        $upcoming = $this->getToken();

        // Hand it straight back; the following read will yield it again.
        $this->ungetToken();

        return $upcoming;
    }
169
170
    /**
     * Return the next token, including blanks, line breaks and comments.
     *
     * A token pending from ungetToken() is returned first. Otherwise one
     * character is read and classified. Whatever is returned also becomes the
     * current token, so a following ungetToken() pushes back exactly this
     * token on every path.
     *
     * @throws LexingException
     * @throws ParseException
     *
     * @return Token
     */
    public function getRawToken()
    {
        if ($this->backToken !== null) {
            $this->currentToken = $this->backToken;
            $this->backToken = null;

            return $this->currentToken;
        }

        $this->char = $this->getChar();
        $this->value = '';

        // The predicates consume input and fill $this->value as a side effect,
        // so they must run in exactly this order and stop at the first match.
        if ($this->isWhitespace()) {
            $type = Token::T_SPACE;
        } elseif ($this->isNewline()) {
            $type = Token::T_NEWLINE;
        } elseif ($this->isComment()) {
            $type = Token::T_COMMENT;
        } elseif ($this->isEof()) {
            $type = Token::T_EOF;
        } else {
            $detected = $this->detectToken();

            // Fixed lexemes (":", "%%", "%token", ...) override the generic tag.
            $type = self::TAG_MAP[$this->value] ?? $detected;
        }

        $this->currentToken = $this->token($type, $this->value);

        return $this->currentToken;
    }
208
209
    /**
     * Consume a run of blank characters starting at the current character.
     *
     * On a match the whole run is appended to $this->value and the first
     * character after the run is pushed back.
     *
     * @throws LexingException
     *
     * @return bool True when the current character started a blank run.
     */
    protected function isWhitespace(): bool
    {
        if (!Utils::isWhite($this->char)) {
            return false;
        }

        do {
            $this->value .= $this->char;
            $this->char = $this->getChar();
        } while (Utils::isWhite($this->char));

        // The character that ended the run belongs to the next token.
        $this->ungetChar($this->char);

        return true;
    }
228
229
    /**
     * Check whether the current character is a line feed.
     *
     * On a match the character becomes the token value.
     *
     * @return bool
     */
    protected function isNewline(): bool
    {
        $matched = "\n" === $this->char;

        if ($matched) {
            $this->value = $this->char;
        }

        return $matched;
    }
242
243
    /**
     * Consume a block or line comment starting at the current character.
     *
     * On a match $this->value holds the full comment text: a block comment
     * includes both delimiters, a line comment includes the terminating line
     * feed when there is one. When the "/" does not start a comment, the
     * look-ahead character is pushed back and "/" stays the current character.
     *
     * @throws LexingException
     * @throws ParseException When the input ends inside a block comment.
     *
     * @return bool True when a comment was consumed.
     */
    protected function isComment(): bool
    {
        if ($this->char !== '/') {
            return false;
        }

        $this->char = $this->getChar();

        if ($this->char === '*') {
            $this->value = '/*';

            while (true) {
                $this->char = $this->getChar();

                if ($this->char === '*') {
                    $this->char = $this->getChar();
                    if ($this->char === '/') {
                        break;
                    }

                    // Not the closing delimiter: re-read the look-ahead on the
                    // next round and record the star itself, so the comment
                    // text is kept verbatim.
                    $this->ungetChar($this->char);
                    $this->char = '*';
                } elseif ($this->char === "\0") {
                    throw ParseException::unexpected($this->token(Token::T_EOF, "\0"), '*/');
                }

                $this->value .= $this->char;
            }

            $this->value .= '*/';

            return true;
        }

        if ($this->char === '/') {
            $this->value = '//';

            do {
                $this->char = $this->getChar();
                if ($this->char !== "\0") {
                    $this->value .= $this->char;
                }
            } while ($this->char !== "\n" && $this->char !== "\0");

            return true;
        }

        // A lone slash: give the look-ahead back and let the caller handle "/".
        $this->ungetChar($this->char);
        $this->char = '/';

        return false;
    }
292
293
    /**
     * Check whether the current character is the end-of-input marker ("\0").
     *
     * On a match the marker becomes the token value.
     *
     * @return bool
     */
    protected function isEof(): bool
    {
        if ("\0" !== $this->char) {
            return false;
        }

        $this->value = $this->char;

        return true;
    }
306
307
    /**
     * Collect the text of a non-blank token into $this->value and classify it.
     *
     * Handles, in this order: a "%" prefix, "$" and "$name", runs of symbol
     * characters (names and numbers), quoted strings, and finally any other
     * single character. The result is the generic tag; fixed lexemes are
     * mapped to their own types by the caller.
     *
     * @throws LexingException
     * @throws ParseException When a quoted string hits a line break or the end of input.
     *
     * @return int Token::T_NAME, T_NUMBER, T_STRING or T_UNKNOWN.
     */
    protected function detectToken()
    {
        $tag = Token::T_UNKNOWN;

        if ($this->char === '%') {
            $this->char = $this->getChar();
            if (\in_array($this->char, ['%', '{', '}'], true) || Utils::isSymChar($this->char)) {
                // "%%", "%{", "%}" or "%word": keep the percent sign and let
                // the branches below append the rest.
                $this->value .= '%';
            } else {
                // A lone "%": restore the look-ahead and treat "%" as the token.
                $this->ungetChar($this->char);
                $this->char = '%';
            }
        }

        if ($this->char === '$') {
            if (!$this->prevIsDollar) {
                $this->value .= '$';
                $this->char = $this->getChar();

                if ($this->char === '$') {
                    // "$$": emit this "$" now and remember to emit the second
                    // one as a token of its own on the next call.
                    $this->ungetChar($this->char);
                    $this->prevIsDollar = true;
                } elseif (!\ctype_digit($this->char) && Utils::isSymChar($this->char)) {
                    // "$name": the dollar sign is part of the name.
                    do {
                        $this->value .= $this->char;
                        $this->char = $this->getChar();
                    } while (Utils::isSymChar($this->char));
                    $this->ungetChar($this->char);
                    $tag = Token::T_NAME;
                } else {
                    $this->ungetChar($this->char);
                }
            } else {
                $this->value .= '$';
                $this->prevIsDollar = false;
            }
        } elseif (Utils::isSymChar($this->char)) {
            do {
                $this->value .= $this->char;
                $this->char = $this->getChar();
            } while ($this->char !== "\0" && Utils::isSymChar($this->char));

            $this->ungetChar($this->char);
            $tag = \ctype_digit($this->value) ? Token::T_NUMBER : Token::T_NAME;
        } elseif ($this->char === '\'' || $this->char === '"') {
            $quote = $this->char;
            $this->value .= $this->char;

            while (($this->char = $this->getChar()) !== $quote) {
                if ($this->char === "\0") {
                    throw ParseException::unexpected($this->token(Token::T_EOF, "\0"), $quote);
                }

                if ($this->char === "\n") {
                    throw ParseException::unexpected($this->token(Token::T_NEWLINE, "\n"), $quote);
                }

                $this->value .= $this->char;
                if ($this->char === '\\') {
                    $this->char = $this->getChar();

                    if ($this->char === "\0") {
                        // Input ended right after a backslash: the literal is
                        // unterminated, report it like any other premature end.
                        throw ParseException::unexpected($this->token(Token::T_EOF, "\0"), $quote);
                    }

                    if ($this->char === "\n") {
                        // Backslash followed by a line break: the line break
                        // is not recorded and the string continues.
                        continue;
                    }

                    $this->value .= $this->char;
                }
            }

            // Closing quote.
            $this->value .= $this->char;
            $tag = Token::T_STRING;
        } else {
            $this->value .= $this->char;
        }

        return $tag;
    }
393
394
    /**
     * Read the next character of the input.
     *
     * A character pending from ungetChar() is returned first. Otherwise the
     * buffer is read at the current offset, which then advances by one. The
     * line counter is incremented for every "\n" read from the buffer.
     *
     * @return string The character, or "\0" once the buffer is exhausted.
     */
    protected function getChar(): string
    {
        if ($this->backChar !== null) {
            $pending = $this->backChar;
            $this->backChar = null;

            return $pending;
        }

        // The buffer is indexed byte by byte below, so the bound has to be the
        // byte length; a character count would cut multibyte input short.
        if ($this->offset >= \strlen($this->buffer)) {
            return "\0";
        }

        $char = $this->buffer[$this->offset++];

        if ($char === "\n") {
            $this->line++;
        }

        return $char;
    }
418
419
    /**
     * Push one character back so that the next getChar() returns it again.
     *
     * The end-of-input marker is never stored; getChar() keeps returning it
     * once the buffer is exhausted.
     *
     * @param string $char Character to push back.
     *
     * @throws LexingException When a character is already pending.
     */
    protected function ungetChar(string $char)
    {
        if ($char === "\0") {
            return;
        }

        if ($this->backChar !== null) {
            throw new LexingException('Too many ungetChar calls');
        }

        $this->backChar = $char;
    }
436
437
    /**
     * Build a token stamped with the current line and file name.
     *
     * The line counter is incremented as soon as a "\n" is read, even when
     * that line feed was only look-ahead and has been pushed back. While such
     * a line feed is pending, the token being built ended on the previous
     * line, so the counter is corrected by one.
     *
     * @param int    $type  Token type.
     * @param string $value Token text.
     *
     * @return Token
     */
    protected function token(int $type, string $value): Token
    {
        $line = $this->backChar === "\n" ? $this->line - 1 : $this->line;

        return new Token($type, $value, $line, $this->fileName);
    }
447
}
448