Passed
Push — master ( 7d326c...9ae8b9 )
by Vladimir
03:29
created

Lexer::readDigits()   A

Complexity

Conditions 5
Paths 3

Size

Total Lines 23
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 23
c 0
b 0
f 0
rs 9.4888
ccs 14
cts 14
cp 1
cc 5
nc 3
nop 0
crap 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace GraphQL\Language;
6
7
use GraphQL\Error\SyntaxError;
8
use GraphQL\Utils\BlockString;
9
use GraphQL\Utils\Utils;
10
use function chr;
11
use function hexdec;
12
use function ord;
13
use function preg_match;
14
15
/**
16
 * A Lexer is a stateful stream generator in that every time
17
 * it is advanced, it returns the next token in the Source. Assuming the
18
 * source lexes, the final Token emitted by the lexer will be of kind
19
 * EOF, after which the lexer will repeatedly return the same EOF token
20
 * whenever called.
21
 *
22
 * Algorithm is O(N) both on memory and time
23
 */
24
class Lexer
25
{
26
    /** @var Source */
27
    public $source;
28
29
    /** @var bool[] */
30
    public $options;
31
32
    /**
33
     * The previously focused non-ignored token.
34
     *
35
     * @var Token
36
     */
37
    public $lastToken;
38
39
    /**
40
     * The currently focused non-ignored token.
41
     *
42
     * @var Token
43
     */
44
    public $token;
45
46
    /**
47
     * The (1-indexed) line containing the current token.
48
     *
49
     * @var int
50
     */
51
    public $line;
52
53
    /**
54
     * The character offset at which the current line begins.
55
     *
56
     * @var int
57
     */
58
    public $lineStart;
59
60
    /**
61
     * Current cursor position for UTF8 encoding of the source
62
     *
63
     * @var int
64
     */
65
    private $position;
66
67
    /**
68
     * Current cursor position for ASCII representation of the source
69
     *
70
     * @var int
71
     */
72
    private $byteStreamPosition;
73
74
    /**
     * Creates a lexer whose cursor sits at the very beginning of the given source.
     *
     * @param bool[] $options
     */
    public function __construct(Source $source, array $options = [])
    {
        $this->source  = $source;
        $this->options = $options;

        // Until the first advance(), both token slots share one synthetic start-of-file token.
        $sof             = new Token(Token::SOF, 0, 0, 0, 0, null);
        $this->lastToken = $sof;
        $this->token     = $sof;

        $this->line      = 1;
        $this->lineStart = 0;

        $this->position           = 0;
        $this->byteStreamPosition = 0;
    }
89
90
    /**
     * Moves the lexer forward by one token.
     *
     * The token that was current becomes $lastToken and the result of lookahead()
     * becomes the new current token, which is also returned.
     *
     * @return Token
     */
    public function advance()
    {
        $this->lastToken = $this->token;
        $this->token     = $this->lookahead();

        return $this->token;
    }
100
101 905
    /**
     * Returns the next non-comment token after the current one.
     *
     * $token and $lastToken are left untouched. Tokens that have not been read yet
     * are read from the source and linked through Token::$next, so they are only
     * lexed once. When the current token is EOF it is returned as is.
     *
     * @return Token
     */
    public function lookahead()
    {
        $token = $this->token;
        if ($token->kind === Token::EOF) {
            return $token;
        }

        do {
            if (! $token->next) {
                $token->next = $this->readToken($token);
            }
            $token = $token->next;
        } while ($token->kind === Token::COMMENT);

        return $token;
    }
112
113
    /**
     * Reads the token that follows $prev.
     *
     * Ignored characters are skipped first (see positionAfterWhitespace()). When the
     * cursor is at or past the end of the source an EOF token is returned; otherwise
     * the first character decides which kind of token is read.
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    private function readToken(Token $prev)
    {
        $bodyLength = $this->source->length;

        $this->positionAfterWhitespace();
        $position = $this->position;

        $line = $this->line;
        $col  = 1 + $position - $this->lineStart;

        if ($position >= $bodyLength) {
            return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
        }

        // Consume the first character of the token; multi-character readers rewind below.
        [, $code, $bytes] = $this->readChar(true);

        // SourceCharacter: control characters other than tab, LF and CR are rejected.
        $isAllowedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;
        if ($code < 0x0020 && ! $isAllowedControl) {
            throw new SyntaxError(
                $this->source,
                $position,
                'Cannot contain the invalid character ' . Utils::printCharCode($code)
            );
        }

        // Single-character punctuators, keyed by character code.
        $punctuators = [
            33  => Token::BANG,      // !
            36  => Token::DOLLAR,    // $
            38  => Token::AMP,       // &
            40  => Token::PAREN_L,   // (
            41  => Token::PAREN_R,   // )
            58  => Token::COLON,     // :
            61  => Token::EQUALS,    // =
            64  => Token::AT,        // @
            91  => Token::BRACKET_L, // [
            93  => Token::BRACKET_R, // ]
            123 => Token::BRACE_L,   // {
            124 => Token::PIPE,      // |
            125 => Token::BRACE_R,   // }
        ];

        if (isset($punctuators[$code])) {
            return new Token($punctuators[$code], $position, $position + 1, $line, $col, $prev);
        }

        if ($code === 35) { // #
            $this->moveStringCursor(-1, -1 * $bytes);

            return $this->readComment($line, $col, $prev);
        }

        if ($code === 46) { // .
            [, $secondCode] = $this->readChar(true);
            [, $thirdCode]  = $this->readChar(true);

            if ($secondCode === 46 && $thirdCode === 46) {
                return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
            }
            // Anything other than "..." falls through to the unexpected character error.
        }

        $isNameStart = $code === 95 // _
            || ($code >= 65 && $code <= 90) // A-Z
            || ($code >= 97 && $code <= 122); // a-z
        if ($isNameStart) {
            return $this->moveStringCursor(-1, -1 * $bytes)
                ->readName($line, $col, $prev);
        }

        $isNumberStart = $code === 45 // -
            || ($code >= 48 && $code <= 57); // 0-9
        if ($isNumberStart) {
            return $this->moveStringCursor(-1, -1 * $bytes)
                ->readNumber($line, $col, $prev);
        }

        if ($code === 34) { // "
            // Peek at the two characters after the quote to tell """ from ".
            [, $nextCode]     = $this->readChar();
            [, $nextNextCode] = $this->moveStringCursor(1, 1)->readChar();

            // Rewind to the opening quote before handing over to the string reader.
            $this->moveStringCursor(-2, (-1 * $bytes) - 1);

            if ($nextCode === 34 && $nextNextCode === 34) {
                return $this->readBlockString($line, $col, $prev);
            }

            return $this->readString($line, $col, $prev);
        }

        if ($code === 39) {
            $errMessage = "Unexpected single quote character ('), did you mean to use " . 'a double quote (")?';
        } else {
            $errMessage = 'Cannot parse the unexpected character ' . Utils::printCharCode($code) . '.';
        }

        throw new SyntaxError(
            $this->source,
            $position,
            $errMessage
        );
    }
279
280
    /**
     * Reads a name token made of letters, digits and underscores.
     *
     * [_A-Za-z][_0-9A-Za-z]*
     *
     * The first character is not checked here; every character is tested against
     * the full [_0-9A-Za-z] set.
     *
     * @param int $line
     * @param int $col
     * @return Token
     */
    private function readName($line, $col, Token $prev)
    {
        $start = $this->position;
        $name  = '';

        [$char, $code] = $this->readChar();

        while ($code) {
            $isNameChar = $code === 95 // _
                || ($code >= 48 && $code <= 57) // 0-9
                || ($code >= 65 && $code <= 90) // A-Z
                || ($code >= 97 && $code <= 122); // a-z

            if (! $isNameChar) {
                break;
            }

            $name .= $char;

            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        return new Token(Token::NAME, $start, $this->position, $line, $col, $prev, $name);
    }
315
316
    /**
     * Reads an integer or float token; the token is a float as soon as a
     * fractional part or an exponent is present.
     *
     * Int:   -?(0|[1-9][0-9]*)
     * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
     *
     * @param int $line
     * @param int $col
     * @return Token
     * @throws SyntaxError
     */
    private function readNumber($line, $col, Token $prev)
    {
        $start   = $this->position;
        $text    = '';
        $isFloat = false;

        [$char, $code] = $this->readChar();

        // Optional sign.
        if ($code === 45) { // -
            $text .= $char;

            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        // Integer part.
        if ($code !== 48) {
            $text .= $this->readDigits();

            [$char, $code] = $this->readChar();
        } else {
            // A leading zero has to stand alone: "0" is fine, "01" is not.
            $text .= $char;

            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

            if ($code >= 48 && $code <= 57) {
                throw new SyntaxError(
                    $this->source,
                    $this->position,
                    'Invalid number, unexpected digit after 0: ' . Utils::printCharCode($code)
                );
            }
        }

        // Fractional part.
        if ($code === 46) { // .
            $isFloat = true;
            $this->moveStringCursor(1, 1);

            $text .= $char;
            $text .= $this->readDigits();

            [$char, $code] = $this->readChar();
        }

        // Exponent.
        if ($code === 69 || $code === 101) { // E e
            $isFloat = true;
            $text   .= $char;

            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

            if ($code === 43 || $code === 45) { // + -
                $text .= $char;
                $this->moveStringCursor(1, 1);
            }

            $text .= $this->readDigits();
        }

        $kind = $isFloat ? Token::FLOAT : Token::INT;

        return new Token($kind, $start, $this->position, $line, $col, $prev, $text);
    }
389
390
    /**
     * Reads a run of one or more decimal digits starting at the cursor and returns it.
     * Afterwards the cursor points at the first character that is not a digit.
     *
     * @return string
     *
     * @throws SyntaxError when the character at the cursor is not a digit
     */
    private function readDigits()
    {
        [$char, $code] = $this->readChar();

        $digits = '';
        while ($code >= 48 && $code <= 57) { // 0 - 9
            $digits .= $char;

            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        if ($digits !== '') {
            return $digits;
        }

        // At the end of the source there is no character to report.
        if ($this->position > $this->source->length - 1) {
            $code = null;
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Invalid number, expected digit but got: ' . Utils::printCharCode($code)
        );
    }
418
419
    /**
     * Reads a single-line string token; the cursor must be on the opening quote.
     *
     * Reading stops at the closing quote. A line terminator or the end of the
     * source before the closing quote is reported as an unterminated string.
     * Backslash escapes (\" \/ \\ \b \f \n \r \t and \uXXXX) are decoded into the
     * token value.
     *
     * @param int $line
     * @param int $col
     * @return Token
     * @throws SyntaxError
     */
    private function readString($line, $col, Token $prev)
    {
        $start = $this->position;

        // Skip leading quote and read first string char:
        [$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();

        // $chunk collects literal characters since the last escape; $value is the decoded result.
        $chunk = '';
        $value = '';

        // Stop at end of input or at a LineTerminator.
        while ($code !== null && $code !== 10 && $code !== 13) {
            // Closing Quote (")
            if ($code === 34) {
                $value .= $chunk;

                // Skip quote
                $this->moveStringCursor(1, 1);

                return new Token(Token::STRING, $start, $this->position, $line, $col, $prev, $value);
            }

            $this->assertValidStringCharacterCode($code, $this->position);
            $this->moveStringCursor(1, $bytes);

            if ($code !== 92) {
                $chunk .= $char;
            } else { // \
                $value .= $chunk;
                $chunk  = '';

                [, $escapeCode] = $this->readChar(true);

                $simpleEscapes = [
                    34  => '"',
                    47  => '/',
                    92  => '\\',
                    98  => chr(8), // \b (backspace)
                    102 => "\f",
                    110 => "\n",
                    114 => "\r",
                    116 => "\t",
                ];

                if (isset($simpleEscapes[$escapeCode])) {
                    $value .= $simpleEscapes[$escapeCode];
                } elseif ($escapeCode === 117) { // u
                    $position = $this->position;
                    [$hex]    = $this->readChars(4, true);

                    // Anchored on both ends: with malformed UTF-8 the slice can hold more than
                    // four bytes, and hexdec() would silently skip the non-hex ones.
                    if (! preg_match('/\A[0-9a-fA-F]{4}\z/', $hex)) {
                        throw new SyntaxError(
                            $this->source,
                            $position - 1,
                            'Invalid character escape sequence: \\u' . $hex
                        );
                    }

                    $unicodeCode = hexdec($hex);
                    $this->assertValidStringCharacterCode($unicodeCode, $position - 2);
                    $value .= Utils::chr($unicodeCode);
                } else {
                    throw new SyntaxError(
                        $this->source,
                        $this->position - 1,
                        'Invalid character escape sequence: \\' . Utils::chr($escapeCode)
                    );
                }
            }

            [$char, $code, $bytes] = $this->readChar();
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Unterminated string.'
        );
    }
524
525
    /**
     * Reads a block string token; the cursor must be on the first of the three
     * opening quotes.
     *
     * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
     *
     * The raw text between the delimiters (with \""" turned into """) is passed
     * through BlockString::value() to produce the token value.
     *
     * @param int $line
     * @param int $col
     * @return Token
     * @throws SyntaxError
     */
    private function readBlockString($line, $col, Token $prev)
    {
        $start = $this->position;

        // Skip leading quotes and read first string char:
        [$char, $code, $bytes] = $this->moveStringCursor(3, 3)->readChar();

        $chunk = '';
        $value = '';

        while ($code !== null) {
            if ($code === 34) {
                // Look at the two characters after this quote.
                [, $secondCode] = $this->moveStringCursor(1, 1)->readChar();
                [, $thirdCode]  = $this->moveStringCursor(1, 1)->readChar();

                if ($secondCode !== 34 || $thirdCode !== 34) {
                    // Not a closing triple-quote: move cursor back to the first quote.
                    $this->moveStringCursor(-2, -2);
                } else {
                    // Closing Triple-Quote (""")
                    $value .= $chunk;

                    $this->moveStringCursor(1, 1);

                    return new Token(
                        Token::BLOCK_STRING,
                        $start,
                        $this->position,
                        $line,
                        $col,
                        $prev,
                        BlockString::value($value)
                    );
                }
            }

            $this->assertValidBlockStringCharacterCode($code, $this->position);
            $this->moveStringCursor(1, $bytes);

            // Peek at the three characters that follow the current one.
            [, $peek1] = $this->readChar();
            [, $peek2] = $this->moveStringCursor(1, 1)->readChar();
            [, $peek3] = $this->moveStringCursor(1, 1)->readChar();

            // Escape Triple-Quote (\""")
            $isEscapedTripleQuote = $code === 92
                && $peek1 === 34
                && $peek2 === 34
                && $peek3 === 34;

            if ($isEscapedTripleQuote) {
                // Step over the third quote as well; the escape contributes a plain """.
                $this->moveStringCursor(1, 1);
                $value .= $chunk . '"""';
                $chunk  = '';
            } else {
                // Undo the two peek steps and keep the character as is.
                $this->moveStringCursor(-2, -2);
                $chunk .= $char;
            }

            [$char, $code, $bytes] = $this->readChar();
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Unterminated string.'
        );
    }
597
598 122
    /**
     * Rejects character codes below 0x20, except tab, inside a single-line string.
     *
     * @param int|null $code     character code to check
     * @param int      $position source position reported in the error
     *
     * @throws SyntaxError
     */
    private function assertValidStringCharacterCode($code, $position)
    {
        // SourceCharacter
        $isControlCharacter = $code < 0x0020;
        if (! $isControlCharacter || $code === 0x0009) {
            return;
        }

        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
609
610 22
    /**
     * Rejects character codes below 0x20, except tab, line feed and carriage
     * return, inside a block string.
     *
     * @param int|null $code     character code to check
     * @param int      $position source position reported in the error
     *
     * @throws SyntaxError
     */
    private function assertValidBlockStringCharacterCode($code, $position)
    {
        // SourceCharacter
        $isControlCharacter = $code < 0x0020;
        $isAllowedControl   = $code === 0x0009 || $code === 0x000A || $code === 0x000D;
        if (! $isControlCharacter || $isAllowedControl) {
            return;
        }

        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
621
622
    /**
     * Moves the cursor past ignored characters (tab, space, comma, BOM and line
     * terminators) and leaves it on the first character that is none of those.
     * $line and $lineStart are updated for every line terminator passed; CR LF
     * counts as a single line break.
     */
    private function positionAfterWhitespace()
    {
        while ($this->position < $this->source->length) {
            [, $code, $bytes] = $this->readChar();

            // tab | space | comma | BOM
            $isInlineWhitespace = $code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF;
            $isLineFeed         = $code === 10;
            $isCarriageReturn   = $code === 13;

            if (! $isInlineWhitespace && ! $isLineFeed && ! $isCarriageReturn) {
                return;
            }

            // Every ignored character is stepped over first.
            $this->moveStringCursor(1, $bytes);

            if ($isInlineWhitespace) {
                continue;
            }

            if ($isCarriageReturn) {
                [, $nextCode, $nextBytes] = $this->readChar();

                if ($nextCode === 10) { // lf after cr
                    $this->moveStringCursor(1, $nextBytes);
                }
            }

            $this->line++;
            $this->lineStart = $this->position;
        }
    }
652
653
    /**
     * Reads a comment token; the cursor must be on the leading "#".
     *
     * #[\u0009\u0020-\uFFFF]*
     *
     * The "#" itself is not part of the token value.
     * NOTE(review): the character that ends the loop (for example the line
     * terminator) is appended to the value before the loop condition is checked,
     * while the cursor stays on it - confirm this is intended.
     *
     * @param int $line
     * @param int $col
     * @return Token
     */
    private function readComment($line, $col, Token $prev)
    {
        $start = $this->position;
        $text  = '';

        // Width of the character to step over next; the first one is the "#".
        $bytes = 1;

        while (true) {
            [$char, $code, $bytes] = $this->moveStringCursor(1, $bytes)->readChar();

            $text .= $char;

            // SourceCharacter but not LineTerminator
            $isCommentChar = $code && ($code > 0x001F || $code === 0x0009);
            if (! $isCommentChar) {
                break;
            }
        }

        return new Token(Token::COMMENT, $start, $this->position, $line, $col, $prev, $text);
    }
686
687
    /**
     * Reads the UTF-8 character that starts at $byteStreamPosition (by default the
     * current byte cursor).
     *
     * The width of the character is derived from its first byte only. The result
     * is a triple [character, code, byte width]; at the end of the body it is
     * ['', null, 0] and the cursor does not move.
     *
     * @param bool     $advance            when true, the cursor is moved past the character read
     * @param int|null $byteStreamPosition byte offset to read from, null for the current cursor
     * @return (string|int|null)[] [string $utf8char, int|null $code, int $bytes]
     */
    private function readChar($advance = false, $byteStreamPosition = null)
    {
        if ($byteStreamPosition === null) {
            $byteStreamPosition = $this->byteStreamPosition;
        }

        $body = $this->source->body;

        if (! isset($body[$byteStreamPosition])) {
            // Nothing left to read; advancing by zero would be a no-op.
            return ['', null, 0];
        }

        $leadByte = ord($body[$byteStreamPosition]);

        if ($leadByte < 128) {
            $bytes = 1;
        } elseif ($leadByte < 224) {
            $bytes = 2;
        } elseif ($leadByte < 240) {
            $bytes = 3;
        } else {
            $bytes = 4;
        }

        if ($bytes === 1) {
            $utf8char = $body[$byteStreamPosition];
            $code     = $leadByte;
        } else {
            // substr() simply stops at the end of the body, so a truncated multi-byte
            // sequence no longer triggers an "Uninitialized string offset" diagnostic.
            $utf8char = \substr($body, $byteStreamPosition, $bytes);
            $code     = Utils::ord($utf8char);
        }

        if ($advance) {
            $this->moveStringCursor(1, $bytes);
        }

        return [$utf8char, $code, $bytes];
    }
732
733
    /**
     * Reads up to $charCount UTF-8 characters from the byte stream, starting at
     * $byteStreamPosition (by default the current byte cursor).
     *
     * Fewer characters are returned when the end of the body is reached first.
     *
     * @param int      $charCount
     * @param bool     $advance            when true, the cursor is moved past the characters read
     * @param int|null $byteStreamPosition byte offset to read from, null for the current cursor
     * @return (string|int)[] [string $chars, int $totalBytes]
     */
    private function readChars($charCount, $advance = false, $byteStreamPosition = null)
    {
        $result     = '';
        $totalBytes = 0;
        $charsRead  = 0;

        // Only null means "not given"; an explicit offset of 0 must be honoured.
        $byteOffset = $byteStreamPosition ?? $this->byteStreamPosition;

        for ($i = 0; $i < $charCount; $i++) {
            [$char, , $bytes] = $this->readChar(false, $byteOffset);

            if ($bytes === 0) {
                // End of the body: further reads would return nothing either.
                break;
            }

            $totalBytes += $bytes;
            $byteOffset += $bytes;
            $result     .= $char;
            $charsRead++;
        }

        if ($advance) {
            // Move by what was actually read so both cursors stay in step at the end of input.
            $this->moveStringCursor($charsRead, $totalBytes);
        }

        return [$result, $totalBytes];
    }
759
760
    /**
     * Shifts both cursors: $position by $positionOffset and $byteStreamPosition
     * by $byteStreamOffset. Negative offsets move backwards.
     *
     * @param int $positionOffset
     * @param int $byteStreamOffset
     * @return self the lexer itself, so calls can be chained
     */
    private function moveStringCursor($positionOffset, $byteStreamOffset)
    {
        $this->byteStreamPosition += $byteStreamOffset;
        $this->position           += $positionOffset;

        return $this;
    }
774
}
775