Passed
Push — master ( 21e0c8...7c1977 )
by Vladimir
06:02
created

Lexer::unexpectedCharacterMessage()   A

Complexity

Conditions 6
Paths 3

Size

Total Lines 13
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 6

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 13
rs 9.2222
c 0
b 0
f 0
ccs 6
cts 6
cp 1
cc 6
nc 3
nop 1
crap 6
1
<?php
2
3
declare(strict_types=1);
4
5
namespace GraphQL\Language;
6
7
use GraphQL\Error\SyntaxError;
8
use GraphQL\Utils\BlockString;
9
use GraphQL\Utils\Utils;
10
use function chr;
11
use function hexdec;
12
use function ord;
13
use function preg_match;
14
15
/**
16
 * A Lexer is a stateful stream generator in that every time
17
 * it is advanced, it returns the next token in the Source. Assuming the
18
 * source lexes, the final Token emitted by the lexer will be of kind
19
 * EOF, after which the lexer will repeatedly return the same EOF token
20
 * whenever called.
21
 *
22
 * Algorithm is O(N) both on memory and time
23
 */
24
class Lexer
25
{
26
    /** @var Source */
27
    public $source;
28
29
    /** @var bool[] */
30
    public $options;
31
32
    /**
33
     * The previously focused non-ignored token.
34
     *
35
     * @var Token
36
     */
37
    public $lastToken;
38
39
    /**
40
     * The currently focused non-ignored token.
41
     *
42
     * @var Token
43
     */
44
    public $token;
45
46
    /**
47
     * The (1-indexed) line containing the current token.
48
     *
49
     * @var int
50
     */
51
    public $line;
52
53
    /**
54
     * The character offset at which the current line begins.
55
     *
56
     * @var int
57
     */
58
    public $lineStart;
59
60
    /**
61
     * Current cursor position, counted in UTF-8 characters of the source
62
     *
63
     * @var int
64
     */
65
    private $position;
66
67
    /**
68
     * Current cursor position, counted in bytes of the source body
69
     *
70
     * @var int
71
     */
72
    private $byteStreamPosition;
73
74
    /**
     * Creates a lexer placed at the very beginning of the given source.
     *
     * The current and the previous token both start out as one shared
     * start-of-file token.
     *
     * @param bool[] $options
     */
    public function __construct(Source $source, array $options = [])
    {
        $this->source  = $source;
        $this->options = $options;

        // Nothing has been consumed yet: first line, both cursors at offset zero.
        $this->line               = 1;
        $this->lineStart          = 0;
        $this->position           = 0;
        $this->byteStreamPosition = 0;

        $sof             = new Token(Token::SOF, 0, 0, 0, 0, null);
        $this->token     = $sof;
        $this->lastToken = $sof;
    }
89
90
    /**
     * Moves the lexer forward to the next non-comment token.
     *
     * The token that was current before the call is remembered in $lastToken.
     *
     * @return Token the new current token
     */
    public function advance()
    {
        $this->lastToken = $this->token;
        $this->token     = $this->lookahead();

        return $this->token;
    }
98
99 976
    /**
     * Returns the next non-comment token without changing the current token.
     *
     * Tokens that have not been lexed yet are read on demand and linked to their
     * predecessor through Token::$next, so each token is only read once. When the
     * current token is already EOF, that same token is returned.
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    public function lookahead()
    {
        $token = $this->token;

        if ($token->kind === Token::EOF) {
            return $token;
        }

        do {
            if (! $token->next) {
                $token->next = $this->readToken($token);
            }

            $token = $token->next;
        } while ($token->kind === Token::COMMENT);

        return $token;
    }
110
111
    /**
     * Lexes the token that follows $prev.
     *
     * Insignificant characters are skipped first; the first significant character
     * then decides which kind of token is produced. Comments, names, numbers and
     * strings are handed to their dedicated readers after the cursor has been put
     * back onto the token's first character.
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    private function readToken(Token $prev)
    {
        $bodyLength = $this->source->length;

        $this->positionAfterWhitespace();

        $position = $this->position;
        $line     = $this->line;
        $col      = 1 + $position - $this->lineStart;

        if ($position >= $bodyLength) {
            return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
        }

        // Consume the token's first character; $bytes allows undoing that below.
        [, $code, $bytes] = $this->readChar(true);

        // Punctuators consisting of exactly one character, keyed by character code.
        $singleCharKinds = [
            33  => Token::BANG,      // !
            36  => Token::DOLLAR,    // $
            38  => Token::AMP,       // &
            40  => Token::PAREN_L,   // (
            41  => Token::PAREN_R,   // )
            58  => Token::COLON,     // :
            61  => Token::EQUALS,    // =
            64  => Token::AT,        // @
            91  => Token::BRACKET_L, // [
            93  => Token::BRACKET_R, // ]
            123 => Token::BRACE_L,   // {
            124 => Token::PIPE,      // |
            125 => Token::BRACE_R,   // }
        ];

        if (isset($singleCharKinds[$code])) {
            return new Token($singleCharKinds[$code], $position, $position + 1, $line, $col, $prev);
        }

        if ($code === 35) { // #
            $this->moveStringCursor(-1, -1 * $bytes);

            return $this->readComment($line, $col, $prev);
        }

        if ($code === 46) { // .
            [, $secondCode] = $this->readChar(true);
            [, $thirdCode]  = $this->readChar(true);

            if ($secondCode === 46 && $thirdCode === 46) {
                return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
            }
            // Anything other than "..." is reported as an unexpected character below.
        } elseif ($code === 95 || ($code >= 65 && $code <= 90) || ($code >= 97 && $code <= 122)) {
            // _ A-Z a-z
            return $this->moveStringCursor(-1, -1 * $bytes)
                ->readName($line, $col, $prev);
        } elseif ($code === 45 || ($code >= 48 && $code <= 57)) {
            // - 0-9
            return $this->moveStringCursor(-1, -1 * $bytes)
                ->readNumber($line, $col, $prev);
        } elseif ($code === 34) { // "
            // Peek at the two following characters to tell """ from ".
            [, $nextCode]     = $this->readChar();
            [, $nextNextCode] = $this->moveStringCursor(1, 1)->readChar();

            // Put the cursor back onto the opening quote before delegating.
            $this->moveStringCursor(-2, (-1 * $bytes) - 1);

            return $nextCode === 34 && $nextNextCode === 34
                ? $this->readBlockString($line, $col, $prev)
                : $this->readString($line, $col, $prev);
        }

        throw new SyntaxError(
            $this->source,
            $position,
            $this->unexpectedCharacterMessage($code)
        );
    }
265
266 11
    /**
     * Builds the syntax error message for a character that cannot start a token.
     *
     * @param int|null $code character code of the offending character
     *
     * @return string
     */
    private function unexpectedCharacterMessage($code)
    {
        // Tab, line feed and carriage return are the only control characters
        // that count as SourceCharacter.
        $isAllowedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;

        if ($code < 0x0020 && ! $isAllowedControl) {
            return 'Cannot contain the invalid character ' . Utils::printCharCode($code);
        }

        if ($code !== 39) {
            return 'Cannot parse the unexpected character ' . Utils::printCharCode($code) . '.';
        }

        return "Unexpected single quote character ('), did you mean to use " .
            'a double quote (")?';
    }
280
281
    /**
     * Reads a name token: letters, digits and underscores starting at the cursor.
     *
     * [_A-Za-z][_0-9A-Za-z]*
     *
     * The caller is responsible for the first character being a valid name start;
     * this method accepts any name character in every position.
     *
     * @param int $line
     * @param int $col
     *
     * @return Token
     */
    private function readName($line, $col, Token $prev)
    {
        $start = $this->position;
        $name  = '';

        [$char, $code] = $this->readChar();

        while ($code) {
            $isNameChar = $code === 95              // _
                || ($code >= 48 && $code <= 57)     // 0-9
                || ($code >= 65 && $code <= 90)     // A-Z
                || ($code >= 97 && $code <= 122);   // a-z

            if (! $isNameChar) {
                break;
            }

            $name         .= $char;
            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        return new Token(Token::NAME, $start, $this->position, $line, $col, $prev, $name);
    }
317
318
    /**
     * Reads an int or float token starting at the cursor.
     *
     * The token is a float as soon as a fraction or an exponent is present.
     *
     * Int:   -?(0|[1-9][0-9]*)
     * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
     *
     * @param int $line
     * @param int $col
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    private function readNumber($line, $col, Token $prev)
    {
        $start = $this->position;
        $text  = '';
        $kind  = Token::INT;

        [$char, $code] = $this->readChar();

        if ($code === 45) { // -
            $text         .= $char;
            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        if ($code !== 48) {
            $text         .= $this->readDigits();
            [$char, $code] = $this->readChar();
        } else {
            // A leading zero must stand alone: "0" is fine, "01" is not.
            $text         .= $char;
            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

            if ($code >= 48 && $code <= 57) {
                throw new SyntaxError(
                    $this->source,
                    $this->position,
                    'Invalid number, unexpected digit after 0: ' . Utils::printCharCode($code)
                );
            }
        }

        if ($code === 46) { // .
            $kind  = Token::FLOAT;
            $text .= $char;

            $this->moveStringCursor(1, 1);

            $text         .= $this->readDigits();
            [$char, $code] = $this->readChar();
        }

        if ($code === 69 || $code === 101) { // E e
            $kind          = Token::FLOAT;
            $text         .= $char;
            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

            if ($code === 43 || $code === 45) { // + -
                $text .= $char;
                $this->moveStringCursor(1, 1);
            }

            $text .= $this->readDigits();
        }

        return new Token($kind, $start, $this->position, $line, $col, $prev, $text);
    }
393
394
    /**
     * Reads a run of one or more decimal digits and leaves the cursor on the
     * first character after them.
     *
     * @return string the digits that were read
     *
     * @throws SyntaxError when the cursor is not on a digit
     */
    private function readDigits()
    {
        [$char, $code] = $this->readChar();

        if (! ($code >= 48 && $code <= 57)) {
            // Past the end of the source there is no character to report.
            $reported = $this->position > $this->source->length - 1 ? null : $code;

            throw new SyntaxError(
                $this->source,
                $this->position,
                'Invalid number, expected digit but got: ' . Utils::printCharCode($reported)
            );
        }

        $digits = '';

        while ($code >= 48 && $code <= 57) { // 0 - 9
            $digits       .= $char;
            [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
        }

        return $digits;
    }
422
423
    /**
     * Reads a single-line string token, resolving escape sequences.
     *
     * The cursor is expected to be on the opening quote. The string ends at the
     * next unescaped double quote; reaching a line terminator or the end of the
     * source first is an error.
     *
     * @param int $line
     * @param int $col
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    private function readString($line, $col, Token $prev)
    {
        $start = $this->position;

        // Step over the opening quote and look at the first character of the body.
        [$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();

        // Escapes that map one character to one character, keyed by the code
        // of the character following the backslash.
        $simpleEscapes = [
            34  => '"',
            47  => '/',
            92  => '\\',
            98  => chr(8), // \b (backspace)
            102 => "\f",
            110 => "\n",
            114 => "\r",
            116 => "\t",
        ];

        $chunk = '';
        $value = '';

        // Stop at the end of the source or at a LineTerminator.
        while ($code !== null && $code !== 10 && $code !== 13) {
            if ($code === 34) {
                // Closing quote: step over it and emit the token.
                $this->moveStringCursor(1, 1);

                return new Token(
                    Token::STRING,
                    $start,
                    $this->position,
                    $line,
                    $col,
                    $prev,
                    $value . $chunk
                );
            }

            $this->assertValidStringCharacterCode($code, $this->position);
            $this->moveStringCursor(1, $bytes);

            if ($code !== 92) {
                $chunk .= $char;
            } else {
                // Backslash: flush the pending chunk, then decode the escape.
                $value .= $chunk;
                $chunk  = '';

                [, $escapeCode] = $this->readChar(true);

                if (isset($simpleEscapes[$escapeCode])) {
                    $value .= $simpleEscapes[$escapeCode];
                } elseif ($escapeCode === 117) { // u
                    $position = $this->position;
                    [$hex]    = $this->readChars(4, true);

                    if (! preg_match('/[0-9a-fA-F]{4}/', $hex)) {
                        throw new SyntaxError(
                            $this->source,
                            $position - 1,
                            'Invalid character escape sequence: \\u' . $hex
                        );
                    }

                    $unicodeCode = hexdec($hex);
                    $this->assertValidStringCharacterCode($unicodeCode, $position - 2);
                    $value .= Utils::chr($unicodeCode);
                } else {
                    throw new SyntaxError(
                        $this->source,
                        $this->position - 1,
                        'Invalid character escape sequence: \\' . Utils::chr($escapeCode)
                    );
                }
            }

            [$char, $code, $bytes] = $this->readChar();
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Unterminated string.'
        );
    }
530
531
    /**
     * Reads a block string token from the source file.
     *
     * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
     *
     * The cursor is expected to be on the first of the three opening quotes.
     * A block string may span several lines, so every line terminator consumed
     * here also updates $line and $lineStart; without that, tokens lexed after a
     * multi-line block string would be given a stale line and column.
     *
     * @param int $line line of the opening quotes, used for the returned token
     * @param int $col  column of the opening quotes, used for the returned token
     *
     * @return Token
     *
     * @throws SyntaxError
     */
    private function readBlockString($line, $col, Token $prev)
    {
        $start = $this->position;

        // Skip leading quotes and read first string char:
        [$char, $code, $bytes] = $this->moveStringCursor(3, 3)->readChar();

        $chunk = '';
        $value = '';

        while ($code !== null) {
            // Closing Triple-Quote (""")
            if ($code === 34) {
                // Peek at the two characters following this quote.
                [, $nextCode]     = $this->moveStringCursor(1, 1)->readChar();
                [, $nextNextCode] = $this->moveStringCursor(1, 1)->readChar();

                if ($nextCode === 34 && $nextNextCode === 34) {
                    // Step over the third quote so the token ends after it.
                    $this->moveStringCursor(1, 1);

                    return new Token(
                        Token::BLOCK_STRING,
                        $start,
                        $this->position,
                        $line,
                        $col,
                        $prev,
                        BlockString::value($value . $chunk)
                    );
                }

                // Not a terminator: move the cursor back onto the first quote.
                $this->moveStringCursor(-2, -2);
            }

            $this->assertValidBlockStringCharacterCode($code, $this->position);
            $this->moveStringCursor(1, $bytes);

            // Keep line bookkeeping in step with the consumed line terminators.
            // A CR directly followed by LF is counted once, when the LF is consumed.
            if ($code === 10) {
                $this->line++;
                $this->lineStart = $this->position;
            } elseif ($code === 13) {
                [, $codeAfterCr] = $this->readChar();

                if ($codeAfterCr !== 10) {
                    $this->line++;
                    $this->lineStart = $this->position;
                }
            }

            // Escape Triple-Quote (\"""): only a backslash can start one, so the
            // three-character lookahead is skipped for every other character.
            $isEscapedTripleQuote = false;

            if ($code === 92) {
                [, $nextCode]         = $this->readChar();
                [, $nextNextCode]     = $this->moveStringCursor(1, 1)->readChar();
                [, $nextNextNextCode] = $this->moveStringCursor(1, 1)->readChar();

                $isEscapedTripleQuote = $nextCode === 34 &&
                    $nextNextCode === 34 &&
                    $nextNextNextCode === 34;

                if ($isEscapedTripleQuote) {
                    // Step over the third quote of the escape sequence.
                    $this->moveStringCursor(1, 1);
                } else {
                    // Undo the lookahead.
                    $this->moveStringCursor(-2, -2);
                }
            }

            if ($isEscapedTripleQuote) {
                $value .= $chunk . '"""';
                $chunk  = '';
            } else {
                $chunk .= $char;
            }

            [$char, $code, $bytes] = $this->readChar();
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Unterminated string.'
        );
    }
603
604 127
    /**
     * Rejects characters that may not appear inside a single-line string.
     *
     * Control characters below U+0020 are invalid, with tab as the only exception.
     *
     * @param int|null $code     character code to check
     * @param int      $position position reported in the error
     *
     * @throws SyntaxError
     */
    private function assertValidStringCharacterCode($code, $position)
    {
        if ($code >= 0x0020 || $code === 0x0009) {
            return;
        }

        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
615
616 24
    /**
     * Rejects characters that may not appear inside a block string.
     *
     * Control characters below U+0020 are invalid, except tab, line feed and
     * carriage return.
     *
     * @param int|null $code     character code to check
     * @param int      $position position reported in the error
     *
     * @throws SyntaxError
     */
    private function assertValidBlockStringCharacterCode($code, $position)
    {
        $isAllowedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;

        if ($code >= 0x0020 || $isAllowedControl) {
            return;
        }

        throw new SyntaxError(
            $this->source,
            $position,
            'Invalid character within String: ' . Utils::printCharCode($code)
        );
    }
627
628
    /**
     * Moves the cursor past insignificant characters (tab, space, comma, BOM and
     * line terminators) and stops on the first character that is none of these,
     * or at the end of the source.
     *
     * $line and $lineStart are updated for every line terminator that is skipped;
     * a carriage return directly followed by a line feed counts as one line break.
     */
    private function positionAfterWhitespace()
    {
        while ($this->position < $this->source->length) {
            [, $code, $bytes] = $this->readChar();

            // tab | space | comma | BOM
            $isInlineBlank = $code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF;

            if ($isInlineBlank) {
                $this->moveStringCursor(1, $bytes);
                continue;
            }

            if ($code !== 10 && $code !== 13) {
                // First significant character reached.
                return;
            }

            // Line terminator: step over it ...
            $this->moveStringCursor(1, $bytes);

            if ($code === 13) {
                // ... and over the line feed of a CR LF pair as well.
                [, $followingCode, $followingBytes] = $this->readChar();

                if ($followingCode === 10) {
                    $this->moveStringCursor(1, $followingBytes);
                }
            }

            $this->line++;
            $this->lineStart = $this->position;
        }
    }
658
659
    /**
     * Reads a comment token from the source file.
     *
     * #[\u0009\u0020-\uFFFF]*
     *
     * The cursor is expected to be on the leading "#". The token value is the
     * comment text after the "#" and ends before the character that terminates
     * the comment (a line terminator, another control character or the end of the
     * source); that terminating character is neither consumed nor part of the value.
     *
     * @param int $line
     * @param int $col
     *
     * @return Token
     */
    private function readComment($line, $col, Token $prev)
    {
        $start = $this->position;
        $value = '';

        // Step over the "#" (a single byte) and look at the first comment character.
        [$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();

        // SourceCharacter but not LineTerminator. The character is tested before it
        // is appended, so the terminating character never ends up in the value.
        while ($code && ($code > 0x001F || $code === 0x0009)) {
            $value                .= $char;
            [$char, $code, $bytes] = $this->moveStringCursor(1, $bytes)->readChar();
        }

        return new Token(
            Token::COMMENT,
            $start,
            $this->position,
            $line,
            $col,
            $prev,
            $value
        );
    }
693
694
    /**
     * Reads the next UTF-8 character from the byte stream.
     *
     * The length of the character is derived from its first byte. When there is
     * no byte at the requested offset, an empty character with a null code and a
     * byte length of 0 is returned.
     *
     * @param bool     $advance            whether to move the cursor past the character that was read
     * @param int|null $byteStreamPosition byte offset to read from; null means the current cursor
     *
     * @return (string|int|null)[] triple of [character, character code or null, byte length]
     */
    private function readChar($advance = false, $byteStreamPosition = null)
    {
        if ($byteStreamPosition === null) {
            $byteStreamPosition = $this->byteStreamPosition;
        }

        $code           = null;
        $utf8char       = '';
        $bytes          = 0;
        $positionOffset = 0;

        if (isset($this->source->body[$byteStreamPosition])) {
            $ord = ord($this->source->body[$byteStreamPosition]);

            if ($ord < 128) {
                $bytes = 1;
            } elseif ($ord < 224) {
                $bytes = 2;
            } elseif ($ord < 240) {
                $bytes = 3;
            } else {
                $bytes = 4;
            }

            // Collect the bytes of the character. The isset() guard stops at the end
            // of the body, so a multi-byte sequence that is cut off by the end of the
            // source no longer reads uninitialized string offsets.
            $end = $byteStreamPosition + $bytes;
            for ($pos = $byteStreamPosition; $pos < $end && isset($this->source->body[$pos]); $pos++) {
                $utf8char .= $this->source->body[$pos];
            }

            $positionOffset = 1;
            $code           = $bytes === 1 ? $ord : Utils::ord($utf8char);
        }

        if ($advance) {
            $this->moveStringCursor($positionOffset, $bytes);
        }

        return [$utf8char, $code, $bytes];
    }
740
741
    /**
     * Reads the next $charCount UTF-8 characters from the byte stream.
     *
     * @param int      $charCount          number of characters to read
     * @param bool     $advance            whether to move the cursor past the characters that were read
     * @param int|null $byteStreamPosition byte offset to start from; null means the current cursor
     *
     * @return (string|int)[] pair of [characters read, total number of bytes they occupy]
     */
    private function readChars($charCount, $advance = false, $byteStreamPosition = null)
    {
        $result     = '';
        $totalBytes = 0;
        // Only null means "use the cursor": an explicit offset of 0 must be honoured,
        // which the short ternary (?:) would not do because 0 is falsy.
        $byteOffset = $byteStreamPosition ?? $this->byteStreamPosition;

        for ($i = 0; $i < $charCount; $i++) {
            [$char, , $bytes] = $this->readChar(false, $byteOffset);
            $totalBytes      += $bytes;
            $byteOffset      += $bytes;
            $result          .= $char;
        }

        if ($advance) {
            $this->moveStringCursor($charCount, $totalBytes);
        }

        return [$result, $totalBytes];
    }
768
769
    /**
     * Shifts both internal cursors: the character position and the byte position.
     *
     * Negative offsets move the cursors backwards.
     *
     * @param int $positionOffset   number of characters to move by
     * @param int $byteStreamOffset number of bytes to move by
     *
     * @return self this lexer, to allow chaining
     */
    private function moveStringCursor($positionOffset, $byteStreamOffset)
    {
        $this->byteStreamPosition += $byteStreamOffset;
        $this->position           += $positionOffset;

        return $this;
    }
784
}
785