Lexer - Code Metrics - Inspection of "Lexer optimizations" - digiaonline/graphql-php - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#196)

by Christoffer

created 2018-04-11 20:45 UTC

Lexer F

↳ Parent: Project

Complexity

Total Complexity

111

Size/Duplication

Total Lines	698
Duplicated Lines	0 %

Importance

Changes

Metric	Value
dl	0
loc	698
rs	1.2694
c	0
b	0
f	0
wmc	111

26 Methods

Rating	Name	Size	Complexity
A	getOption()	3	1
A	__construct()	12	1
A	advance()	4	1
A	lookahead()	13	3
A	createStartOfFileToken()	3	1
C	lexNumber()	55	10
C	lexBlockString()	38	7
A	getSource()	3	1
A	skipDigits()	12	3
A	getToken()	3	1
A	getLastToken()	3	1
A	readCharCode()	19	4
D	skipWhitespace()	25	9
A	lexPunctuation()	7	2
A	lexName()	15	4
A	isEscapedTripleQuote()	6	4
A	isSpread()	5	3
A	isTripleQuote()	5	3
C	lexString()	79	17
C	readToken()	53	24
A	isString()	3	2
A	lexSpread()	3	1
A	lexComment()	16	2
A	createEndOfFileToken()	3	1
A	createSyntaxErrorException()	6	1
A	unexpectedCharacterMessage()	12	4

How to fix Complexity

<?php

namespace Digia\GraphQL\Language;

use Digia\GraphQL\Error\SyntaxErrorException;

class Lexer implements LexerInterface
{
    protected const ENCODING = 'UTF-8';

    /**
     * A map between punctuation character code and the corresponding token kind.
     *
     * @var array
     */
    protected static $codeTokenKindMap = [
        33  => TokenKindEnum::BANG,
        36  => TokenKindEnum::DOLLAR,
        38  => TokenKindEnum::AMP,
        40  => TokenKindEnum::PAREN_L,
        41  => TokenKindEnum::PAREN_R,
        58  => TokenKindEnum::COLON,
        61  => TokenKindEnum::EQUALS,
        64  => TokenKindEnum::AT,
        91  => TokenKindEnum::BRACKET_L,
        93  => TokenKindEnum::BRACKET_R,
        123 => TokenKindEnum::BRACE_L,
        124 => TokenKindEnum::PIPE,
        125 => TokenKindEnum::BRACE_R,
    ];

    /**
     * The source file for this lexer.
     *
     * @var Source
     */
    protected $source;

    /**
     * The contents of the source file.
     *
     * @var string
     */
    protected $body;

    /**
     * The total number of characters in the source file.
     *
     * @var int
     */
    protected $bodyLength;

    /**
     * The options for this lexer.
     *
     * @var array
     */
    protected $options = [];

    /**
     * The previously focused non-ignored token.
     *
     * @var Token
     */
    protected $lastToken;

    /**
     * The currently focused non-ignored token.
     *
     * @var Token
     */
    protected $token;

    /**
     * The current position.
     *
     * @var int
     */
    protected $pos;

    /**
     * The (1-indexed) line containing the current token.
     *
     * @var int
     */
    protected $line;

    /**
     * The character offset at which the current line begins.
     *
     * @var int
     */
    protected $lineStart;

    /**
     * @var array
     */
    protected static $charCodeCache = [];

    /**
     * Lexer constructor.
     *
     * Both the current and the previous token start out as the same
     * start-of-file token; lexing begins on line 1 with the line starting at offset 0.
     *
     * @param Source $source  The source whose body is tokenized.
     * @param array  $options The options for this lexer, readable through getOption().
     */
    public function __construct(Source $source, array $options)
    {
        $startOfFileToken = $this->createStartOfFileToken();

        $this->lastToken  = $startOfFileToken;
        $this->token      = $startOfFileToken;
        $this->line       = 1;
        $this->lineStart  = 0;
        $this->body       = $source->getBody();
        // Positions in this class are character offsets: readCharCode() indexes the body with
        // mb_substr(). The length therefore has to be counted in characters as well. A byte
        // count is larger than the character count as soon as the body holds a multi-byte
        // character, and the `pos >= bodyLength` end-of-file check would then never trigger.
        $this->bodyLength = \mb_strlen($this->body, self::ENCODING);
        $this->source     = $source;
        $this->options    = $options;
    }

    /**
     * Moves the lexer forward by one non-ignored token: the current token becomes
     * the last token, and the result of lookahead() becomes the current token.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function advance(): Token
    {
        $this->lastToken = $this->token;
        $this->token     = $this->lookahead();

        return $this->token;
    }

    /**
     * Reads ahead from the current token and returns the next token that is not a comment.
     * Every token read on the way is linked to its predecessor through setNext().
     * When the current token is already EOF it is returned unchanged.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function lookahead(): Token
    {
        $current = $this->token;

        if (TokenKindEnum::EOF === $current->getKind()) {
            return $current;
        }

        do {
            $following = $this->readToken($current);
            $current->setNext($following);
            $current = $following;
        } while (TokenKindEnum::COMMENT === $current->getKind());

        return $current;
    }

    /**
     * Looks up a lexer option by name, falling back to $default when the option
     * is not set (or is set to null).
     *
     * @inheritdoc
     */
    public function getOption(string $name, $default = null)
    {
        if (isset($this->options[$name])) {
            return $this->options[$name];
        }

        return $default;
    }

    /**
     * Returns the source this lexer was constructed with.
     *
     * @inheritdoc
     */
    public function getSource(): Source
    {
        $source = $this->source;

        return $source;
    }

    /**
     * Returns the currently focused non-ignored token.
     *
     * @inheritdoc
     */
    public function getToken(): Token
    {
        $current = $this->token;

        return $current;
    }

    /**
     * Returns the previously focused non-ignored token.
     *
     * @inheritdoc
     */
    public function getLastToken(): Token
    {
        $previous = $this->lastToken;

        return $previous;
    }

    /**
     * Builds (but does not throw) a syntax error located at the current position.
     * Without an explicit description, the message is derived from the character
     * found at the current position.
     *
     * @inheritdoc
     */
    public function createSyntaxErrorException(?string $description = null): SyntaxErrorException
    {
        if (null === $description) {
            $description = $this->unexpectedCharacterMessage($this->readCharCode($this->pos));
        }

        return new SyntaxErrorException($this->source, $this->pos, $description);
    }

    /**
     * Reads the token that follows the given token.
     *
     * Moves the position to the end of $prev, skips ignored characters and then picks
     * the lex*() method matching the character code at that position. Once the position
     * has reached the body length, an EOF token is produced instead.
     *
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException If no token kind matches the character at the current position.
     */
    protected function readToken(Token $prev): Token
    {
        $this->pos = $prev->getEnd();

        $this->skipWhitespace();

        // Columns are 1-indexed and counted from the start of the current line.
        $col  = 1 + $this->pos - $this->lineStart;
        $line = $this->line;

        if ($this->pos >= $this->bodyLength) {
            return $this->createEndOfFileToken($line, $col, $prev);
        }

        $code = $this->readCharCode($this->pos);

        switch ($code) {
            // Punctuation: [!$&:=@|()\[\]{}]{1}
            case 33:
            case 36:
            case 38:
            case 58:
            case 61:
            case 64:
            case 124:
            case 40:
            case 41:
            case 91:
            case 93:
            case 123:
            case 125:
                return $this->lexPunctuation($code, $line, $col, $prev);

            // Comment: #[\u0009\u0020-\uFFFF]*
            case 35:
                return $this->lexComment($line, $col, $prev);
        }

        // Int:   -?(0|[1-9][0-9]*)
        // Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
        if (45 === $code || isNumber($code)) {
            return $this->lexNumber($code, $line, $col, $prev);
        }

        // Name: [_A-Za-z][_0-9A-Za-z]*
        // Digits never get here, they were consumed by the number branch above.
        if (isAlphaNumeric($code)) {
            return $this->lexName($line, $col, $prev);
        }

        // Three-character tokens are only attempted when the body holds at least three characters.
        $canHoldTriple = $this->bodyLength >= 3;

        // Spread: ...
        if ($canHoldTriple && $this->isSpread($code)) {
            return $this->lexSpread($line, $col, $prev);
        }

        // String: "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
        if ($this->isString($code)) {
            return $this->lexString($line, $col, $prev);
        }

        // Block String: """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
        if ($canHoldTriple && $this->isTripleQuote($code)) {
            return $this->lexBlockString($line, $col, $prev);
        }

        throw $this->createSyntaxErrorException();
    }

    /**
     * Creates a Start Of File (SOF) token, passing only the token kind to the Token constructor.
     *
     * @return Token
     */
    protected function createStartOfFileToken(): Token
    {
        $startOfFile = new Token(TokenKindEnum::SOF);

        return $startOfFile;
    }

    /**
     * Creates an End Of File (EOF) token. The token is zero-width: it both starts
     * and ends at the body length.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function createEndOfFileToken(int $line, int $col, Token $prev): Token
    {
        $end = $this->bodyLength;

        return new Token(TokenKindEnum::EOF, $end, $end, $line, $col, $prev);
    }

    /**
     * Reads a single-character punctuation token from the source file.
     * The token kind is looked up in the code-to-kind map by character code.
     *
     * @param int   $code
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException If the character code has no entry in the map.
     */
    protected function lexPunctuation(int $code, int $line, int $col, Token $prev): ?Token
    {
        $kind = self::$codeTokenKindMap[$code] ?? null;

        if (null === $kind) {
            throw $this->createSyntaxErrorException();
        }

        $start = $this->pos;

        return new Token($kind, $start, $start + 1, $line, $col, $prev);
    }

    /**
     * Reads a name token from the source file.
     *
     * The character at the starting position is taken as part of the name without
     * being checked again; after that the position advances for as long as
     * isAlphaNumeric() accepts the character code, or until the end of the body.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexName(int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;

        for (++$this->pos; $this->pos !== $this->bodyLength; ++$this->pos) {
            if (!isAlphaNumeric($this->readCharCode($this->pos))) {
                break;
            }
        }

        $end = $this->pos;

        return new Token(
            TokenKindEnum::NAME,
            $start,
            $end,
            $line,
            $col,
            $prev,
            sliceString($this->body, $start, $end)
        );
    }

    /**
     * Reads a number (int or float) token from the source file.
     *
     * Accepts an optional minus sign, then either a lone 0 or a run of digits, then an
     * optional fraction and an optional exponent. The token is a FLOAT as soon as a
     * fraction or an exponent is present, otherwise it is an INT.
     *
     * @param int   $code The character code at the current position.
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException If a digit follows a leading 0, or a digit is missing where one is required.
     */
    protected function lexNumber(int $code, int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;
        $kind  = TokenKindEnum::INT;

        // Optional sign: -
        if (45 === $code) {
            $code = $this->readCharCode(++$this->pos);
        }

        if (48 !== $code) {
            // Integer part without a leading zero: at least one digit is required.
            $this->skipDigits($code);
            $code = $this->readCharCode($this->pos);
        } else {
            // A leading 0 must stand alone.
            $code = $this->readCharCode(++$this->pos);

            if (isNumber($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid number, unexpected digit after 0: %s.', printCharCode($code))
                );
            }
        }

        // Fraction: . followed by at least one digit
        if (46 === $code) {
            $kind = TokenKindEnum::FLOAT;

            $this->skipDigits($this->readCharCode(++$this->pos));
            $code = $this->readCharCode($this->pos);
        }

        // Exponent: E or e, an optional sign, then at least one digit
        if (69 === $code || 101 === $code) {
            $kind = TokenKindEnum::FLOAT;
            $code = $this->readCharCode(++$this->pos);

            if (43 === $code || 45 === $code) {
                // + or -
                $code = $this->readCharCode(++$this->pos);
            }

            $this->skipDigits($code);
        }

        $end = $this->pos;

        return new Token($kind, $start, $end, $line, $col, $prev, sliceString($this->body, $start, $end));
    }

    /**
     * Skips a run of digits at the current position. The run must contain at least
     * one digit; afterwards the position points at the first non-digit character.
     *
     * @param int $code The character code at the current position.
     * @throws SyntaxErrorException If the given character code is not a digit.
     */
    protected function skipDigits(int $code): void
    {
        if (!isNumber($code)) {
            throw $this->createSyntaxErrorException(
                \sprintf('Invalid number, expected digit but got: %s.', printCharCode($code))
            );
        }

        do {
            ++$this->pos;
            $code = $this->readCharCode($this->pos);
        } while (isNumber($code));
    }

    /**
     * Reads a comment token from the source file.
     *
     * The comment runs from the current position up to (not including) the first
     * character whose code is at most 0x1f and is not a tab. The token value leaves
     * out the first character of the comment.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexComment(int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;

        // readCharCode() yields 0 past the end of the body, which also ends the loop.
        do {
            ++$this->pos;
            $code = $this->readCharCode($this->pos);
        } while ($code > 0x001f || 0x0009 === $code); // SourceCharacter but not LineTerminator

        $end   = $this->pos;
        $value = sliceString($this->body, $start + 1, $end);

        return new Token(TokenKindEnum::COMMENT, $start, $end, $line, $col, $prev, $value);
    }

    /**
     * Reads a spread token from the source. The token spans three characters
     * starting at the current position; the characters themselves are not checked here.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexSpread(int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;

        return new Token(TokenKindEnum::SPREAD, $start, $start + 3, $line, $col, $prev);
    }

    /**
     * Reads a string token from the source.
     *
     * The value is assembled from chunks: unescaped runs are sliced straight from the body,
     * and each escape sequence closes the running chunk and appends its replacement.
     * Reaching a line terminator or the end of the body before the closing quote is an error.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character, an invalid escape sequence or an unterminated string.
     */
    protected function lexString(int $line, int $col, Token $prev): Token
    {
        $start      = $this->pos;
        $chunkStart = ++$this->pos; // skip the opening quote
        $value      = '';

        while ($this->pos < $this->bodyLength) {
            $code = $this->readCharCode($this->pos);

            if (isLineTerminator($code)) {
                break;
            }

            // Closing Quote (")
            if (34 === $code) {
                $value .= sliceString($this->body, $chunkStart, $this->pos);
                return new Token(TokenKindEnum::STRING, $start, $this->pos + 1, $line, $col, $prev, $value);
            }

            if (isSourceCharacter($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            ++$this->pos;

            if (92 !== $code) {
                continue;
            }

            // Backslash: flush the chunk that ends just before it, then handle the escaped character.
            $value .= sliceString($this->body, $chunkStart, $this->pos - 1);

            $code = $this->readCharCode($this->pos);

            // NOTE(review): the b, f, n, r, t and u branches append single-quoted PHP strings,
            // i.e. the literal backslash sequence rather than the control character it names.
            // Kept as-is because callers may rely on it; confirm against the GraphQL spec.
            switch ($code) {
                case 34: // "
                    $value .= '"';
                    break;
                case 47: // /
                    $value .= '/';
                    break;
                case 92: // \
                    $value .= '\\';
                    break;
                case 98: // b
                    $value .= '\b';
                    break;
                case 102: // f
                    $value .= '\f';
                    break;
                case 110: // n
                    $value .= '\n';
                    break;
                case 114: // r
                    $value .= '\r';
                    break;
                case 116: // t
                    $value .= '\t';
                    break;
                case 117: // u
                    $unicodeString = sliceString($this->body, $this->pos + 1, $this->pos + 5);

                    // Anchored so that the whole slice has to be exactly four hex digits.
                    if (!\preg_match('/\A[0-9A-Fa-f]{4}\z/', $unicodeString)) {
                        throw $this->createSyntaxErrorException(
                            \sprintf('Invalid character escape sequence: \\u%s.', $unicodeString)
                        );
                    }

                    $value     .= '\\u' . $unicodeString;
                    $this->pos += 4;

                    break;
                default:
                    // $code is a Unicode code point; chr() would cut anything above 255 down
                    // to a single byte and put invalid UTF-8 into the message.
                    throw $this->createSyntaxErrorException(
                        \sprintf('Invalid character escape sequence: \\%s.', \mb_chr($code, self::ENCODING))
                    );
            }

            ++$this->pos;

            $chunkStart = $this->pos;
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }

    /**
     * Reads a block string token from the source file.
     *
     * Collects the raw text between the opening and the closing triple-quote, replacing
     * each escaped triple-quote (\""") with a plain one, and hands the raw text to
     * blockStringValue() to produce the token value.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character or when the closing triple-quote is missing.
     */
    protected function lexBlockString(int $line, int $col, Token $prev): Token
    {
        $start      = $this->pos;
        $chunkStart = $start + 3; // skip the triple-quote
        $this->pos  = $chunkStart;
        $rawValue   = '';

        while ($this->pos < $this->bodyLength) {
            $code = $this->readCharCode($this->pos);

            // Closing Triple-Quote (""")
            if ($this->isTripleQuote($code)) {
                $rawValue .= sliceString($this->body, $chunkStart, $this->pos);
                $end       = $this->pos + 3;

                return new Token(
                    TokenKindEnum::BLOCK_STRING,
                    $start,
                    $end,
                    $line,
                    $col,
                    $prev,
                    blockStringValue($rawValue)
                );
            }

            // Unlike regular strings, line terminators are allowed here.
            if (isSourceCharacter($code) && !isLineTerminator($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            if (!$this->isEscapedTripleQuote($code)) {
                ++$this->pos;
                continue;
            }

            // Escaped triple-quote: keep the text before it, add the unescaped quotes
            // and continue right after the four-character sequence.
            $rawValue   .= sliceString($this->body, $chunkStart, $this->pos) . '"""';
            $this->pos  += 4;
            $chunkStart = $this->pos;
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }

    /**
     * Skips ignored characters at the current position: tabs, spaces, commas, the BOM
     * and line breaks. Every line break increments the line counter and records where
     * the new line starts; \r\n counts as a single line break.
     */
    protected function skipWhitespace(): void
    {
        while ($this->pos < $this->bodyLength) {
            switch ($this->readCharCode($this->pos)) {
                case 9:      // tab
                case 32:     // space
                case 44:     // comma
                case 0xfeff: // BOM
                    ++$this->pos;
                    break;

                case 10: // new line (\n)
                    ++$this->pos;
                    ++$this->line;
                    $this->lineStart = $this->pos;
                    break;

                case 13: // carriage return (\r), optionally followed by a new line (\r\n)
                    $this->pos += (10 === $this->readCharCode($this->pos + 1)) ? 2 : 1;
                    ++$this->line;
                    $this->lineStart = $this->pos;
                    break;

                default:
                    // First character that is not ignored: stop here.
                    return;
            }
        }
    }

    /**
     * Returns the code of the character at the given character offset in the body,
     * or 0 when there is no character at that offset.
     *
     * Codes are memoized per character in a static cache.
     *
     * @param int $pos
     * @return int
     */
    protected function readCharCode(int $pos): int
    {
        $char = \mb_substr($this->body, $pos, 1, self::ENCODING);

        if ('' === $char) {
            return 0;
        }

        if (isset(self::$charCodeCache[$char])) {
            return self::$charCodeCache[$char];
        }

        // ord() only looks at the first byte, which is enough for single-byte characters.
        $code = \ord($char);

        if ($code >= 128) {
            $code = \mb_ord($char, self::ENCODING);
        }

        return self::$charCodeCache[$char] = $code;
    }

    /**
     * Builds the message reporting that an unexpected character was encountered.
     * A single quote gets a dedicated hint pointing at double quotes.
     *
     * @param int $code
     * @return string
     */
    protected function unexpectedCharacterMessage(int $code): string
    {
        $isRejectedCharacter = isSourceCharacter($code) && !isLineTerminator($code);

        if ($isRejectedCharacter) {
            return \sprintf('Cannot contain the invalid character %s.', printCharCode($code));
        }

        if (39 !== $code) {
            return \sprintf('Cannot parse the unexpected character %s.', printCharCode($code));
        }

        // '
        return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
    }

    /**
     * Tells whether three consecutive dots (...) start at the current position.
     *
     * @param int $code The character code at the current position.
     * @return bool
     */
    protected function isSpread(int $code): bool
    {
        if (46 !== $code) {
            return false;
        }

        $second = $this->readCharCode($this->pos + 1);

        return 46 === $second && 46 === $this->readCharCode($this->pos + 2);
    }

    /**
     * Tells whether a regular (non-block) string starts at the current position:
     * a double quote that does not open a triple-quote.
     *
     * Only the full triple-quote is excluded. Rejecting every quote that is followed
     * by a second quote would also reject the empty string (""), which would then
     * match neither this check nor isTripleQuote().
     *
     * @param int $code The character code at the current position.
     * @return bool
     */
    protected function isString(int $code): bool
    {
        return 34 === $code && !$this->isTripleQuote($code);
    }

    /**
     * Tells whether three consecutive double quotes (""") start at the current position.
     *
     * @param int $code The character code at the current position.
     * @return bool
     */
    protected function isTripleQuote(int $code): bool
    {
        if (34 !== $code) {
            return false;
        }

        if (34 !== $this->readCharCode($this->pos + 1)) {
            return false;
        }

        return 34 === $this->readCharCode($this->pos + 2);
    }

    /**
     * Tells whether an escaped triple-quote (\""") starts at the current position.
     *
     * @param int $code The character code at the current position.
     * @return bool
     */
    protected function isEscapedTripleQuote(int $code): bool
    {
        // \
        if (92 !== $code) {
            return false;
        }

        // The three characters after the backslash all have to be double quotes.
        for ($offset = 1; $offset <= 3; ++$offset) {
            if (34 !== $this->readCharCode($this->pos + $offset)) {
                return false;
            }
        }

        return true;
    }
}


1			<?php
2
3			namespace Digia\GraphQL\Language;
4
5			use Digia\GraphQL\Error\SyntaxErrorException;
6
7			class Lexer implements LexerInterface
8			{
9			protected const ENCODING = 'UTF-8';
10
11			/**
12			* A map between punctuation character code and the corresponding token kind.
13			*
14			* @var array
15			*/
16			protected static $codeTokenKindMap = [
17			33 => TokenKindEnum::BANG,
18			36 => TokenKindEnum::DOLLAR,
19			38 => TokenKindEnum::AMP,
20			40 => TokenKindEnum::PAREN_L,
21			41 => TokenKindEnum::PAREN_R,
22			58 => TokenKindEnum::COLON,
23			61 => TokenKindEnum::EQUALS,
24			64 => TokenKindEnum::AT,
25			91 => TokenKindEnum::BRACKET_L,
26			93 => TokenKindEnum::BRACKET_R,
27			123 => TokenKindEnum::BRACE_L,
28			124 => TokenKindEnum::PIPE,
29			125 => TokenKindEnum::BRACE_R,
30			];
31
32			/**
33			* The source file for this lexer.
34			*
35			* @var Source
36			*/
37			protected $source;
38
39			/**
40			* The contents of the source file.
41			*
42			* @var string
43			*/
44			protected $body;
45
46			/**
47			* The total number of characters in the source file.
48			*
49			* @var int
50			*/
51			protected $bodyLength;
52
53			/**
54			* The options for this lexer.
55			*
56			* @var array
57			*/
58			protected $options = [];
59
60			/**
61			* The previously focused non-ignored token.
62			*
63			* @var Token
64			*/
65			protected $lastToken;
66
67			/**
68			* The currently focused non-ignored token.
69			*
70			* @var Token
71			*/
72			protected $token;
73
74			/**
75			* The current position.
76			*
77			* @var int
78			*/
79			protected $pos;
80
81			/**
82			* The (1-indexed) line containing the current token.
83			*
84			* @var int
85			*/
86			protected $line;
87
88			/**
89			* The character offset at which the current line begins.
90			*
91			* @var int
92			*/
93			protected $lineStart;
94
95			/**
96			* @var array
97			*/
98			protected static $charCodeCache = [];
99
100			/**
101			* Lexer constructor.
102			* @param Source\|null $source
103			* @param array $options
104			*/
105			public function __construct(Source $source, array $options)
106			{
107			$startOfFileToken = $this->createStartOfFileToken();
108
109			$this->lastToken = $startOfFileToken;
110			$this->token = $startOfFileToken;
111			$this->line = 1;
112			$this->lineStart = 0;
113			$this->body = $source->getBody();
114			$this->bodyLength = \strlen($this->body);
115			$this->source = $source;
116			$this->options = $options;
117			}
118
119			/**
120			* @inheritdoc
121			* @throws SyntaxErrorException
122			*/
123			public function advance(): Token
124			{
125			$this->lastToken = $this->token;
126			return $this->token = $this->lookahead();
127			}
128
129			/**
130			* @inheritdoc
131			* @throws SyntaxErrorException
132			*/
133			public function lookahead(): Token
134			{
135			$token = $this->token;
136
137			if (TokenKindEnum::EOF !== $token->getKind()) {
138			do {
139			$next = $this->readToken($token);
140			$token->setNext($next);
141			$token = $next;
142			} while (TokenKindEnum::COMMENT === $token->getKind());
143			}
144
145			return $token;
146			}
147
148			/**
149			* @inheritdoc
150			*/
151			public function getOption(string $name, $default = null)
152			{
153			return $this->options[$name] ?? $default;
154			}
155
156			/**
157			* @inheritdoc
158			*/
159			public function getSource(): Source
160			{
161			return $this->source;
162			}
163
164			/**
165			* @inheritdoc
166			*/
167			public function getToken(): Token
168			{
169			return $this->token;
170			}
171
172			/**
173			* @inheritdoc
174			*/
175			public function getLastToken(): Token
176			{
177			return $this->lastToken;
178			}
179
180			/**
181			* @inheritdoc
182			*/
183			public function createSyntaxErrorException(?string $description = null): SyntaxErrorException
184			{
185			return new SyntaxErrorException(
186			$this->source,
187			$this->pos,
188			$description ?? $this->unexpectedCharacterMessage($this->readCharCode($this->pos))
189			);
190			}
191
192			/**
193			* Reads the token after the given token.
194			*
195			* @param Token $prev
196			* @return Token
197			* @throws SyntaxErrorException
198			*/
199			protected function readToken(Token $prev): Token
200			{
201			$this->pos = $prev->getEnd();
202
203			$this->skipWhitespace();
204
205			$line = $this->line;
206			$col = (1 + $this->pos) - $this->lineStart;
207
208			if ($this->pos >= $this->bodyLength) {
209			return $this->createEndOfFileToken($line, $col, $prev);
210			}
211
212			$code = $this->readCharCode($this->pos);
213
214			// Punctuation: [!$&:=@\|()\[\]{}]{1}
215			if (33 === $code \|\| 36 === $code \|\| 38 === $code \|\| 58 === $code \|\| 61 === $code \|\| 64 === $code \|\| 124 === $code \|\|
216			40 === $code \|\| 41 === $code \|\| 91 === $code \|\| 93 === $code \|\| 123 === $code \|\| 125 === $code) {
217			return $this->lexPunctuation($code, $line, $col, $prev);
218			}
219
220			// Comment: #[\u0009\u0020-\uFFFF]*
221			if (35 === $code) {
222			return $this->lexComment($line, $col, $prev);
223			}
224
225			// Int: -?(0\|[1-9][0-9]*)
226			// Float: -?(0\|[1-9][0-9]*)(\.[0-9]+)?((E\|e)(+\|-)?[0-9]+)?
227			if (45 === $code \|\| isNumber($code)) {
228			return $this->lexNumber($code, $line, $col, $prev);
229			}
230
231			// Name: [_A-Za-z][_0-9A-Za-z]*
232			if (isAlphaNumeric($code)) {
233			return $this->lexName($line, $col, $prev);
234			}
235
236			// Spread: ...
237			if ($this->bodyLength >= 3 && $this->isSpread($code)) {
238			return $this->lexSpread($line, $col, $prev);
239			}
240
241			// String: "([^"\\\u000A\u000D]\|(\\(u[0-9a-fA-F]{4}\|["\\/bfnrt])))*"
242			if ($this->isString($code)) {
243			return $this->lexString($line, $col, $prev);
244			}
245
246			// Block String: """("?"?(\\"""\|\\(?!=""")\|[^"\\]))*"""
247			if ($this->bodyLength >= 3 && $this->isTripleQuote($code)) {
248			return $this->lexBlockString($line, $col, $prev);
249			}
250
251			throw $this->createSyntaxErrorException();
252			}
253
254			/**
255			* @return Token
256			*/
257			protected function createStartOfFileToken(): Token
258			{
259			return new Token(TokenKindEnum::SOF);
260			}
261
262			/**
263			* Creates an End Of File (EOF) token.
264			*
265			* @param int $line
266			* @param int $col
267			* @param Token $prev
268			* @return Token
269			*/
270			protected function createEndOfFileToken(int $line, int $col, Token $prev): Token
271			{
272			return new Token(TokenKindEnum::EOF, $this->bodyLength, $this->bodyLength, $line, $col, $prev);
273			}
274
275			/**
276			* Reads a punctuation token from the source file.
277			*
278			* @param int $code
279			* @param int $line
280			* @param int $col
281			* @param Token $prev
282			* @return Token
283			* @throws SyntaxErrorException
284			*/
285			protected function lexPunctuation(int $code, int $line, int $col, Token $prev): ?Token
286			{
287			if (!isset(self::$codeTokenKindMap[$code])) {
288			throw $this->createSyntaxErrorException();
289			}
290
291			return new Token(self::$codeTokenKindMap[$code], $this->pos, $this->pos + 1, $line, $col, $prev);
292			}
293
294			/**
295			* Reads a name token from the source file.
296			*
297			* @param int $line
298			* @param int $col
299			* @param Token $prev
300			* @return Token
301			*/
302			protected function lexName(int $line, int $col, Token $prev): Token
303			{
304			$start = $this->pos;
305
306			++$this->pos;
307
308			while ($this->pos !== $this->bodyLength &&
309			($code = $this->readCharCode($this->pos)) !== null &&
310			isAlphaNumeric($code)) {
311			++$this->pos;
312			}
313
314			$value = sliceString($this->body, $start, $this->pos);
315
316			return new Token(TokenKindEnum::NAME, $start, $this->pos, $line, $col, $prev, $value);
317			}
318
319			/**
320			* Reads a number (int or float) token from the source file.
321			*
322			* @param int $code
323			* @param int $line
324			* @param int $col
325			* @param Token $prev
326			* @return Token
327			* @throws SyntaxErrorException
328			*/
329			protected function lexNumber(int $code, int $line, int $col, Token $prev): Token
330			{
331			$start = $this->pos;
332			$isFloat = false;
333
334			if (45 === $code) {
335			// -
336			$code = $this->readCharCode(++$this->pos);
337			}
338
339			if (48 === $code) {
340			// 0
341			$code = $this->readCharCode(++$this->pos);
342
343			if (isNumber($code)) {
344			throw $this->createSyntaxErrorException(
345			\sprintf('Invalid number, unexpected digit after 0: %s.', printCharCode($code))
346			);
347			}
348			} else {
349			$this->skipDigits($code);
350			$code = $this->readCharCode($this->pos);
351			}
352
353			if (46 === $code) {
354			// .
355			$isFloat = true;
356
357			$code = $this->readCharCode(++$this->pos);
358			$this->skipDigits($code);
359			$code = $this->readCharCode($this->pos);
360			}
361
362			if (69 === $code \|\| 101 === $code) {
363			// e or E
364			$isFloat = true;
365
366			$code = $this->readCharCode(++$this->pos);
367
368			if (43 === $code \|\| 45 === $code) {
369			// + or -
370			$code = $this->readCharCode(++$this->pos);
371			}
372
373			$this->skipDigits($code);
374			}
375
376			return new Token(
377			$isFloat ? TokenKindEnum::FLOAT : TokenKindEnum::INT,
378			$start,
379			$this->pos,
380			$line,
381			$col,
382			$prev,
383			sliceString($this->body, $start, $this->pos)
384			);
385			}
386
387			/**
388			* Skips digits at the current position.
389			*
390			* @param int $code
391			* @throws SyntaxErrorException
392			*/
393			protected function skipDigits(int $code): void
394			{
395			if (isNumber($code)) {
396			do {
397			$code = $this->readCharCode(++$this->pos);
398			} while (isNumber($code));
399
400			return;
401			}
402
403			throw $this->createSyntaxErrorException(
404			\sprintf('Invalid number, expected digit but got: %s.', printCharCode($code))
405			);
406			}
407
408			/**
409			* Reads a comment token from the source file.
410			*
411			* @param int $line
412			* @param int $col
413			* @param Token $prev
414			* @return Token
415			*/
416			protected function lexComment(int $line, int $col, Token $prev): Token
417			{
418			$start = $this->pos;
419
420			do {
421			$code = $this->readCharCode(++$this->pos);
422			} while ($code !== null && ($code > 0x001f \|\| 0x0009 === $code)); // SourceCharacter but not LineTerminator
423
424			return new Token(
425			TokenKindEnum::COMMENT,
426			$start,
427			$this->pos,
428			$line,
429			$col,
430			$prev,
431			sliceString($this->body, $start + 1, $this->pos)
432			);
433			}
434
435			/**
436			* Reads a spread token from the source.
437			*
438			* @param int $line
439			* @param int $col
440			* @param Token $prev
441			* @return Token
442			*/
443			protected function lexSpread(int $line, int $col, Token $prev): Token
444			{
445			return new Token(TokenKindEnum::SPREAD, $this->pos, $this->pos + 3, $line, $col, $prev);
446			}
447
448			/**
449			* Reads a string token from the source.
450			*
451			* @param int $line
452			* @param int $col
453			* @param Token $prev
454			* @return Token
455			* @throws SyntaxErrorException
456			*/
457			protected function lexString(int $line, int $col, Token $prev): Token
458			{
459			$start = $this->pos;
460			$chunkStart = ++$this->pos; // skip the quote
461			$value = '';
462
463			while ($this->pos < $this->bodyLength &&
464			($code = $this->readCharCode($this->pos)) !== null && !isLineTerminator($code)) {
465			// Closing Quote (")
466			if (34 === $code) {
467			$value .= sliceString($this->body, $chunkStart, $this->pos);
468			return new Token(TokenKindEnum::STRING, $start, $this->pos + 1, $line, $col, $prev, $value);
469			}
470
471			if (isSourceCharacter($code)) {
472			throw $this->createSyntaxErrorException(
473			\sprintf('Invalid character within String: %s.', printCharCode($code))
474			);
475			}
476
477			++$this->pos;
478
479			if (92 === $code) {
480			// \
481			$value .= sliceString($this->body, $chunkStart, $this->pos - 1);
482
483			$code = $this->readCharCode($this->pos);
484
485			switch ($code) {
486			case 34: // "
487			$value .= '"';
488			break;
489			case 47: // /
490			$value .= '/';
491			break;
492			case 92: // \
493			$value .= '\\';
494			break;
495			case 98: // b
496			$value .= '\b';
497			break;
498			case 102: // f
499			$value .= '\f';
500			break;
501			case 110: // n
502			$value .= '\n';
503			break;
504			case 114: // r
505			$value .= '\r';
506			break;
507			case 116: // t
508			$value .= '\t';
509			break;
510			case 117: // u
511			$unicodeString = sliceString($this->body, $this->pos + 1, $this->pos + 5);
512
513			if (!\preg_match('/[0-9A-Fa-f]{4}/', $unicodeString)) {
514			throw $this->createSyntaxErrorException(
515			\sprintf('Invalid character escape sequence: \\u%s.', $unicodeString)
516			);
517			}
518
519			$value .= '\\u' . $unicodeString;
520			$this->pos += 4;
521
522			break;
523			default:
524			throw $this->createSyntaxErrorException(
525			\sprintf('Invalid character escape sequence: \\%s.', \chr($code))
526			);
527			}
528
529			++$this->pos;
530
531			$chunkStart = $this->pos;
532			}
533			}
534
535			throw $this->createSyntaxErrorException('Unterminated string.');
536			}
537
538			/**
539			* Reads a block string token from the source file.
540			*
541			* @param int $line
542			* @param int $col
543			* @param Token $prev
544			* @return Token
545			* @throws SyntaxErrorException
546			*/
547			protected function lexBlockString(int $line, int $col, Token $prev): Token
548			{
549			$start = $this->pos;
550			$this->pos = $start + 3; // skip the triple-quote
551			$chunkStart = $this->pos;
552			$rawValue = '';
553
554			while ($this->pos < $this->bodyLength && ($code = $this->readCharCode($this->pos)) !== null) {
555			// Closing Triple-Quote (""")
556			if ($this->isTripleQuote($code)) {
557			$rawValue .= sliceString($this->body, $chunkStart, $this->pos);
558			return new Token(
559			TokenKindEnum::BLOCK_STRING,
560			$start,
561			$this->pos + 3,
562			$line,
563			$col,
564			$prev,
565			blockStringValue($rawValue)
566			);
567			}
568
569			if (isSourceCharacter($code) && !isLineTerminator($code)) {
570			throw $this->createSyntaxErrorException(
571			\sprintf('Invalid character within String: %s.', printCharCode($code))
572			);
573			}
574
575			if ($this->isEscapedTripleQuote($code)) {
576			$rawValue .= sliceString($this->body, $chunkStart, $this->pos) . '"""';
577			$this->pos += 4;
578			$chunkStart = $this->pos;
579			} else {
580			++$this->pos;
581			}
582			}
583
584			throw $this->createSyntaxErrorException('Unterminated string.');
585			}
586
587			/**
588			* Skips whitespace at the current position.
589			*/
590			protected function skipWhitespace(): void
591			{
592			while ($this->pos < $this->bodyLength) {
593			$code = $this->readCharCode($this->pos);
594
595			if (9 === $code \|\| 32 === $code \|\| 44 === $code \|\| 0xfeff === $code) {
596			// tab \| space \| comma \| BOM
597			++$this->pos;
598			} elseif (10 === $code) {
599			// new line (\n)
600			++$this->pos;
601			++$this->line;
602			$this->lineStart = $this->pos;
603			} elseif (13 === $code) {
604			// carriage return (\r)
605			if (10 === $this->readCharCode($this->pos + 1)) {
606			// carriage return and new line (\r\n)
607			$this->pos += 2;
608			} else {
609			++$this->pos;
610			}
611			++$this->line;
612			$this->lineStart = $this->pos;
613			} else {
614			break;
615			}
616			}
617			}
618
619			/**
620			* @param int $pos
621			* @return int
622			*/
623			protected function readCharCode(int $pos): int
624			{
625			$char = \mb_substr($this->body, $pos, 1, self::ENCODING);
626
627			if ('' === $char) {
628			return 0;
629			}
630
631			if (!isset(self::$charCodeCache[$char])) {
632			$code = \ord($char);
633
634			if ($code >= 128) {
635			$code = \mb_ord($char, self::ENCODING);
636			}
637
638			self::$charCodeCache[$char] = $code;
639			}
640
641			return self::$charCodeCache[$char];
642			}
643
644			/**
645			* Report a message that an unexpected character was encountered.
646			*
647			* @param int $code
648			* @return string
649			*/
650			protected function unexpectedCharacterMessage(int $code): string
651			{
652			if (isSourceCharacter($code) && !isLineTerminator($code)) {
653			return \sprintf('Cannot contain the invalid character %s.', printCharCode($code));
654			}
655
656			if ($code === 39) {
657			// '
658			return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
659			}
660
661			return \sprintf('Cannot parse the unexpected character %s.', printCharCode($code));
662			}
663
664			/**
665			* @param int $code
666			* @return bool
667			*/
668			protected function isSpread(int $code): bool
669			{
670			return 46 === $code &&
671			$this->readCharCode($this->pos + 1) === 46 &&
672			$this->readCharCode($this->pos + 2) === 46; // ...
673			}
674
675			/**
676			* @param int $code
677			* @return bool
678			*/
679			protected function isString(int $code): bool
680			{
681			return 34 === $code && $this->readCharCode($this->pos + 1) !== 34;
682			}
683
684			/**
685			* @param int $code
686			* @return bool
687			*/
688			protected function isTripleQuote(int $code): bool
689			{
690			return 34 === $code &&
691			34 === $this->readCharCode($this->pos + 1) &&
692			34 === $this->readCharCode($this->pos + 2); // """
693			}
694
695			/**
696			* @param int $code
697			* @return bool
698			*/
699			protected function isEscapedTripleQuote(int $code): bool
700			{
701			return $code === 92 &&
702			34 === $this->readCharCode($this->pos + 1) &&
703			34 === $this->readCharCode($this->pos + 2) &&
704			34 === $this->readCharCode($this->pos + 3); // \"""
705			}
706			}
707

digiaonline / graphql-php

Pull Request — master (#196)

Lexer F

Complexity

Size/Duplication

Importance

26 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like