Issues (2)

src/Lexer.php (1 issue)

Labels
Severity
1
<?php
2
/**
3
 * This file is part of NACL.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 *
8
 * @copyright 2019 Nuglif (2018) Inc.
9
 * @license   http://www.opensource.org/licenses/mit-license.html  MIT License
10
 * @author    Pierrick Charron <[email protected]>
11
 * @author    Charle Demers <[email protected]>
12
 */
13
14
declare(strict_types=1);
15
16
namespace Nuglif\Nacl;
17
18
class Lexer extends AbstractLexer
19
{
20
    /** Lexer state entered by getRules() after an opening double quote. */
    protected const STATE_INSTRING  = 1;
    /** Heredoc state identifier; not referenced by the rules visible in this class. */
    protected const STATE_INHEREDOC = 2;

    // Pattern fragments used as rule keys by getRules().
    // NOTE(review): REGEX_VAR and REGEX_HEREDOC deliberately start with a bare
    // "?:" — presumably AbstractLexer wraps every pattern in "(...)" so this
    // turns the wrapper into a non-capturing group and exposes the inner
    // capture as the token value; confirm against AbstractLexer.
    public const REGEX_SPACE      = '[ \t\n\r]+';
    public const REGEX_COMMENT    = '(?://|\#).*';
    public const REGEX_COMMENT_ML = '/\*';
    public const REGEX_NAME       = '[A-Za-z_][A-Za-z0-9_-]*';
    public const REGEX_VAR        = '?:\${([A-Za-z0-9_]+)}';
    // Number with optional exponent and optional unit suffix. The single-letter
    // suffix class is "[bshdwy]": a "|" inside a character class is a literal,
    // not alternation, so listing it would let "1|" lex as a number and swallow
    // the "|" operator that REGEX_TOKEN is meant to produce.
    public const REGEX_NUM        = '(?:[0-9]*\.?[0-9]+|[0-9]+\.)(?:[eE](?:\+|-)?[0-9]+)?(?:m(?:in|s)|[KkGgMm][Bb]?|[bshdwy])?';
    public const REGEX_DQUOTE     = '"';
    public const REGEX_HEREDOC    = '?:<<<([A-Za-z0-9_]+)\n';
    public const REGEX_BOOL       = '(?:true|false|yes|no|on|off)\b';
    public const REGEX_NULL       = 'null\b';
    public const REGEX_TOKEN      = '[\[\]=:{};,.()&|%^/*+-]|<<|>>';
    public const REGEX_ANY        = '.';

    /** Accumulates the decoded contents of the double-quoted string being lexed. */
    private string $textBuffer = '';
37
38 591
    /**
     * Builds the rule table of the lexer: for every lexer state, an ordered
     * map of pattern => action.
     *
     * An action is either `false` (used for whitespace and single-line
     * comments) or a closure. Closures that take `$yylval` by reference
     * rewrite the matched text into the token's value; a closure's return
     * value, when there is one, is the token identifier.
     *
     * NOTE(review): the meaning of `false`, the matching order of the patterns
     * and the non-returning nature of error() are defined by AbstractLexer,
     * which is not visible here — the declaration order below is kept exactly
     * as it was because it presumably determines rule priority.
     *
     * @return array<array-key, array<array-key, \Closure|false>>
     */
    protected function getRules(): array
    {
        return [
            self::STATE_INITIAL => [
                self::REGEX_SPACE      => false,
                self::REGEX_COMMENT    => false,
                self::REGEX_COMMENT_ML => function () {
                    // Jump straight to the closing "*/", counting the newlines skipped.
                    $pos = strpos($this->content, '*/', $this->count);
                    if (false === $pos) {
                        $this->line += substr_count(substr($this->content, $this->count), "\n");
                        // Presumably throws; the code below relies on $pos being an int.
                        $this->error('Unterminated multiline comment');
                    }
                    $this->line += substr_count(substr($this->content, $this->count, (int) $pos - $this->count + 2), "\n");
                    $this->count = (int) $pos + 2;
                },
                self::REGEX_DQUOTE => function (): void {
                    // Opening quote: switch to string mode with an empty buffer.
                    $this->begin(self::STATE_INSTRING);
                    $this->textBuffer = '';
                },
                self::REGEX_BOOL => function (mixed &$yylval): int {
                    $yylval = TypeCaster::toBool($yylval);

                    return Token::T_BOOL;
                },
                self::REGEX_NULL => function (mixed &$yylval): int {
                    $yylval = null;

                    return Token::T_NULL;
                },
                self::REGEX_NUM => function (mixed &$yylval): int {
                    $yylval = TypeCaster::toNum($yylval);

                    return Token::T_NUM;
                },
                self::REGEX_NAME => fn() => Token::T_NAME,
                self::REGEX_HEREDOC => function (mixed &$yylval): int {
                    // $yylval holds the heredoc label; the body ends at the
                    // next occurrence of a newline followed by that label.
                    $needle = "\n" . $yylval;
                    $pos = strpos($this->content, $needle, $this->count);
                    if (false === $pos) {
                        $this->line += substr_count(substr($this->content, $this->count), "\n");
                        // Presumably throws; the code below relies on $pos being an int.
                        $this->error('Unterminated HEREDOC');
                    }

                    $yylval = substr($this->content, $this->count, (int) $pos - $this->count);
                    // +1 accounts for the newline that is part of the terminator.
                    $this->line += substr_count($yylval, "\n") + 1;
                    $this->count += strlen($yylval) + strlen($needle);

                    return Token::T_END_STR;
                },
                self::REGEX_TOKEN => fn(mixed $yylval): string => $yylval,
                self::REGEX_VAR => fn(): int => Token::T_VAR,
                self::REGEX_ANY => function (mixed $yylval): void {
                    $this->error('Unexpected char \'' . $yylval . '\'');
                },
                self::EOF => fn(): int => Token::T_EOF,
            ],
            self::STATE_INSTRING => [
                '[^\\\"$]+' => function (mixed &$yylval) {
                    // Plain run of characters. It is only emitted as a token
                    // when a "$" follows, so that a possible variable
                    // interpolation starts from an empty buffer.
                    $this->textBuffer .= $yylval;
                    if ('$' === substr($this->content, $this->count, 1)) {
                        $yylval = $this->textBuffer;
                        $this->textBuffer = '';

                        return Token::T_STRING;
                    }
                },
                '?:\\\(.)' => function (mixed $yylval) {
                    // Backslash escape; $yylval is the character after the backslash.
                    switch ($yylval) {
                        case 'n':
                            $this->textBuffer .= "\n";
                            break;
                        case 't':
                            $this->textBuffer .= "\t";
                            break;
                        case '\\':
                        case '/':
                        case '"':
                            $this->textBuffer .= $yylval;
                            break;
                        case 'u':
                            $utfCode = substr($this->content, $this->count, 4);
                            if (preg_match('/[A-Fa-f0-9]{4,4}/', $utfCode)) {
                                // hexdec() is declared int|float; four hex digits always
                                // fit in an int, and the cast keeps the strictly typed
                                // fromCharCode(int) call sound.
                                $utf = (int) hexdec($utfCode);
                                $this->count += 4;
                                // UTF-16 surrogate pair: a high surrogate directly followed by
                                // a "\uDC00".."\uDFFF" escape is combined into one code point.
                                if ($utf >= 0xD800 && $utf <= 0xDBFF && preg_match('/^\\\\u[dD][c-fC-F][0-9a-fA-F][0-9a-fA-F]/', substr($this->content, $this->count, 6), $matches)) {
                                    $lowSurrogate = (int) hexdec(substr($matches[0], -4));
                                    $utf = (($utf & 0x3FF) << 10) + ($lowSurrogate & 0x3FF) + 0x10000;
                                    $this->count += 6;
                                }
                                $this->textBuffer .= $this->fromCharCode($utf);
                                break;
                            }
                            // "\u" without four hex digits is kept verbatim by the default case.
                            /* no break */
                        default:
                            $this->textBuffer .= '\\' . $yylval;
                            break;
                    }
                },
                '\$' => function (mixed &$yylval) {
                    // "${name}" is an interpolated variable; any other "$" is literal text.
                    if (preg_match('/^{([A-Za-z0-9_]+)}/', substr($this->content, $this->count), $matches)) {
                        $this->count += strlen($matches[0]);
                        $yylval = $matches[1];

                        return Token::T_ENCAPSED_VAR;
                    }

                    $this->textBuffer .= $yylval;
                },
                self::REGEX_DQUOTE => function (mixed &$yylval) {
                    // Closing quote: emit what was buffered and leave string mode.
                    $yylval = $this->textBuffer;
                    $this->begin(self::STATE_INITIAL);

                    return Token::T_END_STR;
                },
                self::EOF => function () {
                    $this->error('Unterminated string');
                },
            ],
        ];
    }
159
160 2
    /**
     * Encodes an integer code point as a one- to four-byte UTF-8 sequence.
     *
     * The length is chosen from the highest bit set: up to 7 bits give one
     * byte, up to 11 bits two, up to 16 bits three, anything else four. The
     * value is not validated, so surrogates and out-of-range values are
     * encoded as they are.
     *
     * @param int $bytes Code point to encode.
     *
     * @return string The encoded byte sequence.
     */
    private function fromCharCode(int $bytes): string
    {
        // Continuation byte (10xxxxxx) carrying the six payload bits found at $shift.
        $continuation = static fn (int $shift): string => chr(0x80 | (($bytes >> $shift) & 0x3F));

        // No bit above the low 7: plain single byte.
        if (($bytes & ~0x7F) === 0) {
            return chr($bytes);
        }

        // No bit above the low 11: 110xxxxx 10xxxxxx.
        if (($bytes & ~0x07FF) === 0) {
            return chr(0xc0 | ($bytes >> 6)) . $continuation(0);
        }

        // No bit above the low 16: 1110xxxx 10xxxxxx 10xxxxxx.
        if (($bytes & ~0xFFFF) === 0) {
            return chr(0xe0 | ($bytes >> 12)) . $continuation(6) . $continuation(0);
        }

        // Everything else: 11110xxx followed by three continuation bytes.
        return chr(0xF0 | ($bytes >> 18)) . $continuation(12) . $continuation(6) . $continuation(0);
    }
175
}
176