Passed
Branch master (ef264b)
by Pierrick
01:53
created

Lexer   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 176
Duplicated Lines 0 %

Test Coverage

Coverage 94.29%

Importance

Changes 4
Bugs 0 Features 0
Metric Value
eloc 122
c 4
b 0
f 0
dl 0
loc 176
ccs 99
cts 105
cp 0.9429
rs 10
wmc 20

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A fromCharCode() 0 20 4
D getRules() 0 125 15
1
<?php
2
/**
3
 * This file is part of NACL.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 *
8
 * @copyright 2019 Nuglif (2018) Inc.
9
 * @license   http://www.opensource.org/licenses/mit-license.html  MIT License
10
 * @author    Pierrick Charron <[email protected]>
11
 * @author    Charle Demers <[email protected]>
12
 */
13
14
declare(strict_types=1);
15
16
namespace Nuglif\Nacl;
17
18
/**
 * Lexer for the NACL configuration language.
 *
 * The class only supplies the rule table (see getRules()); the scanning loop,
 * the `$content` / `$count` / `$line` cursor, `begin()`, `error()` and the
 * `EOF` pseudo-pattern are inherited from AbstractLexer, which is not visible
 * from this file.
 *
 * NOTE(review): several patterns start with a bare `?:`. That only forms a
 * valid regex if the base lexer wraps every pattern in parentheses, in which
 * case the inner group becomes the token value — confirm against AbstractLexer.
 */
class Lexer extends AbstractLexer
{
    const STATE_INITIAL   = 0;
    const STATE_INSTRING  = 1;
    // Not referenced by any rule in this class.
    const STATE_INHEREDOC = 2;

    const REGEX_SPACE      = '[ \t\n\r]+';
    const REGEX_COMMENT    = '(?://|\#).*';
    const REGEX_COMMENT_ML = '/\*';
    const REGEX_NAME       = '[A-Za-z_][A-Za-z0-9_-]*';
    const REGEX_VAR        = '?:\${([A-Za-z0-9_]+)}';
    // Number with optional exponent and optional unit suffix (min, ms, K/M/G[B], b, s, h, d, w, y).
    // The single-letter suffix class must not contain `|`: inside a character
    // class it is a literal and would make `1|2` lex as the number `1|`.
    const REGEX_NUM        = '(?:[0-9]*\.?[0-9]+|[0-9]+\.)(?:[eE](?:\+|-)?[0-9]+)?(?:m(?:in|s)|[KkGgMm][Bb]?|[bshdwy])?';
    const REGEX_DQUOTE     = '"';
    const REGEX_HEREDOC    = '?:<<<([A-Za-z0-9_]+)\n';
    const REGEX_BOOL       = '(?:true|false|yes|no|on|off)\b';
    const REGEX_NULL       = 'null\b';
    const REGEX_TOKEN      = '[\[\]=:{};,.()&|%^/*+-]|<<|>>';
    const REGEX_ANY        = '.';

    /**
     * Accumulates the decoded text of the double-quoted string being scanned.
     *
     * @var string
     */
    private $textBuffer = '';

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the rule table, indexed by lexer state.
     *
     * Each state maps a regex fragment to either `false` or a callback. The
     * callbacks below return a token type, or nothing when the matched text
     * only updates internal state. Rule order is significant and preserved.
     *
     * NOTE(review): `false` looks like "match and discard" (it is used for
     * whitespace and line comments) — confirm against AbstractLexer.
     *
     * @return array
     */
    protected function getRules()
    {
        return [
            self::STATE_INITIAL  => $this->buildInitialStateRules(),
            self::STATE_INSTRING => $this->buildStringStateRules(),
        ];
    }

    /**
     * Rules used outside of a double-quoted string.
     *
     * @return array
     */
    private function buildInitialStateRules()
    {
        return [
            self::REGEX_SPACE      => false,
            self::REGEX_COMMENT    => false,
            self::REGEX_COMMENT_ML => function () {
                // The rule only matched the opening marker; skip ahead to the closing one.
                $pos = strpos($this->content, '*/', $this->count);
                if (false === $pos) {
                    $this->line += substr_count(substr($this->content, $this->count), "\n");
                    // NOTE(review): the code below relies on error() not returning
                    // (presumably it throws) — confirm against AbstractLexer.
                    $this->error('Unterminated multiline comment');
                }
                $this->line += substr_count(substr($this->content, $this->count, $pos - $this->count + 2), "\n");
                $this->count = $pos + 2;
            },
            self::REGEX_DQUOTE => function () {
                $this->begin(self::STATE_INSTRING);
                $this->textBuffer = '';
            },
            self::REGEX_BOOL => function (&$yylval) {
                $yylval = TypeCaster::toBool($yylval);

                return Token::T_BOOL;
            },
            self::REGEX_NULL => function (&$yylval) {
                $yylval = null;

                return Token::T_NULL;
            },
            self::REGEX_NUM => function (&$yylval) {
                $yylval = TypeCaster::toNum($yylval);

                return Token::T_NUM;
            },
            self::REGEX_NAME => function () {
                return Token::T_NAME;
            },
            self::REGEX_HEREDOC => function (&$yylval) {
                // $yylval holds the heredoc label; the body ends at the next
                // line that starts with that label.
                $needle = "\n" . $yylval;
                $pos = strpos($this->content, $needle, $this->count);
                if (false === $pos) {
                    $this->line += substr_count(substr($this->content, $this->count), "\n");
                    // NOTE(review): relies on error() not returning — see above.
                    $this->error('Unterminated HEREDOC');
                }

                $yylval = substr($this->content, $this->count, $pos - $this->count);
                // +1 accounts for the newline that is part of the terminator needle.
                $this->line += substr_count($yylval, "\n") + 1;
                $this->count += strlen($yylval) + strlen($needle);

                return Token::T_END_STR;
            },
            self::REGEX_TOKEN => function ($yylval) {
                // Punctuation and operators are their own token type.
                return $yylval;
            },
            self::REGEX_VAR => function () {
                return Token::T_VAR;
            },
            self::REGEX_ANY => function ($yylval) {
                $this->error('Unexpected char \'' . $yylval . '\'');
            },
            self::EOF => function () {
                return Token::T_EOF;
            },
        ];
    }

    /**
     * Rules used between the opening and closing double quote of a string.
     *
     * Literal text and decoded escapes are appended to $textBuffer. The buffer
     * is emitted as T_STRING when a `$` follows a literal run, and as
     * T_END_STR at the closing quote.
     *
     * @return array
     */
    private function buildStringStateRules()
    {
        return [
            // Run of characters that are neither a backslash, a quote nor a dollar sign.
            '[^\\\"$]+' => function (&$yylval) {
                $this->textBuffer .= $yylval;
                // Flush the text collected so far when a `$` comes next.
                if ('$' === substr($this->content, $this->count, 1)) {
                    $yylval = $this->textBuffer;
                    $this->textBuffer = '';

                    return Token::T_STRING;
                }
            },
            // Backslash followed by one character; $yylval is switched on as that character.
            '?:\\\(.)' => function ($yylval) {
                $this->textBuffer .= $this->decodeEscape($yylval);
            },
            '\$' => function (&$yylval) {
                if (preg_match('/^{([A-Za-z0-9_]+)}/', substr($this->content, $this->count), $matches)) {
                    $this->count += strlen($matches[0]);
                    $yylval = $matches[1];

                    return Token::T_ENCAPSED_VAR;
                }

                // A `$` that does not start `${name}` is ordinary text.
                $this->textBuffer .= $yylval;
            },
            self::REGEX_DQUOTE => function (&$yylval) {
                $yylval = $this->textBuffer;
                $this->begin(self::STATE_INITIAL);

                return Token::T_END_STR;
            },
            self::EOF => function () {
                $this->error('Unterminated string');
            },
        ];
    }

    /**
     * Translates the character following a backslash into the text it stands for.
     *
     * Unknown escapes, and `\u` not followed by four hex digits, are kept
     * verbatim (backslash included).
     *
     * @param string $char Character that followed the backslash.
     *
     * @return string Text to append to the string buffer.
     */
    private function decodeEscape($char)
    {
        switch ($char) {
            case 'n':
                return "\n";
            case 't':
                return "\t";
            case '\\':
            case '/':
            case '"':
                return $char;
            case 'u':
                $decoded = $this->consumeUnicodeEscape();
                if (null !== $decoded) {
                    return $decoded;
                }
                break;
        }

        return '\\' . $char;
    }

    /**
     * Reads the four hex digits of a `\uXXXX` escape at the current position.
     *
     * When the value is a UTF-16 high surrogate (D800-DBFF) immediately
     * followed by a `\uXXXX` low surrogate (DC00-DFFF), both are consumed and
     * combined into a single supplementary-plane code point.
     *
     * @return string|null UTF-8 bytes of the code point, or null (consuming
     *                     nothing) when four hex digits do not follow.
     */
    private function consumeUnicodeEscape()
    {
        $hex = substr($this->content, $this->count, 4);
        if (!preg_match('/^[A-Fa-f0-9]{4}$/', $hex)) {
            return null;
        }

        $codePoint = hexdec($hex);
        $this->count += 4;

        $isHighSurrogate = $codePoint >= 0xD800 && $codePoint <= 0xDBFF;
        if ($isHighSurrogate
            && preg_match('/^\\\\u[dD][c-fC-F][0-9a-fA-F][0-9a-fA-F]/', substr($this->content, $this->count, 6), $matches)
        ) {
            $lowSurrogate = hexdec(substr($matches[0], -4));
            $codePoint = (($codePoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF) + 0x10000;
            $this->count += 6;
        }

        return $this->fromCharCode($codePoint);
    }

    /**
     * Encodes a Unicode code point as a UTF-8 byte string (1 to 4 bytes).
     *
     * @param int $bytes Non-negative code point (despite the name, not a byte sequence).
     *
     * @return string
     */
    private function fromCharCode(int $bytes): string
    {
        if ($bytes < 0x80) {
            return chr($bytes);
        }

        if ($bytes < 0x800) {
            return chr(0xC0 | ($bytes >> 6))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        if ($bytes < 0x10000) {
            return chr(0xE0 | ($bytes >> 12))
                 . chr(0x80 | (($bytes >> 6) & 0x3F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        return chr(0xF0 | ($bytes >> 18))
             . chr(0x80 | (($bytes >> 12) & 0x3F))
             . chr(0x80 | (($bytes >> 6) & 0x3F))
             . chr(0x80 | ($bytes & 0x3F));
    }
}
197