Passed
Branch master (ef264b)
by Pierrick
01:53
created

Lexer::getRules()   D

Complexity

Conditions 15
Paths 1

Size

Total Lines 125
Code Lines 89

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 85
CRAP Score 15.0027

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 15
eloc 89
c 2
b 0
f 0
nc 1
nop 0
dl 0
loc 125
ccs 85
cts 87
cp 0.977
crap 15.0027
rs 4.9733

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

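Commonly applied refactorings include Extract Method and, when many parameters or temporary variables get in the way of extracting, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.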

1
<?php
2
/**
3
 * This file is part of NACL.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 *
8
 * @copyright 2019 Nuglif (2018) Inc.
9
 * @license   http://www.opensource.org/licenses/mit-license.html  MIT License
10
 * @author    Pierrick Charron <[email protected]>
11
 * @author    Charle Demers <[email protected]>
12
 */
13
14
declare(strict_types=1);
15
16
namespace Nuglif\Nacl;
17
18
/**
 * Lexer for NACL source text.
 *
 * Supplies the rule table consumed by AbstractLexer: for each lexer state, a
 * map of regular-expression fragment => handler. A handler may return a token
 * identifier, and may rewrite the matched text through its by-reference
 * $yylval parameter.
 *
 * NOTE(review): the matching engine lives in AbstractLexer, which is not
 * visible from this file. The comments below assume that rules mapped to
 * `false` are skipped without emitting a token, that a fragment starting with
 * `?:` is wrapped so that its inner group becomes $yylval, and that error()
 * aborts lexing (the code after each error() call relies on it) — confirm
 * against AbstractLexer.
 */
class Lexer extends AbstractLexer
{
    const STATE_INITIAL  = 0;
    const STATE_INSTRING  = 1;
    const STATE_INHEREDOC = 2;

    const REGEX_SPACE      = '[ \t\n\r]+';
    const REGEX_COMMENT    = '(?://|\#).*';
    const REGEX_COMMENT_ML = '/\*';
    const REGEX_NAME       = '[A-Za-z_][A-Za-z0-9_-]*';
    const REGEX_VAR        = '?:\${([A-Za-z0-9_]+)}';
    // The trailing single-letter suffix class is written without `|`: inside a
    // character class `|` is a literal, so `[b|s|h|d|w|y]` also accepted a pipe
    // and swallowed the operator in input such as `1|2`.
    const REGEX_NUM        = '(?:[0-9]*\.?[0-9]+|[0-9]+\.)(?:[eE](?:\+|-)?[0-9]+)?(?:m(?:in|s)|[KkGgMm][Bb]?|[bshdwy])?';
    const REGEX_DQUOTE     = '"';
    const REGEX_HEREDOC    = '?:<<<([A-Za-z0-9_]+)\n';
    const REGEX_BOOL       = '(?:true|false|yes|no|on|off)\b';
    const REGEX_NULL       = 'null\b';
    const REGEX_TOKEN      = '[\[\]=:{};,.()&|%^/*+-]|<<|>>';
    const REGEX_ANY        = '.';

    /**
     * Text accumulated for the double-quoted string currently being lexed.
     *
     * Reset when a string opens and whenever a T_STRING chunk is emitted.
     */
    private $textBuffer;

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Builds the rule table, keyed by lexer state.
     *
     * @return array Map of state => (regex fragment => handler|false).
     */
    protected function getRules()
    {
        return [
            self::STATE_INITIAL => [
                self::REGEX_SPACE      => false,
                self::REGEX_COMMENT    => false,
                // Multi-line comment: jump the cursor past the closing "*/",
                // keeping the line counter in step with the skipped text.
                self::REGEX_COMMENT_ML => function () {
                    $pos = strpos($this->content, '*/', $this->count);
                    if (false === $pos) {
                        $this->line += substr_count(substr($this->content, $this->count), "\n");
                        $this->error('Unterminated multiline comment');
                    }
                    $this->line += substr_count(substr($this->content, $this->count, $pos - $this->count + 2), "\n");
                    $this->count = $pos + 2;
                },
                // Opening quote: switch to the in-string state with an empty buffer.
                self::REGEX_DQUOTE => function () {
                    $this->begin(self::STATE_INSTRING);
                    $this->textBuffer = '';
                },
                self::REGEX_BOOL => function (&$yylval) {
                    $yylval = TypeCaster::toBool($yylval);

                    return Token::T_BOOL;
                },
                self::REGEX_NULL => function (&$yylval) {
                    $yylval = null;

                    return Token::T_NULL;
                },
                self::REGEX_NUM => function (&$yylval) {
                    $yylval = TypeCaster::toNum($yylval);

                    return Token::T_NUM;
                },
                self::REGEX_NAME => function () {
                    return Token::T_NAME;
                },
                // Heredoc: $yylval holds the terminator identifier; the body
                // runs up to the next newline followed by that identifier.
                // NOTE(review): the search is a plain prefix match, so a body
                // line that merely starts with the identifier (e.g. "EOFX" for
                // "EOF") also ends the heredoc — confirm whether intended.
                self::REGEX_HEREDOC => function (&$yylval) {
                    $needle = "\n" . $yylval;
                    $pos = strpos($this->content, $needle, $this->count);
                    if (false === $pos) {
                        $this->line += substr_count(substr($this->content, $this->count), "\n");
                        $this->error('Unterminated HEREDOC');
                    }

                    $yylval = substr($this->content, $this->count, $pos - $this->count);
                    $this->line += substr_count($yylval, "\n") + 1;
                    $this->count += strlen($yylval) + strlen($needle);

                    return Token::T_END_STR;
                },
                // Punctuation and operators are returned as their own token id.
                self::REGEX_TOKEN => function ($yylval) {
                    return $yylval;
                },
                self::REGEX_VAR => function () {
                    return Token::T_VAR;
                },
                // Catch-all: any character no earlier rule accepted.
                self::REGEX_ANY => function ($yylval) {
                    $this->error('Unexpected char \'' . $yylval . '\'');
                },
                self::EOF => function () {
                    return Token::T_EOF;
                },
            ],
            self::STATE_INSTRING => [
                // Run of plain characters (no backslash, quote or dollar sign).
                '[^\\\"$]+' => function (&$yylval) {
                    $this->textBuffer .= $yylval;
                    // A "$" comes next: flush what has been buffered so far as
                    // its own T_STRING before the "$" rule runs.
                    if ('$' === substr($this->content, $this->count, 1)) {
                        $yylval = $this->textBuffer;
                        $this->textBuffer = '';

                        return Token::T_STRING;
                    }
                },
                // Backslash escape: $yylval is the character after the backslash.
                '?:\\\(.)' => function ($yylval) {
                    switch ($yylval) {
                        case 'n':
                            $this->textBuffer .= "\n";
                            break;
                        case 't':
                            $this->textBuffer .= "\t";
                            break;
                        case '\\':
                        case '/':
                        case '"':
                            $this->textBuffer .= $yylval;
                            break;
                        case 'u':
                            // \uXXXX: the four hex digits follow the cursor.
                            $utfCode = substr($this->content, $this->count, 4);
                            if (preg_match('/^[A-Fa-f0-9]{4}$/', $utfCode)) {
                                $utf = hexdec($utfCode);
                                $this->count += 4;
                                // UTF-16 surrogate pair: a high surrogate
                                // (D800-DBFF) directly followed by a \uDC00-\uDFFF
                                // escape combines into one code point >= 0x10000.
                                if ($utf >= 0xD800 && $utf <= 0xDBFF && preg_match('/^\\\\u[dD][c-fC-F][0-9a-fA-F][0-9a-fA-F]/', substr($this->content, $this->count, 6), $matches)) {
                                    $lowSurrogate = hexdec(substr($matches[0], -4));
                                    $utf = (($utf & 0x3FF) << 10) + ($lowSurrogate & 0x3FF) + 0x10000;
                                    $this->count += 6;
                                }
                                $this->textBuffer .= $this->fromCharCode($utf);
                                break;
                            }
                            // "\u" without four hex digits is kept verbatim
                            // by the default branch.
                            /* no break */
                        default:
                            // Unknown escape: keep the backslash and the character.
                            $this->textBuffer .= '\\' . $yylval;
                            break;
                    }
                },
                // "$": either the start of a ${name} interpolation or a literal dollar.
                '\$' => function (&$yylval) {
                    if (preg_match('/^{([A-Za-z0-9_]+)}/', substr($this->content, $this->count), $matches)) {
                        $this->count += strlen($matches[0]);
                        $yylval = $matches[1];

                        return Token::T_ENCAPSED_VAR;
                    }

                    $this->textBuffer .= $yylval;
                },
                // Closing quote: emit the buffered text and leave the string state.
                self::REGEX_DQUOTE => function (&$yylval) {
                    $yylval = $this->textBuffer;
                    $this->begin(self::STATE_INITIAL);

                    return Token::T_END_STR;
                },
                self::EOF => function () {
                    $this->error('Unterminated string');
                },
            ],
        ];
    }

    /**
     * Encodes a code point as a UTF-8 byte sequence.
     *
     * The sequence length (1 to 4 bytes) is chosen from the number of
     * significant bits in the value.
     *
     * @param int $bytes Code point to encode (despite the name, not a byte string).
     *
     * @return string The UTF-8 encoding of the code point.
     */
    private function fromCharCode(int $bytes): string
    {
        // Fits in 7 bits: single byte.
        if ((0x7F & $bytes) === $bytes) {
            return chr($bytes);
        }

        // Fits in 11 bits: two bytes.
        if ((0x07FF & $bytes) === $bytes) {
            return chr(0xc0 | ($bytes >> 6))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        // Fits in 16 bits: three bytes.
        if ((0xFFFF & $bytes) === $bytes) {
            return chr(0xe0 | ($bytes >> 12))
                 . chr(0x80 | (($bytes >> 6) & 0x3F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        // Anything larger: four bytes.
        return chr(0xF0 | ($bytes >> 18))
             . chr(0x80 | (($bytes >> 12) & 0x3F))
             . chr(0x80 | (($bytes >> 6) & 0x3F))
             . chr(0x80 | ($bytes & 0x3F));
    }
}
197