Passed
Push — master ( 16d2f0...7c26e5 )
by William
06:07 queued 03:22
created

Token::getInlineToken()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 14
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 8
nc 1
nop 0
dl 0
loc 14
ccs 13
cts 13
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use function hexdec;
8
use function mb_strlen;
9
use function mb_substr;
10
use function str_replace;
11
use function stripcslashes;
12
use function strtoupper;
13
14
/**
 * Defines a token along with a set of types and flags and utility functions.
 *
 * An array of tokens will result after parsing the query.
 *
 * A structure representing a lexeme that explicitly indicates its categorization for the purpose of parsing.
 */
class Token
{
    // Types of tokens (a vague description of a token's purpose).

    /**
     * This type is used when the token is invalid or its type cannot be
     * determined because of the ambiguous context. Further analysis might be
     * required to detect its type.
     */
    public const TYPE_NONE = 0;

    /**
     * SQL specific keywords: SELECT, UPDATE, INSERT, etc.
     */
    public const TYPE_KEYWORD = 1;

    /**
     * Any type of legal operator.
     *
     * Arithmetic operators: +, -, *, /, etc.
     * Logical operators: ===, <>, !==, etc.
     * Bitwise operators: &, |, ^, etc.
     * Assignment operators: =, +=, -=, etc.
     * SQL specific operators: . (e.g. .. WHERE database.table ..),
     *                         * (e.g. SELECT * FROM ..)
     */
    public const TYPE_OPERATOR = 2;

    /**
     * Spaces, tabs, new lines, etc.
     */
    public const TYPE_WHITESPACE = 3;

    /**
     * Any type of legal comment.
     *
     * Bash (#), C (/* *\/) or SQL (--) comments:
     *
     *      -- SQL-comment
     *
     *      #Bash-like comment
     *
     *      /*C-like comment*\/
     *
     * or:
     *
     *      /*C-like
     *        comment*\/
     *
     * Backslashes were added to respect PHP's comments syntax.
     */
    public const TYPE_COMMENT = 4;

    /**
     * Boolean values: true or false.
     */
    public const TYPE_BOOL = 5;

    /**
     * Numbers: 4, 0x8, 15.16, 23e42, etc.
     */
    public const TYPE_NUMBER = 6;

    /**
     * Literal strings: 'string', "test".
     * Some of these strings are actually symbols.
     */
    public const TYPE_STRING = 7;

    /**
     * Database, table names, variables, etc.
     * For example: ```SELECT `foo`, `bar` FROM `database`.`table`;```.
     */
    public const TYPE_SYMBOL = 8;

    /**
     * Delimits an unknown string.
     * For example: ```SELECT * FROM test;```, `test` is a delimiter.
     */
    public const TYPE_DELIMITER = 9;

    /**
     * Labels in LOOP statement, ITERATE statement etc.
     * For example (only for begin label):
     *  begin_label: BEGIN [statement_list] END [end_label]
     *  begin_label: LOOP [statement_list] END LOOP [end_label]
     *  begin_label: REPEAT [statement_list] ... END REPEAT [end_label]
     *  begin_label: WHILE ... DO [statement_list] END WHILE [end_label].
     */
    public const TYPE_LABEL = 10;

    /**
     * All token types.
     */
    public const TYPE_ALL = [
        self::TYPE_NONE,
        self::TYPE_KEYWORD,
        self::TYPE_OPERATOR,
        self::TYPE_WHITESPACE,
        self::TYPE_COMMENT,
        self::TYPE_BOOL,
        self::TYPE_NUMBER,
        self::TYPE_STRING,
        self::TYPE_SYMBOL,
        self::TYPE_DELIMITER,
        self::TYPE_LABEL,
    ];

    // Flags that describe the tokens in more detail.
    // All keywords must have flag 1 so `Context::isKeyword` method doesn't
    // require strict comparison.
    public const FLAG_KEYWORD_RESERVED = 2;
    public const FLAG_KEYWORD_COMPOSED = 4;
    public const FLAG_KEYWORD_DATA_TYPE = 8;
    public const FLAG_KEYWORD_KEY = 16;
    public const FLAG_KEYWORD_FUNCTION = 32;

    // Numbers related flags.
    public const FLAG_NUMBER_HEX = 1;
    public const FLAG_NUMBER_FLOAT = 2;
    public const FLAG_NUMBER_APPROXIMATE = 4;
    public const FLAG_NUMBER_NEGATIVE = 8;
    public const FLAG_NUMBER_BINARY = 16;

    // Strings related flags.
    public const FLAG_STRING_SINGLE_QUOTES = 1;
    public const FLAG_STRING_DOUBLE_QUOTES = 2;

    // Comments related flags.
    public const FLAG_COMMENT_BASH = 1;
    public const FLAG_COMMENT_C = 2;
    public const FLAG_COMMENT_SQL = 4;
    public const FLAG_COMMENT_MYSQL_CMD = 8;

    // Operators related flags.
    public const FLAG_OPERATOR_ARITHMETIC = 1;
    public const FLAG_OPERATOR_LOGICAL = 2;
    public const FLAG_OPERATOR_BITWISE = 4;
    public const FLAG_OPERATOR_ASSIGNMENT = 8;
    public const FLAG_OPERATOR_SQL = 16;

    // Symbols related flags.
    public const FLAG_SYMBOL_VARIABLE = 1;
    public const FLAG_SYMBOL_BACKTICK = 2;
    public const FLAG_SYMBOL_USER = 4;
    public const FLAG_SYMBOL_SYSTEM = 8;
    public const FLAG_SYMBOL_PARAMETER = 16;

    /**
     * The token in its raw string representation.
     *
     * @var string
     */
    public $token;

    /**
     * The value this token contains (i.e. token after some evaluation).
     *
     * @var mixed
     */
    public $value;

    /**
     * The keyword value this token contains, always uppercase.
     *
     * Only set by `extract()` for keyword tokens; null otherwise.
     *
     * @var mixed|string|null
     */
    public $keyword;

    /**
     * The type of this token.
     *
     * @var int
     */
    public $type;

    /**
     * The flags of this token.
     *
     * @var int
     */
    public $flags;

    /**
     * The position in the initial string where this token started.
     *
     * The position is counted in chars, not bytes, so you should
     * use mb_* functions to properly handle utf-8 multibyte chars.
     *
     * @var int|null
     */
    public $position;

    /**
     * Stores the raw token, its type and flags, then derives `$value`
     * (and `$keyword` for keyword tokens) through `extract()`.
     *
     * @param string $token the value of the token
     * @param int    $type  the type of the token
     * @param int    $flags the flags of the token
     */
    public function __construct($token, $type = 0, $flags = 0)
    {
        $this->token = $token;
        $this->type = $type;
        $this->flags = $flags;
        // Reset before extraction: `extract()` fills it in for keywords only.
        $this->keyword = null;
        $this->value = $this->extract();
    }

    /**
     * Does little processing to the token to extract a value.
     *
     * If no processing can be done it will return the initial string.
     *
     * Per type:
     *  - keyword: sets `$keyword` to the uppercased token; returns the
     *    uppercased form for reserved keywords, the raw token otherwise;
     *  - whitespace: a single space;
     *  - bool: true when the token is (case-insensitively) `TRUE`;
     *  - number: int, float, or (for binary-flagged numbers) a string;
     *  - string: the content without quotes, with escapes resolved;
     *  - symbol: the name without `@`, `@@`, `:` prefix and without quotes;
     *  - anything else: the raw token.
     *
     * @return mixed
     */
    public function extract()
    {
        switch ($this->type) {
            case self::TYPE_KEYWORD:
                $this->keyword = strtoupper($this->token);
                if (! ($this->flags & self::FLAG_KEYWORD_RESERVED)) {
                    // Unreserved keywords should stay the way they are because they
                    // might represent field names.
                    return $this->token;
                }

                return $this->keyword;

            case self::TYPE_WHITESPACE:
                return ' ';

            case self::TYPE_BOOL:
                return strtoupper($this->token) === 'TRUE';

            case self::TYPE_NUMBER:
                $ret = str_replace('--', '', $this->token); // e.g. ---42 === -42
                if ($this->flags & self::FLAG_NUMBER_HEX) {
                    if ($this->flags & self::FLAG_NUMBER_NEGATIVE) {
                        // `hexdec` cannot parse a sign, so strip every minus
                        // and negate the result instead.
                        $ret = str_replace('-', '', $this->token);
                        $ret = -hexdec($ret);
                    } else {
                        $ret = hexdec($ret);
                    }
                } elseif (($this->flags & self::FLAG_NUMBER_APPROXIMATE) || ($this->flags & self::FLAG_NUMBER_FLOAT)) {
                    $ret = (float) $ret;
                } elseif (! ($this->flags & self::FLAG_NUMBER_BINARY)) {
                    $ret = (int) $ret;
                }

                // Binary-flagged numbers fall through unconverted (string).
                return $ret;

            case self::TYPE_STRING:
                // An empty token has no quote character to read; reading
                // offset 0 would raise an "Uninitialized string offset"
                // diagnostic. The extracted value is the empty string.
                if ($this->token === '') {
                    return '';
                }

                // Trims quotes.
                $str = $this->token;
                $str = mb_substr($str, 1, -1, 'UTF-8');

                // Collapses doubled quote characters (e.g. `''` inside a
                // single-quoted string) into a single one.
                $quote = $this->token[0];
                $str = str_replace($quote . $quote, $quote, $str);

                // Finally unescapes the string.
                //
                // `stripcslashes` replaces escape sequences with their
                // representation.
                //
                // NOTE: In MySQL, `\f` and `\v` have no representation,
                // even though they usually represent: form-feed and vertical tab.
                $str = str_replace('\f', 'f', $str);
                $str = str_replace('\v', 'v', $str);
                $str = stripcslashes($str);

                return $str;

            case self::TYPE_SYMBOL:
                $str = $this->token;
                if (isset($str[0]) && ($str[0] === '@')) {
                    // Drops the `@` (or `@@`) prefix.
                    //
                    // `mb_strlen($str)` must be used instead of `null` because
                    // in PHP 5.3- the `null` parameter isn't handled correctly.
                    $str = mb_substr(
                        $str,
                        ! empty($str[1]) && ($str[1] === '@') ? 2 : 1,
                        mb_strlen($str),
                        'UTF-8'
                    );
                }

                if (isset($str[0]) && ($str[0] === ':')) {
                    // Drops the `:` prefix.
                    $str = mb_substr($str, 1, mb_strlen($str), 'UTF-8');
                }

                if (isset($str[0]) && (($str[0] === '`') || ($str[0] === '"') || ($str[0] === '\''))) {
                    // Trims the quotes and collapses doubled quote characters.
                    $quote = $str[0];
                    $str = mb_substr($str, 1, -1, 'UTF-8');
                    $str = str_replace($quote . $quote, $quote, $str);
                }

                return $str;
        }

        return $this->token;
    }

    /**
     * Converts the token into an inline token by replacing tabs and new lines.
     *
     * Carriage return, line feed and tab characters are replaced by their
     * two-character backslash notation (`\r`, `\n`, `\t`).
     *
     * @return string
     */
    public function getInlineToken()
    {
        return str_replace(
            [
                "\r",
                "\n",
                "\t",
            ],
            [
                '\r',
                '\n',
                '\t',
            ],
            $this->token
        );
    }
}
344