Completed
Pull Request — master (#173)
by Madhura
07:06
created

Token   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 317
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 98.39%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 24
lcom 1
cbo 0
dl 0
loc 317
ccs 61
cts 62
cp 0.9839
rs 10
c 2
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 1
D extract() 0 84 22
A getInlineToken() 0 8 1
1
<?php
2
3
/**
4
 * Defines a token along with a set of types and flags and utility functions.
5
 *
6
 * An array of tokens will result after parsing the query.
7
 */
8
9
namespace PhpMyAdmin\SqlParser;
10
11
/**
12
 * A structure representing a lexeme that explicitly indicates its
13
 * categorization for the purpose of parsing.
14
 *
15
 * @category Tokens
16
 *
17
 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
18
 */
19
class Token
{
    // Types of tokens (a vague description of a token's purpose).

    /**
     * This type is used when the token is invalid or its type cannot be
     * determined because of the ambiguous context. Further analysis might be
     * required to detect its type.
     *
     * @var int
     */
    const TYPE_NONE = 0;

    /**
     * SQL specific keywords: SELECT, UPDATE, INSERT, etc.
     *
     * @var int
     */
    const TYPE_KEYWORD = 1;

    /**
     * Any type of legal operator.
     *
     * Arithmetic operators: +, -, *, /, etc.
     * Logical operators: ===, <>, !==, etc.
     * Bitwise operators: &, |, ^, etc.
     * Assignment operators: =, +=, -=, etc.
     * SQL specific operators: . (e.g. .. WHERE database.table ..),
     *                         * (e.g. SELECT * FROM ..)
     *
     * @var int
     */
    const TYPE_OPERATOR = 2;

    /**
     * Spaces, tabs, new lines, etc.
     *
     * @var int
     */
    const TYPE_WHITESPACE = 3;

    /**
     * Any type of legal comment.
     *
     * Bash (#), C (/* *\/) or SQL (--) comments:
     *
     *      -- SQL-comment
     *
     *      #Bash-like comment
     *
     *      /*C-like comment*\/
     *
     * or:
     *
     *      /*C-like
     *        comment*\/
     *
     * Backslashes were added to respect PHP's comments syntax.
     *
     * @var int
     */
    const TYPE_COMMENT = 4;

    /**
     * Boolean values: true or false.
     *
     * @var int
     */
    const TYPE_BOOL = 5;

    /**
     * Numbers: 4, 0x8, 15.16, 23e42, etc.
     *
     * @var int
     */
    const TYPE_NUMBER = 6;

    /**
     * Literal strings: 'string', "test".
     * Some of these strings are actually symbols.
     *
     * @var int
     */
    const TYPE_STRING = 7;

    /**
     * Database, table names, variables, etc.
     * For example: ```SELECT `foo`, `bar` FROM `database`.`table`;```.
     *
     * @var int
     */
    const TYPE_SYMBOL = 8;

    /**
     * Delimits an unknown string.
     * For example: ```SELECT * FROM test;```, `test` is a delimiter.
     *
     * @var int
     */
    const TYPE_DELIMITER = 9;

    /**
     * Labels in LOOP statement, ITERATE statement etc.
     * For example (only for begin label):
     *  begin_label: BEGIN [statement_list] END [end_label]
     *  begin_label: LOOP [statement_list] END LOOP [end_label]
     *  begin_label: REPEAT [statement_list] ... END REPEAT [end_label]
     *  begin_label: WHILE ... DO [statement_list] END WHILE [end_label].
     *
     * @var int
     */
    const TYPE_LABEL = 10;

    // Flags that describe the tokens in more detail.
    // All keywords must have flag 1 so `Context::isKeyword` method doesn't
    // require strict comparison.
    const FLAG_KEYWORD_RESERVED = 2;
    const FLAG_KEYWORD_COMPOSED = 4;
    const FLAG_KEYWORD_DATA_TYPE = 8;
    const FLAG_KEYWORD_KEY = 16;
    const FLAG_KEYWORD_FUNCTION = 32;

    // Numbers related flags.
    const FLAG_NUMBER_HEX = 1;
    const FLAG_NUMBER_FLOAT = 2;
    const FLAG_NUMBER_APPROXIMATE = 4;
    const FLAG_NUMBER_NEGATIVE = 8;
    const FLAG_NUMBER_BINARY = 16;

    // Strings related flags.
    const FLAG_STRING_SINGLE_QUOTES = 1;
    const FLAG_STRING_DOUBLE_QUOTES = 2;

    // Comments related flags.
    const FLAG_COMMENT_BASH = 1;
    const FLAG_COMMENT_C = 2;
    const FLAG_COMMENT_SQL = 4;
    const FLAG_COMMENT_MYSQL_CMD = 8;

    // Operators related flags.
    const FLAG_OPERATOR_ARITHMETIC = 1;
    const FLAG_OPERATOR_LOGICAL = 2;
    const FLAG_OPERATOR_BITWISE = 4;
    const FLAG_OPERATOR_ASSIGNMENT = 8;
    const FLAG_OPERATOR_SQL = 16;

    // Symbols related flags.
    const FLAG_SYMBOL_VARIABLE = 1;
    const FLAG_SYMBOL_BACKTICK = 2;
    const FLAG_SYMBOL_USER = 4;
    const FLAG_SYMBOL_SYSTEM = 8;
    const FLAG_SYMBOL_PARAMETER = 16;

    /**
     * The token in its raw string representation.
     *
     * @var string
     */
    public $token;

    /**
     * The value this token contains (i.e. token after some evaluation).
     *
     * Computed by `extract()` when the token is constructed.
     *
     * @var mixed
     */
    public $value;

    /**
     * The keyword value this token contains, always uppercase.
     *
     * Stays `null` unless the token type is `TYPE_KEYWORD`.
     *
     * @var mixed
     */
    public $keyword;

    /**
     * The type of this token.
     *
     * @var int
     */
    public $type;

    /**
     * The flags of this token.
     *
     * @var int
     */
    public $flags;

    /**
     * The position in the initial string where this token started.
     *
     * Not assigned by this class.
     *
     * @var int
     */
    public $position;

    /**
     * Constructor.
     *
     * Stores the raw token, its type and flags, resets the keyword and then
     * derives the evaluated value through `extract()`.
     *
     * @param string $token the value of the token
     * @param int    $type  the type of the token
     * @param int    $flags the flags of the token
     */
    public function __construct($token, $type = 0, $flags = 0)
    {
        $this->token = $token;
        $this->type = $type;
        $this->flags = $flags;
        $this->keyword = null;
        $this->value = $this->extract();
    }

    /**
     * Does little processing to the token to extract a value.
     *
     * The processing depends on the token type; each non-trivial type is
     * handled by a dedicated private helper. For keyword tokens this also
     * sets `$this->keyword` as a side effect.
     *
     * If no processing can be done it will return the initial string.
     *
     * @return mixed
     */
    public function extract()
    {
        switch ($this->type) {
            case self::TYPE_KEYWORD:
                return $this->extractKeyword();
            case self::TYPE_WHITESPACE:
                // Any run of whitespace is normalized to a single space.
                return ' ';
            case self::TYPE_BOOL:
                return strtoupper($this->token) === 'TRUE';
            case self::TYPE_NUMBER:
                return $this->extractNumber();
            case self::TYPE_STRING:
                return $this->extractString();
            case self::TYPE_SYMBOL:
                return $this->extractSymbol();
        }

        return $this->token;
    }

    /**
     * Extracts the value of a keyword token and records its uppercase form
     * in `$this->keyword`.
     *
     * @return string the uppercase keyword when it is reserved, the raw
     *                token otherwise
     */
    private function extractKeyword()
    {
        $this->keyword = strtoupper($this->token);
        if (!($this->flags & self::FLAG_KEYWORD_RESERVED)) {
            // Unreserved keywords should stay the way they are because they
            // might represent field names.
            return $this->token;
        }

        return $this->keyword;
    }

    /**
     * Extracts the numeric value of a number token according to its flags.
     *
     * @return mixed the value parsed by `sscanf`; when `sscanf` cannot
     *               parse anything the intermediate string is left untouched
     */
    private function extractNumber()
    {
        // Pairs of minus signs cancel out, e.g. ---42 === -42.
        $ret = str_replace('--', '', $this->token);

        if ($this->flags & self::FLAG_NUMBER_HEX) {
            if ($this->flags & self::FLAG_NUMBER_NEGATIVE) {
                // Parse the magnitude without any sign, then negate it.
                $ret = str_replace('-', '', $this->token);
                sscanf($ret, '%x', $ret);
                $ret = -$ret;
            } else {
                sscanf($ret, '%x', $ret);
            }
        } elseif (($this->flags & self::FLAG_NUMBER_APPROXIMATE)
            || ($this->flags & self::FLAG_NUMBER_FLOAT)
        ) {
            sscanf($ret, '%f', $ret);
        } else {
            sscanf($ret, '%d', $ret);
        }

        return $ret;
    }

    /**
     * Extracts the contents of a quoted string token.
     *
     * @return string the string without its surrounding quotes, with doubled
     *                quotes collapsed and escape sequences replaced
     */
    private function extractString()
    {
        $token = $this->token;
        if (!isset($token[0])) {
            // Nothing to unquote; also avoids reading offset 0 of an empty
            // string.
            return '';
        }

        // The first character is the quote that delimits the string.
        $quote = $token[0];

        // Removes the surrounding quotes.
        $str = mb_substr($token, 1, -1, 'UTF-8');

        // Collapses doubled quotes (the SQL way of escaping a quote).
        $str = str_replace($quote . $quote, $quote, $str);

        // Finally unescapes the string.
        //
        // `stripcslashes` replaces escape sequences with their
        // representation.
        //
        // NOTE: In MySQL, `\f` and `\v` have no representation,
        // even though they usually represent: form-feed and vertical tab.
        // These two replacements are plain substring replacements, so they
        // are not aware of a preceding escaped backslash.
        $str = str_replace('\f', 'f', $str);
        $str = str_replace('\v', 'v', $str);

        return stripcslashes($str);
    }

    /**
     * Extracts the name carried by a symbol token.
     *
     * Strips a leading `@` or `@@`, a leading `:` and, when the remainder is
     * quoted with a backtick, double quote or single quote, the surrounding
     * quotes (collapsing doubled quotes inside).
     *
     * @return string
     */
    private function extractSymbol()
    {
        $str = $this->token;

        if (isset($str[0]) && ($str[0] === '@')) {
            // `mb_strlen($str)` must be used instead of `null` because
            // in PHP 5.3- the `null` parameter isn't handled correctly.
            $str = mb_substr(
                $str,
                (isset($str[1]) && ($str[1] === '@')) ? 2 : 1,
                mb_strlen($str, 'UTF-8'),
                'UTF-8'
            );
        }

        if (isset($str[0]) && ($str[0] === ':')) {
            $str = mb_substr($str, 1, mb_strlen($str, 'UTF-8'), 'UTF-8');
        }

        if (isset($str[0])
            && (($str[0] === '`') || ($str[0] === '"') || ($str[0] === '\''))
        ) {
            $quote = $str[0];

            // The surrounding quotes must be removed BEFORE collapsing
            // doubled quotes. Otherwise the opening/closing quote can be
            // paired with an escaped one: four backticks (a symbol named
            // "`") would collapse to two and then be trimmed to an empty
            // string.
            $str = mb_substr($str, 1, -1, 'UTF-8');
            $str = str_replace($quote . $quote, $quote, $str);
        }

        return $str;
    }

    /**
     * Converts the token into an inline token by replacing tabs and new lines.
     *
     * Carriage returns, line feeds and tabs are replaced by their two
     * character escaped spelling (backslash followed by r, n or t).
     *
     * @return string
     */
    public function getInlineToken()
    {
        return str_replace(
            array("\r", "\n", "\t"),
            array('\r', '\n', '\t'),
            $this->token
        );
    }
}
336