Passed
Push — master ( 6e1435...eac7d2 )
by Michal
03:55
created

Token::extract()   D

Complexity

Conditions 20
Paths 14

Size

Total Lines 81
Code Lines 50

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 46
CRAP Score 20

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 81
ccs 46
cts 46
cp 1
rs 4.9645
cc 20
eloc 50
nc 14
nop 0
crap 20

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

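Commonly applied refactorings include Extract Method: move a cohesive group of statements out of the long method into its own well-named method.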

1
<?php
2
3
/**
4
 * Defines a token along with a set of types and flags and utility functions.
5
 *
6
 * An array of tokens will result after parsing the query.
7
 */
8
9
namespace PhpMyAdmin\SqlParser;
10
11
/**
 * A structure representing a lexeme that explicitly indicates its
 * categorization for the purpose of parsing.
 *
 * A token keeps its raw text (`$token`), a type (one of the `TYPE_*`
 * constants), a bit mask of `FLAG_*` constants whose meaning depends on the
 * type, and a value derived from the raw text by `extract()`.
 *
 * @category Tokens
 *
 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
 */
class Token
{
    // Types of tokens (a vague description of a token's purpose).

    /**
     * This type is used when the token is invalid or its type cannot be
     * determined because of the ambiguous context. Further analysis might be
     * required to detect its type.
     *
     * @var int
     */
    const TYPE_NONE = 0;

    /**
     * SQL specific keywords: SELECT, UPDATE, INSERT, etc.
     *
     * @var int
     */
    const TYPE_KEYWORD = 1;

    /**
     * Any type of legal operator.
     *
     * Arithmetic operators: +, -, *, /, etc.
     * Logical operators: ===, <>, !==, etc.
     * Bitwise operators: &, |, ^, etc.
     * Assignment operators: =, +=, -=, etc.
     * SQL specific operators: . (e.g. .. WHERE database.table ..),
     *                         * (e.g. SELECT * FROM ..)
     *
     * @var int
     */
    const TYPE_OPERATOR = 2;

    /**
     * Spaces, tabs, new lines, etc.
     *
     * @var int
     */
    const TYPE_WHITESPACE = 3;

    /**
     * Any type of legal comment.
     *
     * Bash (#), C (/* *\/) or SQL (--) comments:
     *
     *      -- SQL-comment
     *
     *      #Bash-like comment
     *
     *      /*C-like comment*\/
     *
     * or:
     *
     *      /*C-like
     *        comment*\/
     *
     * Backslashes were added to respect PHP's comments syntax.
     *
     * @var int
     */
    const TYPE_COMMENT = 4;

    /**
     * Boolean values: true or false.
     *
     * @var int
     */
    const TYPE_BOOL = 5;

    /**
     * Numbers: 4, 0x8, 15.16, 23e42, etc.
     *
     * @var int
     */
    const TYPE_NUMBER = 6;

    /**
     * Literal strings: 'string', "test".
     * Some of these strings are actually symbols.
     *
     * @var int
     */
    const TYPE_STRING = 7;

    /**
     * Database, table names, variables, etc.
     * For example: ```SELECT `foo`, `bar` FROM `database`.`table`;```.
     *
     * @var int
     */
    const TYPE_SYMBOL = 8;

    /**
     * Delimits an unknown string.
     * For example: ```SELECT * FROM test;```, `test` is a delimiter.
     *
     * @var int
     */
    const TYPE_DELIMITER = 9;

    /**
     * Labels in LOOP statement, ITERATE statement etc.
     * For example (only for begin label):
     *  begin_label: BEGIN [statement_list] END [end_label]
     *  begin_label: LOOP [statement_list] END LOOP [end_label]
     *  begin_label: REPEAT [statement_list] ... END REPEAT [end_label]
     *  begin_label: WHILE ... DO [statement_list] END WHILE [end_label].
     *
     * @var int
     */
    const TYPE_LABEL = 10;

    // Flags that describe the tokens in more detail. Flags are bit masks and
    // are only meaningful together with the matching token type, which is why
    // the same numeric values are reused across the groups below.

    // Keywords related flags.
    // All keywords must have flag 1 so `Context::isKeyword` method doesn't
    // require strict comparison.
    const FLAG_KEYWORD_RESERVED = 2;
    const FLAG_KEYWORD_COMPOSED = 4;
    const FLAG_KEYWORD_DATA_TYPE = 8;
    const FLAG_KEYWORD_KEY = 16;
    const FLAG_KEYWORD_FUNCTION = 32;

    // Numbers related flags.
    const FLAG_NUMBER_HEX = 1;
    const FLAG_NUMBER_FLOAT = 2;
    const FLAG_NUMBER_APPROXIMATE = 4;
    const FLAG_NUMBER_NEGATIVE = 8;
    const FLAG_NUMBER_BINARY = 16;

    // Strings related flags.
    const FLAG_STRING_SINGLE_QUOTES = 1;
    const FLAG_STRING_DOUBLE_QUOTES = 2;

    // Comments related flags.
    const FLAG_COMMENT_BASH = 1;
    const FLAG_COMMENT_C = 2;
    const FLAG_COMMENT_SQL = 4;
    const FLAG_COMMENT_MYSQL_CMD = 8;

    // Operators related flags.
    const FLAG_OPERATOR_ARITHMETIC = 1;
    const FLAG_OPERATOR_LOGICAL = 2;
    const FLAG_OPERATOR_BITWISE = 4;
    const FLAG_OPERATOR_ASSIGNMENT = 8;
    const FLAG_OPERATOR_SQL = 16;

    // Symbols related flags.
    const FLAG_SYMBOL_VARIABLE = 1;
    const FLAG_SYMBOL_BACKTICK = 2;
    const FLAG_SYMBOL_USER = 4;
    const FLAG_SYMBOL_SYSTEM = 8;

    /**
     * The token in its raw string representation.
     *
     * @var string
     */
    public $token;

    /**
     * The value this token contains (i.e. token after some evaluation).
     *
     * @var mixed
     */
    public $value;

    /**
     * The keyword value this token contains, always uppercase.
     *
     * Stays `null` unless the token is of type `TYPE_KEYWORD`.
     *
     * @var mixed
     */
    public $keyword;

    /**
     * The type of this token.
     *
     * @var int
     */
    public $type;

    /**
     * The flags of this token.
     *
     * @var int
     */
    public $flags;

    /**
     * The position in the initial string where this token started.
     *
     * @var int
     */
    public $position;

    /**
     * Constructor.
     *
     * Stores the raw token, its type and flags, and immediately computes
     * `$value` (and, for keywords, `$keyword`) through `extract()`.
     *
     * @param string $token the value of the token
     * @param int    $type  the type of the token
     * @param int    $flags the flags of the token
     */
    public function __construct($token, $type = 0, $flags = 0)
    {
        $this->token = $token;
        $this->type = $type;
        $this->flags = $flags;
        $this->keyword = null;
        $this->value = $this->extract();
    }

    /**
     * Does little processing to the token to extract a value.
     *
     * The processing depends on the token type:
     *  - keywords:   upper-cased when reserved, untouched otherwise;
     *  - whitespace: always a single space;
     *  - booleans:   `true` when the token spells `TRUE` (any case);
     *  - numbers:    parsed into an int or a float;
     *  - strings:    unquoted and unescaped;
     *  - symbols:    stripped of the `@` / `@@` prefix and of quotes.
     *
     * If no processing can be done it will return the initial string.
     *
     * As a side effect, `$keyword` is set for tokens of type `TYPE_KEYWORD`.
     *
     * @return mixed
     */
    public function extract()
    {
        switch ($this->type) {
            case self::TYPE_KEYWORD:
                return $this->extractKeyword();
            case self::TYPE_WHITESPACE:
                return ' ';
            case self::TYPE_BOOL:
                return strtoupper($this->token) === 'TRUE';
            case self::TYPE_NUMBER:
                return $this->extractNumber();
            case self::TYPE_STRING:
                return $this->extractString();
            case self::TYPE_SYMBOL:
                return $this->extractSymbol();
        }

        return $this->token;
    }

    /**
     * Converts the token into an inline token by replacing tabs and new lines.
     *
     * Carriage returns, line feeds and tabs are replaced by their two
     * character escape sequences (`\r`, `\n`, `\t`).
     *
     * @return string
     */
    public function getInlineToken()
    {
        return str_replace(
            array("\r", "\n", "\t"),
            array('\r', '\n', '\t'),
            $this->token
        );
    }

    /**
     * Records the upper-cased keyword and returns the value of a keyword token.
     *
     * @return string
     */
    private function extractKeyword()
    {
        $this->keyword = strtoupper($this->token);

        // Unreserved keywords should stay the way they are because they
        // might represent field names.
        if ($this->flags & self::FLAG_KEYWORD_RESERVED) {
            return $this->keyword;
        }

        return $this->token;
    }

    /**
     * Parses a number token into its numeric value.
     *
     * @return mixed the parsed int or float
     */
    private function extractNumber()
    {
        $isHex = (bool) ($this->flags & self::FLAG_NUMBER_HEX);
        $isNegativeHex = $isHex && ($this->flags & self::FLAG_NUMBER_NEGATIVE);

        if ($isHex) {
            $format = '%x';
        } elseif ($this->flags & (self::FLAG_NUMBER_APPROXIMATE | self::FLAG_NUMBER_FLOAT)) {
            $format = '%f';
        } else {
            $format = '%d';
        }

        if ($isNegativeHex) {
            // The sign is dropped before scanning and applied to the result.
            $ret = str_replace('-', '', $this->token);
        } else {
            // Pairs of minus signs cancel out, e.g. ---42 === -42.
            $ret = str_replace('--', '', $this->token);
        }

        // `$ret` is both the scanned input and the receiving variable.
        sscanf($ret, $format, $ret);

        return $isNegativeHex ? -$ret : $ret;
    }

    /**
     * Unquotes and unescapes a string token.
     *
     * @return string
     */
    private function extractString()
    {
        // An empty token has no quote character to read.
        if (!isset($this->token[0])) {
            return '';
        }

        $str = self::unquote($this->token);

        // Finally unescapes the string.
        //
        // `stripcslashes` replaces escape sequences with their
        // representation.
        //
        // NOTE: In MySQL, `\f` and `\v` have no representation,
        // even they usually represent: form-feed and vertical tab.
        //
        // NOTE(review): these replacements run before unescaping, so an
        // escaped backslash followed by `f` or `v` (`\\f`) is rewritten too
        // and ends up as a control character after `stripcslashes`; confirm
        // whether that matters to callers.
        $str = str_replace('\f', 'f', $str);
        $str = str_replace('\v', 'v', $str);

        return stripcslashes($str);
    }

    /**
     * Strips the `@` / `@@` prefix and the surrounding quotes of a symbol token.
     *
     * @return string
     */
    private function extractSymbol()
    {
        $str = $this->token;

        if (isset($str[0]) && ($str[0] === '@')) {
            // `mb_strlen($str)` must be used instead of `null` because
            // in PHP 5.3- the `null` parameter isn't handled correctly.
            $str = mb_substr(
                $str,
                (isset($str[1]) && ($str[1] === '@')) ? 2 : 1,
                mb_strlen($str, 'UTF-8'),
                'UTF-8'
            );
        }

        if (isset($str[0])
            && (($str[0] === '`') || ($str[0] === '"') || ($str[0] === '\''))
        ) {
            $str = self::unquote($str);
        }

        return $str;
    }

    /**
     * Removes the delimiters of a quoted text and collapses doubled quotes.
     *
     * The first character of `$quoted` is taken as the quote character. The
     * first and last characters are dropped *before* doubled quotes are
     * collapsed, so that a delimiter is never paired with a quote belonging
     * to the content (e.g. the four backticks token means a single backtick).
     *
     * @param string $quoted the non-empty quoted text, delimiters included
     *
     * @return string
     */
    private static function unquote($quoted)
    {
        $quote = $quoted[0];
        $inner = mb_substr($quoted, 1, -1, 'UTF-8');

        return str_replace($quote . $quote, $quote, $inner);
    }
}
332