Completed
Push — master ( 65f66e...428edc )
by Michal
04:14
created

Token::extract()   C

Complexity

Conditions 20
Paths 14

Size

Total Lines 64
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 49
CRAP Score 20

Importance

Changes 0
Metric Value
cc 20
eloc 44
nc 14
nop 0
dl 0
loc 64
ccs 49
cts 49
cp 1
crap 20
rs 5.9268
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * Defines a token along with a set of types and flags and utility functions.
5
 *
6
 * An array of tokens will result after parsing the query.
7
 */
8
9
namespace PhpMyAdmin\SqlParser;
10
11
/**
12
 * A structure representing a lexeme that explicitly indicates its
13
 * categorization for the purpose of parsing.
14
 *
15
 * @category Tokens
16
 *
17
 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
18
 */
19
class Token
{
    // Types of tokens (a vague description of a token's purpose).

    /**
     * This type is used when the token is invalid or its type cannot be
     * determined because of the ambiguous context. Further analysis might be
     * required to detect its type.
     *
     * @var int
     */
    const TYPE_NONE = 0;

    /**
     * SQL specific keywords: SELECT, UPDATE, INSERT, etc.
     *
     * @var int
     */
    const TYPE_KEYWORD = 1;

    /**
     * Any type of legal operator.
     *
     * Arithmetic operators: +, -, *, /, etc.
     * Logical operators: ===, <>, !==, etc.
     * Bitwise operators: &, |, ^, etc.
     * Assignment operators: =, +=, -=, etc.
     * SQL specific operators: . (e.g. .. WHERE database.table ..),
     *                         * (e.g. SELECT * FROM ..)
     *
     * @var int
     */
    const TYPE_OPERATOR = 2;

    /**
     * Spaces, tabs, new lines, etc.
     *
     * @var int
     */
    const TYPE_WHITESPACE = 3;

    /**
     * Any type of legal comment.
     *
     * Bash (#), C (/* *\/) or SQL (--) comments:
     *
     *      -- SQL-comment
     *
     *      #Bash-like comment
     *
     *      /*C-like comment*\/
     *
     * or:
     *
     *      /*C-like
     *        comment*\/
     *
     * Backslashes were added to respect PHP's comments syntax.
     *
     * @var int
     */
    const TYPE_COMMENT = 4;

    /**
     * Boolean values: true or false.
     *
     * @var int
     */
    const TYPE_BOOL = 5;

    /**
     * Numbers: 4, 0x8, 15.16, 23e42, etc.
     *
     * @var int
     */
    const TYPE_NUMBER = 6;

    /**
     * Literal strings: 'string', "test".
     * Some of these strings are actually symbols.
     *
     * @var int
     */
    const TYPE_STRING = 7;

    /**
     * Database, table names, variables, etc.
     * For example: ```SELECT `foo`, `bar` FROM `database`.`table`;```.
     *
     * @var int
     */
    const TYPE_SYMBOL = 8;

    /**
     * Delimits an unknown string.
     * For example: ```SELECT * FROM test;```, `test` is a delimiter.
     *
     * @var int
     */
    const TYPE_DELIMITER = 9;

    /**
     * Labels in LOOP statement, ITERATE statement etc.
     * For example (only for begin label):
     *  begin_label: BEGIN [statement_list] END [end_label]
     *  begin_label: LOOP [statement_list] END LOOP [end_label]
     *  begin_label: REPEAT [statement_list] ... END REPEAT [end_label]
     *  begin_label: WHILE ... DO [statement_list] END WHILE [end_label].
     *
     * @var int
     */
    const TYPE_LABEL = 10;

    // Flags that describe the tokens in more detail.
    // All keywords must have flag 1 so `Context::isKeyword` method doesn't
    // require strict comparison.
    const FLAG_KEYWORD_RESERVED = 2;
    const FLAG_KEYWORD_COMPOSED = 4;
    const FLAG_KEYWORD_DATA_TYPE = 8;
    const FLAG_KEYWORD_KEY = 16;
    const FLAG_KEYWORD_FUNCTION = 32;

    // Numbers related flags.
    const FLAG_NUMBER_HEX = 1;
    const FLAG_NUMBER_FLOAT = 2;
    const FLAG_NUMBER_APPROXIMATE = 4;
    const FLAG_NUMBER_NEGATIVE = 8;
    const FLAG_NUMBER_BINARY = 16;

    // Strings related flags.
    const FLAG_STRING_SINGLE_QUOTES = 1;
    const FLAG_STRING_DOUBLE_QUOTES = 2;

    // Comments related flags.
    const FLAG_COMMENT_BASH = 1;
    const FLAG_COMMENT_C = 2;
    const FLAG_COMMENT_SQL = 4;
    const FLAG_COMMENT_MYSQL_CMD = 8;

    // Operators related flags.
    const FLAG_OPERATOR_ARITHMETIC = 1;
    const FLAG_OPERATOR_LOGICAL = 2;
    const FLAG_OPERATOR_BITWISE = 4;
    const FLAG_OPERATOR_ASSIGNMENT = 8;
    const FLAG_OPERATOR_SQL = 16;

    // Symbols related flags.
    const FLAG_SYMBOL_VARIABLE = 1;
    const FLAG_SYMBOL_BACKTICK = 2;
    const FLAG_SYMBOL_USER = 4;
    const FLAG_SYMBOL_SYSTEM = 8;

    /**
     * The token in its raw string representation.
     *
     * @var string
     */
    public $token;

    /**
     * The value this token contains (i.e. token after some evaluation).
     *
     * @var mixed
     */
    public $value;

    /**
     * The type of this token.
     *
     * @var int
     */
    public $type;

    /**
     * The flags of this token.
     *
     * @var int
     */
    public $flags;

    /**
     * The position in the initial string where this token started.
     *
     * @var int
     */
    public $position;

    /**
     * Constructor.
     *
     * Stores the raw token, its type and flags, then immediately computes
     * `$value` through `extract()`.
     *
     * @param string $token the value of the token
     * @param int    $type  the type of the token
     * @param int    $flags the flags of the token
     */
    public function __construct($token, $type = 0, $flags = 0)
    {
        $this->token = $token;
        $this->type = $type;
        $this->flags = $flags;
        $this->value = $this->extract();
    }

    /**
     * Does little processing to the token to extract a value.
     *
     * Dispatches on the token type; each supported type is handled by a
     * dedicated private helper. If no processing can be done it will return
     * the initial string.
     *
     * @return mixed
     */
    public function extract()
    {
        switch ($this->type) {
            case self::TYPE_KEYWORD:
                return $this->extractKeyword();
            case self::TYPE_WHITESPACE:
                // Any run of whitespace is normalised to a single space.
                return ' ';
            case self::TYPE_BOOL:
                return strtoupper($this->token) === 'TRUE';
            case self::TYPE_NUMBER:
                return $this->extractNumber();
            case self::TYPE_STRING:
                return $this->extractString();
            case self::TYPE_SYMBOL:
                return $this->extractSymbol();
        }

        return $this->token;
    }

    /**
     * Converts the token into an inline token by replacing tabs and new lines.
     *
     * Carriage returns, line feeds and tabs are replaced by their two
     * character escaped spelling (backslash followed by r, n or t).
     *
     * @return string
     */
    public function getInlineToken()
    {
        return str_replace(
            array("\r", "\n", "\t"),
            array('\r', '\n', '\t'),
            $this->token
        );
    }

    /**
     * Extracts the value of a keyword token.
     *
     * Reserved keywords are upper-cased. Unreserved keywords stay the way
     * they are because they might represent field names.
     *
     * @return string
     */
    private function extractKeyword()
    {
        if ($this->flags & self::FLAG_KEYWORD_RESERVED) {
            return strtoupper($this->token);
        }

        return $this->token;
    }

    /**
     * Extracts the numeric value of a number token.
     *
     * The result is whatever `sscanf` parses with `%x` (hexadecimal flag),
     * `%f` (approximate or float flag) or `%d` (everything else).
     *
     * @return mixed
     */
    private function extractNumber()
    {
        // Pairs of minus signs cancel each other, e.g. ---42 === -42.
        $ret = str_replace('--', '', $this->token);

        if ($this->flags & self::FLAG_NUMBER_HEX) {
            if ($this->flags & self::FLAG_NUMBER_NEGATIVE) {
                // The sign is carried by the flag: drop every minus sign
                // before parsing and negate the parsed value afterwards.
                $ret = str_replace('-', '', $this->token);
                sscanf($ret, '%x', $ret);
                $ret = -$ret;
            } else {
                sscanf($ret, '%x', $ret);
            }
        } elseif (($this->flags & self::FLAG_NUMBER_APPROXIMATE)
            || ($this->flags & self::FLAG_NUMBER_FLOAT)
        ) {
            sscanf($ret, '%f', $ret);
        } else {
            sscanf($ret, '%d', $ret);
        }

        return $ret;
    }

    /**
     * Extracts the contents of a quoted string token.
     *
     * The surrounding quotes are removed first and doubled quotes are
     * collapsed afterwards. Doing it in this order prevents a delimiter from
     * being paired with an adjacent escaped quote (e.g. `''''` must yield a
     * single quote, not an empty string).
     *
     * @return string
     */
    private function extractString()
    {
        if (!isset($this->token[0])) {
            // Nothing to unquote; avoids reading offset 0 of an empty string.
            return '';
        }

        $quote = $this->token[0];
        $str = mb_substr($this->token, 1, -1, 'UTF-8'); // trims quotes

        return str_replace($quote . $quote, $quote, $str);
    }

    /**
     * Extracts the name carried by a symbol token.
     *
     * A leading `@` or `@@` is stripped, then, if what remains starts with a
     * backtick, a double quote or a single quote, the surrounding quotes are
     * removed and doubled quotes are collapsed.
     *
     * @return string
     */
    private function extractSymbol()
    {
        $str = $this->token;

        if ((isset($str[0])) && ($str[0] === '@')) {
            // `mb_strlen($str)` must be used instead of `null` because
            // in PHP 5.3- the `null` parameter isn't handled correctly.
            $str = mb_substr(
                $str,
                ((!empty($str[1])) && ($str[1] === '@')) ? 2 : 1,
                mb_strlen($str),
                'UTF-8'
            );
        }

        if ((isset($str[0])) && (($str[0] === '`')
            || ($str[0] === '"') || ($str[0] === '\''))
        ) {
            $quote = $str[0];
            // Trim the delimiters before unescaping so that they cannot be
            // mistaken for one half of a doubled quote.
            $str = mb_substr($str, 1, -1, 'UTF-8');
            $str = str_replace($quote . $quote, $quote, $str);
        }

        return $str;
    }
}
307