Tokenizer::getRegexFunction()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 4
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
/**
3
 * Author: Nil Portugués Calderó <[email protected]>
4
 * Date: 6/26/14
5
 * Time: 12:10 AM.
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace NilPortugues\Sql\QueryFormatter\Tokenizer;
12
13
use NilPortugues\Sql\QueryFormatter\Helper\Token;
14
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Boundary;
15
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Comment;
16
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Numeral;
17
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Quoted;
18
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Reserved;
19
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\LiteralString;
20
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\UserDefined;
21
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\WhiteSpace;
22
23
/**
24
 * Class Tokenizer.
25
 */
26
class Tokenizer
{
    // Token type identifiers, stored under the TOKEN_TYPE key of a token.
    const TOKEN_TYPE_WHITESPACE = 0;
    const TOKEN_TYPE_WORD = 1;
    const TOKEN_TYPE_QUOTE = 2;
    const TOKEN_TYPE_BACK_TICK_QUOTE = 3;
    const TOKEN_TYPE_RESERVED = 4;
    const TOKEN_TYPE_RESERVED_TOP_LEVEL = 5;
    const TOKEN_TYPE_RESERVED_NEWLINE = 6;
    const TOKEN_TYPE_BOUNDARY = 7;
    const TOKEN_TYPE_COMMENT = 8;
    const TOKEN_TYPE_BLOCK_COMMENT = 9;
    const TOKEN_TYPE_NUMBER = 10;
    const TOKEN_TYPE_ERROR = 11;
    const TOKEN_TYPE_VARIABLE = 12;

    // Keys of a token array: $token[TOKEN_TYPE] and $token[TOKEN_VALUE].
    const TOKEN_TYPE = 0;
    const TOKEN_VALUE = 1;

    /**
     * Alternation group built from Token::$boundaries.
     *
     * @var string
     */
    protected $regexBoundaries;

    /**
     * Alternation group built from Token::$reserved.
     *
     * @var string
     */
    protected $regexReserved;

    /**
     * Alternation group built from Token::$reservedNewLine (spaces match any whitespace run).
     *
     * @var string
     */
    protected $regexReservedNewLine;

    /**
     * Alternation group built from Token::$reservedTopLevel (spaces match any whitespace run).
     *
     * @var string
     */
    protected $regexReservedTopLevel;

    /**
     * Alternation group built from Token::$functions.
     *
     * @var string
     */
    protected $regexFunction;

    /**
     * Number of leading characters of the remaining input used as token cache key.
     * Only tokens strictly shorter than this are cached.
     *
     * @var int
     */
    protected $maxCacheKeySize = 15;

    /**
     * Previously parsed tokens, keyed by the input prefix they were parsed from.
     *
     * @var array
     */
    protected $tokenCache = [];

    /**
     * Token produced by the parser chain for the current position.
     *
     * @var array
     */
    protected $nextToken = [];

    /**
     * Length of the input that still has to be tokenized.
     *
     * @var int
     */
    protected $currentStringLength = 0;

    /**
     * Length of the remaining input before the last token was consumed.
     *
     * @var int
     */
    protected $oldStringLength = 0;

    /**
     * Last token that was produced; the empty string until the first token exists.
     *
     * @var array|string
     */
    protected $previousToken = '';

    /**
     * Length of the value of the last token that was produced.
     *
     * @var int
     */
    protected $tokenLength = 0;

    /**
     * Tokens collected by the current tokenize() run.
     *
     * @var array
     */
    protected $tokens = [];

    /**
     * Builds all the regular expressions needed to Tokenize the input.
     *
     * Side effect: Token::$reserved is rewritten in place, ordered from the longest
     * to the shortest word (duplicates collapse because the words are used as array keys),
     * so longer words come first in the generated alternation.
     */
    public function __construct()
    {
        $reservedMap = \array_combine(Token::$reserved, \array_map('strlen', Token::$reserved));
        \arsort($reservedMap);
        Token::$reserved = \array_keys($reservedMap);

        $this->regexFunction = $this->initRegex(Token::$functions);
        $this->regexBoundaries = $this->initRegex(Token::$boundaries);
        $this->regexReserved = $this->initRegex(Token::$reserved);

        // Multi-word entries must match whatever amount of whitespace separates the words.
        $this->regexReservedTopLevel = \str_replace(' ', '\\s+', $this->initRegex(Token::$reservedTopLevel));
        $this->regexReservedNewLine = \str_replace(' ', '\\s+', $this->initRegex(Token::$reservedNewLine));
    }

    /**
     * Turns a list of literal strings into one parenthesised regex alternation.
     *
     * @param string[] $variable Literal strings; each one is escaped with quoteRegex().
     *
     * @return string E.g. ['a', 'b.c'] becomes '(a|b\.c)'.
     */
    protected function initRegex($variable)
    {
        return '('.\implode('|', \array_map([$this, 'quoteRegex'], $variable)).')';
    }

    /**
     * Takes a SQL string and breaks it into tokens.
     * Each token is an array holding its type and value under the TOKEN_TYPE and TOKEN_VALUE keys.
     *
     * @param string $string
     *
     * @return array Empty when the input is the empty string.
     */
    public function tokenize($string)
    {
        return (\strlen($string) > 0) ? $this->processTokens($string) : [];
    }

    /**
     * Resets the per-run state and consumes the input one token at a time.
     *
     * @param string $string
     *
     * @return array The collected tokens.
     */
    protected function processTokens($string)
    {
        $length = \strlen($string);

        $this->tokens = [];
        $this->previousToken = '';
        $this->currentStringLength = $length;
        // Start one above the real length so the first iteration counts as progress.
        $this->oldStringLength = $length + 1;

        while ($this->currentStringLength >= 0) {
            // Stop as soon as the last token did not shorten the remaining input.
            if ($this->oldStringLength <= $this->currentStringLength) {
                break;
            }
            $string = $this->processOneToken($string);
        }

        return $this->tokens;
    }

    /**
     * Extracts the token at the start of the input, records it and updates the length counters.
     *
     * @param string $string
     *
     * @return string The input with the extracted token removed from its start.
     */
    protected function processOneToken($string)
    {
        $token = $this->getToken($string, $this->currentStringLength, $this->previousToken);
        $this->tokens[] = $token;
        $this->tokenLength = \strlen($token[self::TOKEN_VALUE]);
        $this->previousToken = $token;

        $this->oldStringLength = $this->currentStringLength;
        $this->currentStringLength -= $this->tokenLength;

        return \substr($string, $this->tokenLength);
    }

    /**
     * Returns the token at the start of the input, from the cache when one is stored for its prefix.
     *
     * @param string       $string
     * @param int          $currentStringLength
     * @param array|string $previousToken The preceding token, or '' when there is none yet.
     *
     * @return array|mixed
     */
    protected function getToken($string, $currentStringLength, $previousToken)
    {
        $cacheKey = $this->useTokenCache($string, $currentStringLength);
        if (!empty($cacheKey) && isset($this->tokenCache[$cacheKey])) {
            return $this->getNextTokenFromCache($cacheKey);
        }

        return $this->getNextTokenFromString($string, $previousToken, $cacheKey);
    }

    /**
     * Computes the cache key for the current position.
     *
     * @param string $string
     * @param int    $currentStringLength
     *
     * @return string The first $maxCacheKeySize characters of the input, or '' when
     *                fewer than $maxCacheKeySize characters remain (no caching).
     */
    protected function useTokenCache($string, $currentStringLength)
    {
        $cacheKey = '';

        if ($currentStringLength >= $this->maxCacheKeySize) {
            $cacheKey = \substr($string, 0, $this->maxCacheKeySize);
        }

        return $cacheKey;
    }

    /**
     * Reads a token from the cache; the caller must have checked that the key exists.
     *
     * @param string $cacheKey
     *
     * @return mixed
     */
    protected function getNextTokenFromCache($cacheKey)
    {
        return $this->tokenCache[$cacheKey];
    }

    /**
     * Get the next token and the token type and store it in cache.
     *
     * @param string       $string
     * @param array|string $token    The preceding token, or '' when there is none yet;
     *                               it is forwarded to parseNextToken() as $previous.
     * @param string       $cacheKey Prefix to cache the result under; '' disables caching.
     *
     * @return array
     */
    protected function getNextTokenFromString($string, $token, $cacheKey)
    {
        $token = $this->parseNextToken($string, $token);

        // Only tokens shorter than the key length are stored under the prefix key.
        if ($cacheKey && \strlen($token[self::TOKEN_VALUE]) < $this->maxCacheKeySize) {
            $this->tokenCache[$cacheKey] = $token;
        }

        return $token;
    }

    /**
     * Return the next token and token type in a SQL string.
     * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
     *
     * The parsers below are invoked in a fixed order, each receiving this tokenizer;
     * presumably each one publishes its match through setNextToken() — confirm in the parser classes.
     *
     * @param string            $string   The SQL string
     * @param array|string|null $previous The result of the previous parseNextToken() call;
     *                                    '' or null when there is none
     *
     * @return array An associative array containing the type and value of the token.
     */
    protected function parseNextToken($string, $previous = null)
    {
        $matches = [];
        $this->nextToken = [];

        WhiteSpace::isWhiteSpace($this, $string, $matches);
        Comment::isComment($this, $string);
        Quoted::isQuoted($this, $string);
        UserDefined::isUserDefinedVariable($this, $string);
        Numeral::isNumeral($this, $string, $matches);
        Boundary::isBoundary($this, $string, $matches);
        Reserved::isReserved($this, $string, $previous);
        LiteralString::isFunction($this, $string, $matches);
        LiteralString::getNonReservedString($this, $string, $matches);

        return $this->nextToken;
    }

    /**
     * @return array The token currently held for the position being parsed.
     */
    public function getNextToken()
    {
        return $this->nextToken;
    }

    /**
     * @param array $nextToken
     *
     * @return $this
     */
    public function setNextToken($nextToken)
    {
        $this->nextToken = $nextToken;

        return $this;
    }

    /**
     * @return string
     */
    public function getRegexBoundaries()
    {
        return $this->regexBoundaries;
    }

    /**
     * @return string
     */
    public function getRegexFunction()
    {
        return $this->regexFunction;
    }

    /**
     * @return string
     */
    public function getRegexReserved()
    {
        return $this->regexReserved;
    }

    /**
     * @return string
     */
    public function getRegexReservedNewLine()
    {
        return $this->regexReservedNewLine;
    }

    /**
     * @return string
     */
    public function getRegexReservedTopLevel()
    {
        return $this->regexReservedTopLevel;
    }

    /**
     * Helper function for building regular expressions for reserved words and boundary characters.
     *
     * @param string $string
     *
     * @return string The input escaped for literal use inside a '/'-delimited pattern.
     */
    protected function quoteRegex($string)
    {
        return \preg_quote($string, '/');
    }
}
351