Tokenizer::tokenize()   F
last analyzed

Complexity

Conditions 44
Paths 58

Size

Total Lines 196
Code Lines 133

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 133
c 1
b 0
f 0
dl 0
loc 196
rs 3.3333
cc 44
nc 58
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php

/**
 * Platine Expression
 *
 * Platine Expression is an expression parser, evaluator with support of custom
 * operators and functions
 *
 * This content is released under the MIT License (MIT)
 *
 * Copyright (c) 2020 Platine Expression
 * Copyright (c) Alexander Kiryukhin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * @file Tokenizer.php
 *
 * The Tokenizer class
 *
 *  @package    Platine\Expression
 *  @author Platine Developers Team
 *  @copyright  Copyright (c) 2020
 *  @license    http://opensource.org/licenses/MIT  MIT License
 *  @link   https://www.platine-php.com
 *  @version 1.0.0
 *  @filesource
 */
46
declare(strict_types=1);
47
48
namespace Platine\Expression;
49
50
use Platine\Expression\Exception\IncorrectBracketsException;
51
use Platine\Expression\Exception\UnknownOperatorException;
52
use RuntimeException;
53
use SplStack;
54
55
/**
 * @class Tokenizer
 * @package Platine\Expression
 *
 * Splits an expression string into Token instances (see tokenize()) and
 * reorders them into reverse polish notation (see buildReversePolishNotation()).
 */
class Tokenizer
{
    /**
     * List of token
     * @var Token[]
     */
    protected array $tokens = [];

    /**
     * The expression to evaluate
     * @var string
     */
    protected string $input = '';

    /**
     * The number buffering (digits, dot and exponent part collected so far)
     * @var string
     */
    protected string $numberBuffer = '';

    /**
     * The string buffering (identifier or quoted string collected so far)
     * @var string
     */
    protected string $stringBuffer = '';

    /**
     * Whether a "+" or "-" at the current position is read as a unary operator
     * @var bool
     */
    protected bool $allowNegative = true;

    /**
     * The list of operator
     * @var array<string, Operator>
     */
    protected array $operators = [];

    /**
     * Whether the current pointer is inside single quoted string
     * @var bool
     */
    protected bool $inSingleQuotedString = false;

    /**
     * Whether the current pointer is inside double quoted string
     * @var bool
     */
    protected bool $inDoubleQuotedString = false;

    /**
     * Create new instance
     * @param string $input
     * @param array<string, Operator> $operators
     */
    public function __construct(string $input, array $operators)
    {
        $this->input = $input;
        $this->operators = $operators;
    }

    /**
     * Produce the tokens
     *
     * The input is read byte by byte. The produced tokens are appended
     * to the internal token list.
     *
     * @return $this
     */
    public function tokenize(): self
    {
        // Before PHP 8.2 str_split('') returns [''] instead of []; that lone
        // empty "character" would end up as an OPERATOR token with an empty
        // value. An empty input must produce no token at all.
        $chars = $this->input === '' ? [] : str_split($this->input);

        $isLastCharEscape = false;
        foreach ($chars as $ch) {
            // The first matching case wins, so the order of the cases matters.
            switch (true) {
                case $this->inSingleQuotedString:
                    $isLastCharEscape = $this->consumeQuotedChar($ch, "'", $isLastCharEscape);
                    break;

                case $this->inDoubleQuotedString:
                    $isLastCharEscape = $this->consumeQuotedChar($ch, '"', $isLastCharEscape);
                    break;

                case $ch === '[':
                    // "[" is emitted as a call to the "array" function
                    $this->tokens[] = new Token(Token::FUNCTION, 'array');
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LEFT_PARENTHESIS, '');
                    break;

                case $ch === ' ' || $ch === "\n" || $ch === "\r" || $ch === "\t":
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStringBufferAsVariable();
                    $this->tokens[] = new Token(Token::SPACE, '');
                    break;

                case $this->isNumber($ch):
                    if ($this->stringBuffer !== '') {
                        // a digit following letters belongs to the identifier
                        $this->stringBuffer .= $ch;
                        break;
                    }
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;

                // "e"/"E" is an exponent marker only when a number containing
                // a dot is being read; otherwise it is handled as a letter
                // by the next case.
                case strtolower($ch) === 'e'
                    && $this->numberBuffer !== ''
                    && strpos($this->numberBuffer, '.') !== false:
                    $this->numberBuffer .= 'e';
                    $this->allowNegative = false;
                    break;

                case $this->isAlpha($ch):
                    if ($this->numberBuffer !== '') {
                        // a number directly followed by a name: implicit multiplication
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::OPERATOR, '*');
                    }
                    $this->allowNegative = false;
                    $this->stringBuffer .= $ch;
                    break;

                case $ch === '"':
                    $this->inDoubleQuotedString = true;
                    break;

                case $ch === "'":
                    $this->inSingleQuotedString = true;
                    break;

                case $this->isDot($ch):
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;

                case $this->isLeftParenthesis($ch):
                    if ($this->stringBuffer !== '') {
                        // a name directly followed by "(" is a function call
                        $this->tokens[] = new Token(Token::FUNCTION, $this->stringBuffer);
                        $this->stringBuffer = '';
                    } elseif ($this->numberBuffer !== '') {
                        // a number directly followed by "(": implicit multiplication
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::OPERATOR, '*');
                    }
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LEFT_PARENTHESIS, '');
                    break;

                case $this->isRightParenthesis($ch) || $ch === ']':
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStringBufferAsVariable();
                    $this->allowNegative = false;
                    $this->tokens[] = new Token(Token::RIGHT_PARENTHESIS, '');
                    break;

                case $this->isComma($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStringBufferAsVariable();
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::PARAM_SEPARATOR, '');
                    break;

                default:
                    $this->consumeOperatorChar($ch);
            }
        }

        // NOTE(review): if the input ends inside a quoted string, the pending
        // string buffer is flushed below as a VARIABLE token - confirm this
        // is the intended handling of unterminated strings.
        $this->emptyNumberBufferAsLiteral();
        $this->emptyStringBufferAsVariable();

        return $this;
    }

    /**
     * Build the reverse polish notation
     *
     * Operators are ordered using the priority of the operators given to
     * the constructor. Function tokens receive their parameter count when
     * their closing parenthesis is reached.
     *
     * @return Token[]
     *
     * @throws IncorrectBracketsException if the parentheses are not balanced
     * @throws UnknownOperatorException if an operator token is not part
     * of the configured operators
     */
    public function buildReversePolishNotation(): array
    {
        $tokens = [];
        /** @var SplStack<Token> $stack */
        $stack = new SplStack();

        /** @var SplStack<int> $paramCounter */
        $paramCounter = new SplStack();
        foreach ($this->tokens as $token) {
            switch ($token->getType()) {
                case Token::LITERAL:
                case Token::VARIABLE:
                case Token::STRING:
                    $tokens[] = $token;
                    $this->countFirstArgument($paramCounter);
                    break;

                case Token::FUNCTION:
                    // a nested call is itself the first argument of the outer call
                    $this->countFirstArgument($paramCounter);
                    $stack->push($token);
                    $paramCounter->push(0);
                    break;

                case Token::LEFT_PARENTHESIS:
                    $stack->push($token);
                    break;

                case Token::PARAM_SEPARATOR:
                    // Check for emptiness BEFORE peeking: top() on an empty
                    // SplStack throws a RuntimeException on its own.
                    while (!$stack->isEmpty() && Token::LEFT_PARENTHESIS !== $stack->top()->getType()) {
                        $tokens[] = $stack->pop();
                    }

                    if ($stack->isEmpty()) {
                        throw new IncorrectBracketsException('Incorrect brackets');
                    }

                    // NOTE(review): a separator outside of any function call
                    // leaves $paramCounter empty, so pop() throws a
                    // RuntimeException here - confirm whether that is wanted.
                    $paramCounter->push($paramCounter->pop() + 1);
                    break;

                case Token::OPERATOR:
                    if (!array_key_exists($token->getValue(), $this->operators)) {
                        throw new UnknownOperatorException(sprintf(
                            'Unknown operator [%s]',
                            $token->getValue()
                        ));
                    }
                    $op1 = $this->operators[$token->getValue()];
                    while ($stack->count() > 0 && Token::OPERATOR === $stack->top()->getType()) {
                        if (!array_key_exists($stack->top()->getValue(), $this->operators)) {
                            throw new UnknownOperatorException(sprintf(
                                'Unknown operator [%s]',
                                $stack->top()->getValue()
                            ));
                        }
                        $op2 = $this->operators[$stack->top()->getValue()];
                        if ($op2->getPriority() < $op1->getPriority()) {
                            break;
                        }
                        // stacked operators of higher or equal priority go out first
                        $tokens[] = $stack->pop();
                    }
                    $stack->push($token);
                    break;

                case Token::RIGHT_PARENTHESIS:
                    while (true) {
                        try {
                            $ctoken = $stack->pop();
                        } catch (RuntimeException $ex) {
                            // the stack ran empty before a "(" was found
                            throw new IncorrectBracketsException('Incorrect brackets');
                        }

                        if (Token::LEFT_PARENTHESIS === $ctoken->getType()) {
                            break;
                        }
                        $tokens[] = $ctoken;
                    }

                    if ($stack->count() > 0 && Token::FUNCTION === $stack->top()->getType()) {
                        /** @var Token $funcToken */
                        $funcToken = $stack->pop();
                        $funcToken->setParamCount($paramCounter->pop());
                        $tokens[] = $funcToken;
                    }
                    break;

                case Token::SPACE:
                    // do nothing
                    break;
            }
        }

        while (!$stack->isEmpty()) {
            $type = $stack->top()->getType();
            if (in_array($type, [Token::LEFT_PARENTHESIS, Token::RIGHT_PARENTHESIS], true)) {
                throw new IncorrectBracketsException('Incorrect brackets');
            }

            $pending = $stack->pop();
            if (Token::SPACE !== $type) {
                $tokens[] = $pending;
            }
        }

        return $tokens;
    }

    /**
     * Put the current number buffer content to token
     * as literal
     * @return void
     */
    protected function emptyNumberBufferAsLiteral(): void
    {
        if ($this->numberBuffer === '') {
            return;
        }

        $this->tokens[] = new Token(Token::LITERAL, $this->numberBuffer);
        $this->numberBuffer = '';
    }

    /**
     * Put the current string buffer content to token
     * as variable
     * @return void
     */
    protected function emptyStringBufferAsVariable(): void
    {
        if ($this->stringBuffer === '') {
            return;
        }

        $this->tokens[] = new Token(Token::VARIABLE, $this->stringBuffer);
        $this->stringBuffer = '';
    }

    /**
     * Whether the given argument is number
     * @param string $chr
     * @return bool
     */
    protected function isNumber(string $chr): bool
    {
        return $chr >= '0' && $chr <= '9';
    }

    /**
     * Whether the given argument is alphabetic (underscore included)
     * @param string $chr
     * @return bool
     */
    protected function isAlpha(string $chr): bool
    {
        return ($chr >= 'a' && $chr <= 'z')
            || ($chr >= 'A' && $chr <= 'Z')
            || $chr === '_';
    }

    /**
     * Whether the given argument is dot
     * @param string $chr
     * @return bool
     */
    protected function isDot(string $chr): bool
    {
        return $chr === '.';
    }

    /**
     * Whether the given argument is left parenthesis
     * @param string $chr
     * @return bool
     */
    protected function isLeftParenthesis(string $chr): bool
    {
        return $chr === '(';
    }

    /**
     * Whether the given argument is right parenthesis
     * @param string $chr
     * @return bool
     */
    protected function isRightParenthesis(string $chr): bool
    {
        return $chr === ')';
    }

    /**
     * Whether the given argument is comma
     * @param string $chr
     * @return bool
     */
    protected function isComma(string $chr): bool
    {
        return $chr === ',';
    }

    /**
     * Handle one character read while inside a quoted string
     *
     * @param string $ch the current character
     * @param string $quote the quote character delimiting the string
     * @param bool $isLastCharEscape whether the previous character was
     * a backslash that is still pending
     * @return bool the escape state to use for the next character
     */
    private function consumeQuotedChar(string $ch, string $quote, bool $isLastCharEscape): bool
    {
        if ($ch === '\\') {
            if (!$isLastCharEscape) {
                // wait for the next character to know what is escaped
                return true;
            }
            // an escaped backslash gives one literal backslash
            $this->stringBuffer .= '\\';

            return false;
        }

        if ($ch === $quote) {
            if ($isLastCharEscape) {
                // escaped quote: part of the string content
                $this->stringBuffer .= $quote;

                return false;
            }

            // closing quote: the buffered content becomes a STRING token
            $this->tokens[] = new Token(Token::STRING, $this->stringBuffer);
            $this->stringBuffer = '';
            if ($quote === "'") {
                $this->inSingleQuotedString = false;
            } else {
                $this->inDoubleQuotedString = false;
            }

            return false;
        }

        if ($isLastCharEscape) {
            // a backslash in front of any other character is kept as is
            $this->stringBuffer .= '\\';
        }
        $this->stringBuffer .= $ch;

        return false;
    }

    /**
     * Handle a character matched by no other rule of tokenize(): it is
     * treated as (part of) an operator
     *
     * @param string $ch the current character
     * @return void
     */
    private function consumeOperatorChar(string $ch): void
    {
        // special case for unary operations
        if ($ch === '-' || $ch === '+') {
            if ($this->allowNegative) {
                $this->allowNegative = false;
                $this->tokens[] = new Token(Token::OPERATOR, $ch === '-' ? 'uNeg' : 'uPos');

                return;
            }

            // could be in exponent, in which case the sign
            // should be added to the number buffer
            if ($this->numberBuffer !== '' && substr($this->numberBuffer, -1) === 'e') {
                $this->numberBuffer .= $ch;

                return;
            }
        }

        $this->emptyNumberBufferAsLiteral();
        $this->emptyStringBufferAsVariable();

        // "$" only ends the pending buffers, it never produces a token
        if ($ch !== '$') {
            $lastIndex = count($this->tokens) - 1;
            if ($lastIndex >= 0 && Token::OPERATOR === $this->tokens[$lastIndex]->getType()) {
                // consecutive operator characters form one operator (e.g. ">=")
                $last = $this->tokens[$lastIndex];
                $last->setValue($last->getValue() . $ch);
            } else {
                $this->tokens[] = new Token(Token::OPERATOR, $ch);
            }
        }

        $this->allowNegative = true;
    }

    /**
     * Record the first argument of the innermost pending function call:
     * a counter still at 0 is moved to 1, any other state is left untouched
     *
     * @param SplStack<int> $paramCounter
     * @return void
     */
    private function countFirstArgument(SplStack $paramCounter): void
    {
        if ($paramCounter->count() > 0 && $paramCounter->top() === 0) {
            $paramCounter->push($paramCounter->pop() + 1);
        }
    }
}
530