Lexer::addOperator()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 2
1
<?php
2
3
/*
4
 * This file is part of the fubhy/math-php package.
5
 *
6
 * (c) Sebastian Siemssen <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Fubhy\Math;
13
14
use Fubhy\Math\Exception\IncorrectParenthesisException;
15
use Fubhy\Math\Exception\IncorrectExpressionException;
16
use Fubhy\Math\Exception\UnknownConstantException;
17
use Fubhy\Math\Exception\UnknownFunctionException;
18
use Fubhy\Math\Exception\UnknownOperatorException;
19
use Fubhy\Math\Exception\UnknownTokenException;
20
use Fubhy\Math\Token\ParenthesisCloseToken;
21
use Fubhy\Math\Token\ParenthesisOpenToken;
22
use Fubhy\Math\Token\CommaToken;
23
use Fubhy\Math\Token\FunctionToken;
24
use Fubhy\Math\Token\NumberToken;
25
use Fubhy\Math\Token\Operator\OperatorTokenInterface;
26
use Fubhy\Math\Token\VariableToken;
27
28
/**
29
 * Lexical analyzer for mathematical expressions.
30
 *
31
 * @author Sebastian Siemssen <[email protected]>
32
 */
33
class Lexer
34
{
35
    /**
36
     * Static cache of the compiled regular expression.
37
     *
38
     * @var string
39
     */
40
    protected $compiledRegex;
41
42
    /**
43
     * The list of registered operators.
44
     *
45
     * @var array
46
     */
47
    protected $operators = [];
48
49
    /**
50
     * The list of registered constants.
51
     *
52
     * @var array
53
     */
54
    protected $constants = [];
55
56
    /**
57
     * The list of registered functions.
58
     *
59
     * @var array
60
     */
61
    protected $functions = [];
62
63
    /**
64
     * The list of parsed variables.
65
     * 
66
     * @var array
67
     */
68
    protected $variables = [];
69
70
    /**
71
     * Registers a function with the lexer.
72
     *
73
     * @param string $name
74
     *     The name of the function.
75
     * @param callable $function
76
     *     The callback to be executed.
77
     * @param int $arguments
78
     *     The number of arguments required for the function.
79
     *
80
     * @return $this
81
     */
82
    public function addFunction($name, callable $function, $arguments = 1)
83
    {
84
        $this->functions[$name] = [$arguments, $function];
85
        return $this;
86
    }
87
88
    /**
89
     * Removes a function from the function registry.
90
     *
91
     * @param string $name
92
     *   The name of the function to remove.
93
     *
94
     * @return $this
95
     */
96
    public function removeFunction($name)
97
    {
98
        unset($this->functions[$name]);
99
        return $this;
100
    }
101
102
    /**
103
     * Registers an operator with the lexer.
104
     *
105
     * @param string $name
106
     *     The name of the operator.
107
     * @param string $operator
108
     *     The full qualified class name of the operator token.
109
     *
110
     * @return $this
111
     */
112
    public function addOperator($name, $operator)
113
    {
114
        if (!is_subclass_of($operator, 'Fubhy\Math\Token\Operator\OperatorTokenInterface')) {
115
            throw new \InvalidArgumentException();
116
        }
117
118
        // Clear the static cache when a new operator is added.
119
        unset($this->compiledRegex);
120
121
        $this->operators[$name] = $operator;
122
        return $this;
123
    }
124
125
    /**
126
     * Removes an operator from the operator registry.
127
     *
128
     * @param string $name
129
     *   The name of the operator to remove.
130
     *
131
     * @return $this
132
     */
133
    public function removeOperator($name)
134
    {
135
        unset($this->operators[$name]);
136
137
        // Clear the static cache when an operator is removed.
138
        unset($this->compiledRegex);
139
140
        return $this;
141
    }
142
143
    /**
144
     * Registers a constant with the lexer.
145
     *
146
     * @param string $name
147
     *     The name of the constant.
148
     * @param int $value
149
     *     The value of the constant.
150
     *
151
     * @return $this
152
     */
153
    public function addConstant($name, $value)
154
    {
155
        $this->constants[$name] = $value;
156
        return $this;
157
    }
158
159
    /**
160
     * Removes a constant from the constant registry.
161
     *
162
     * @param string $name
163
     *   The name of the constant to remove.
164
     *
165
     * @return $this
166
     */
167
    public function removeConstant($name)
168
    {
169
        unset($this->operators[$name]);
170
        return $this;
171
    }
172
173
    /**
174
     * Generates a token stream from a mathematical expression.
175
     *
176
     * @param string $input
177
     *     The mathematical expression to tokenize.
178
     *
179
     * @return array
180
     *     The generated token stream.
181
     */
182
    public function tokenize($input)
183
    {
184
        $matches = [];
185
        $regex = $this->getCompiledTokenRegex();
186
187
        if (preg_match_all($regex, $input, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE) === FALSE) {
188
            // There was a failure when evaluating the regular expression.
189
            throw new \LogicException();
190
        };
191
192
        $types = [
193
            'number', 'operator', 'function', 'open', 'close', 'comma', 'constant', 'variable',
194
        ];
195
196
        // Traverse over all matches and create the corresponding tokens.
197
        return array_map(function ($match) use ($types) {
198
            foreach ($types as $type) {
199
                if (!empty($match[$type][0])) {
200
                    return $this->createToken($type, $match[$type][0], $match[$type][1], $match);
201
                }
202
            }
203
204
            // There was a match outside of one of the token types.
205
            throw new \LogicException();
206
        }, $matches);
207
    }
208
209
    /**
210
     * Reorganizes a list of tokens into reverse polish (postfix) notation.
211
     *
212
     * Uses an implementation of the Shunting-yard algorithm.
213
     *
214
     * http://en.wikipedia.org/wiki/Shunting-yard_algorithm
215
     *
216
     * @param \Fubhy\Math\Token\TokenInterface[] $tokens
217
     *     The tokens to be reorganized into reverse polish (postfix) notation.
218
     *
219
     * @return \Fubhy\Math\Token\TokenInterface[]
220
     *     The given tokens in reverse polish (postfix) notation.
221
     *
222
     * @throws \Fubhy\Math\Exception\IncorrectParenthesisException
223
     * @throws \Fubhy\Math\Exception\IncorrectExpressionException
224
     */
225
    public function postfix($tokens)
226
    {
227
        $output = [];
228
        $stack = [];
229
230
        foreach ($tokens as $token) {
231
            if ($token instanceof NumberToken || $token instanceof VariableToken) {
232
                $output[] = $token;
233
            }
234
            elseif ($token instanceof FunctionToken) {
235
                array_push($stack, $token);
236
            }
237
            elseif ($token instanceof ParenthesisOpenToken) {
238
                array_push($stack, $token);
239
            }
240
            elseif ($token instanceof CommaToken) {
241 View Code Duplication
                while (($current = array_pop($stack)) && (!$current instanceof ParenthesisOpenToken)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
242
                    $output[] = $current;
243
244
                    if (empty($stack)) {
245
                        throw new IncorrectExpressionException();
246
                    }
247
                }
248
            }
249
            elseif ($token instanceof ParenthesisCloseToken) {
250
                while (($current = array_pop($stack)) && !($current instanceof ParenthesisOpenToken)) {
251
                    $output[] = $current;
252
                }
253
254
                if (!empty($stack) && ($stack[count($stack) - 1] instanceof FunctionToken)) {
255
                    $output[] = array_pop($stack);
256
                }
257
            }
258
            elseif ($token instanceof OperatorTokenInterface) {
259
                while (!empty($stack)) {
260
                    $last = end($stack);
261
                    if (!($last instanceof OperatorTokenInterface)) {
262
                        break;
263
                    }
264
265
                    $associativity = $token->getAssociativity();
266
                    $precedence = $token->getPrecedence();
267
                    $last_precedence = $last->getPrecedence();
268
                    if (!(
269
                        ($associativity === OperatorTokenInterface::ASSOCIATIVITY_LEFT && $precedence <= $last_precedence) ||
270
                        ($associativity === OperatorTokenInterface::ASSOCIATIVITY_RIGHT && $precedence < $last_precedence)
271
                    )) {
272
                        break;
273
                    }
274
275
                    $output[] = array_pop($stack);
276
                }
277
278
                array_push($stack, $token);
279
            }
280
        }
281
282 View Code Duplication
        while (!empty($stack)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
283
            $token = array_pop($stack);
284
            if ($token instanceof ParenthesisOpenToken || $token instanceof ParenthesisCloseToken) {
285
                throw new IncorrectParenthesisException();
286
            }
287
288
            $output[] = $token;
289
        }
290
291
        return $output;
292
    }
293
294
    /**
295
     * Creates a token object of the given type.
296
     *
297
     * @param string $type
298
     *     The type of the token.
299
     * @param string $value
300
     *     The matched string.
301
     * @param int $offset
302
     *     The offset of the matched string.
303
     * @param $match
304
     *     The full match as returned by preg_match_all().
305
     *
306
     * @return \Fubhy\Math\Token\TokenInterface
307
     *     The created token object.
308
     *
309
     * @throws \Fubhy\Math\Exception\UnknownConstantException
310
     * @throws \Fubhy\Math\Exception\UnknownFunctionException
311
     * @throws \Fubhy\Math\Exception\UnknownOperatorException
312
     * @throws \Fubhy\Math\Exception\UnknownTokenException
313
     */
314
    protected function createToken($type, $value, $offset, $match)
315
    {
316
        switch ($type) {
317
            case 'number':
318
                return new NumberToken($offset, $value);
319
320
            case 'open':
321
                return new ParenthesisOpenToken($offset, $value);
322
323
            case 'close':
324
                return new ParenthesisCloseToken($offset, $value);
325
326
            case 'comma':
327
                return new CommaToken($offset, $value);
328
329
            case 'operator':
330
                foreach ($this->operators as $id => $operator) {
331
                    if (!empty($match["op_$id"][0])) {
332
                        return new $operator($offset, $value);
333
                    }
334
                }
335
                throw new UnknownOperatorException($offset, $value);
336
337
            case 'function':
338
                if (isset($this->functions[$value])) {
339
                    return new FunctionToken($offset, $this->functions[$value]);
340
                }
341
                throw new UnknownFunctionException($offset, $value);
342
343
            case 'constant':
344
                $constant = substr($value, 1);
345
                if (isset($this->constants[$constant])) {
346
                    return new NumberToken($offset, $this->constants[$constant]);
347
                }
348
                throw new UnknownConstantException($offset, $constant);
349
350
            case 'variable':
351
                $variable = substr($value, 1, -1);
352
                if (!in_array($variable, $this->variables)) {
353
                    $this->variables[] = $variable;
354
                }
355
                return new VariableToken($offset, $variable);
356
        }
357
358
        throw new UnknownTokenException($offset, $value);
359
    }
360
361
    /**
362
     * Builds a concatenated regular expression for all available operators.
363
     *
364
     * @return string
365
     *     The regular expression for matching all available operators.
366
     */
367
    protected function getOperatorRegex()
368
    {
369
        $operators = [];
370
        foreach ($this->operators as $id => $operator) {
371
            $pattern = call_user_func([$operator, 'getRegexPattern']);
372
            $operators[] = "(?P<op_$id>{$pattern})";
373
        }
374
        return implode('|', $operators);
375
    }
376
377
    /**
378
     * Compiles the regular expressions of all token types.
379
     *
380
     * @return string
381
     *     The compiled regular expression.
382
     */
383
    protected function getCompiledTokenRegex()
384
    {
385
        if (isset($this->compiledRegex)) {
386
            return $this->compiledRegex;
387
        }
388
389
        $regex = [
390
            sprintf('(?P<number>%s)', '\-?\d+\.?\d*(E-?\d+)?'),
391
            sprintf('(?P<function>%s)', '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*'),
392
            sprintf('(?P<constant>%s)', '\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*'),
393
            sprintf('(?P<variable>%s)', '\[[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\]'),
394
            sprintf('(?P<open>%s)', '\('),
395
            sprintf('(?P<close>%s)', '\)'),
396
            sprintf('(?P<comma>%s)', '\,'),
397
            sprintf('(?P<operator>%s)', $this->getOperatorRegex()),
398
        ];
399
400
        $regex = implode('|', $regex);
401
        return $this->compiledRegex = "/$regex/i";
402
    }
403
404
    /**
405
     * Return the parsed variables identifiers.
406
     * 
407
     * @return array
408
     */
409
    public function getVariables()
410
    {
411
        return $this->variables;
412
    }
413
414
}
415