1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
* This file is part of the fubhy/math-php package. |
5
|
|
|
* |
6
|
|
|
* (c) Sebastian Siemssen <[email protected]> |
7
|
|
|
* |
8
|
|
|
* For the full copyright and license information, please view the LICENSE |
9
|
|
|
* file that was distributed with this source code. |
10
|
|
|
*/ |
11
|
|
|
|
12
|
|
|
namespace Fubhy\Math; |
13
|
|
|
|
14
|
|
|
use Fubhy\Math\Exception\IncorrectParenthesisException; |
15
|
|
|
use Fubhy\Math\Exception\IncorrectExpressionException; |
16
|
|
|
use Fubhy\Math\Exception\UnknownConstantException; |
17
|
|
|
use Fubhy\Math\Exception\UnknownFunctionException; |
18
|
|
|
use Fubhy\Math\Exception\UnknownOperatorException; |
19
|
|
|
use Fubhy\Math\Exception\UnknownTokenException; |
20
|
|
|
use Fubhy\Math\Token\ParenthesisCloseToken; |
21
|
|
|
use Fubhy\Math\Token\ParenthesisOpenToken; |
22
|
|
|
use Fubhy\Math\Token\CommaToken; |
23
|
|
|
use Fubhy\Math\Token\FunctionToken; |
24
|
|
|
use Fubhy\Math\Token\NumberToken; |
25
|
|
|
use Fubhy\Math\Token\Operator\OperatorTokenInterface; |
26
|
|
|
use Fubhy\Math\Token\VariableToken; |
27
|
|
|
|
28
|
|
|
/** |
29
|
|
|
* Lexical analyzer for mathematical expressions. |
30
|
|
|
* |
31
|
|
|
* @author Sebastian Siemssen <[email protected]> |
32
|
|
|
*/ |
33
|
|
|
class Lexer |
34
|
|
|
{ |
35
|
|
|
/** |
36
|
|
|
* Static cache of the compiled regular expression. |
37
|
|
|
* |
38
|
|
|
* @var string |
39
|
|
|
*/ |
40
|
|
|
protected $compiledRegex; |
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* The list of registered operators. |
44
|
|
|
* |
45
|
|
|
* @var array |
46
|
|
|
*/ |
47
|
|
|
protected $operators = []; |
48
|
|
|
|
49
|
|
|
/** |
50
|
|
|
* The list of registered constants. |
51
|
|
|
* |
52
|
|
|
* @var array |
53
|
|
|
*/ |
54
|
|
|
protected $constants = []; |
55
|
|
|
|
56
|
|
|
/** |
57
|
|
|
* The list of registered functions. |
58
|
|
|
* |
59
|
|
|
* @var array |
60
|
|
|
*/ |
61
|
|
|
protected $functions = []; |
62
|
|
|
|
63
|
|
|
/** |
64
|
|
|
* The list of parsed variables. |
65
|
|
|
* |
66
|
|
|
* @var array |
67
|
|
|
*/ |
68
|
|
|
protected $variables = []; |
69
|
|
|
|
70
|
|
|
/** |
71
|
|
|
* Registers a function with the lexer. |
72
|
|
|
* |
73
|
|
|
* @param string $name |
74
|
|
|
* The name of the function. |
75
|
|
|
* @param callable $function |
76
|
|
|
* The callback to be executed. |
77
|
|
|
* @param int $arguments |
78
|
|
|
* The number of arguments required for the function. |
79
|
|
|
* |
80
|
|
|
* @return $this |
81
|
|
|
*/ |
82
|
|
|
public function addFunction($name, callable $function, $arguments = 1) |
83
|
|
|
{ |
84
|
|
|
$this->functions[$name] = [$arguments, $function]; |
85
|
|
|
return $this; |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
/** |
89
|
|
|
* Removes a function from the function registry. |
90
|
|
|
* |
91
|
|
|
* @param string $name |
92
|
|
|
* The name of the function to remove. |
93
|
|
|
* |
94
|
|
|
* @return $this |
95
|
|
|
*/ |
96
|
|
|
public function removeFunction($name) |
97
|
|
|
{ |
98
|
|
|
unset($this->functions[$name]); |
99
|
|
|
return $this; |
100
|
|
|
} |
101
|
|
|
|
102
|
|
|
/** |
103
|
|
|
* Registers an operator with the lexer. |
104
|
|
|
* |
105
|
|
|
* @param string $name |
106
|
|
|
* The name of the operator. |
107
|
|
|
* @param string $operator |
108
|
|
|
* The full qualified class name of the operator token. |
109
|
|
|
* |
110
|
|
|
* @return $this |
111
|
|
|
*/ |
112
|
|
|
public function addOperator($name, $operator) |
113
|
|
|
{ |
114
|
|
|
if (!is_subclass_of($operator, 'Fubhy\Math\Token\Operator\OperatorTokenInterface')) { |
115
|
|
|
throw new \InvalidArgumentException(); |
116
|
|
|
} |
117
|
|
|
|
118
|
|
|
// Clear the static cache when a new operator is added. |
119
|
|
|
unset($this->compiledRegex); |
120
|
|
|
|
121
|
|
|
$this->operators[$name] = $operator; |
122
|
|
|
return $this; |
123
|
|
|
} |
124
|
|
|
|
125
|
|
|
/** |
126
|
|
|
* Removes an operator from the operator registry. |
127
|
|
|
* |
128
|
|
|
* @param string $name |
129
|
|
|
* The name of the operator to remove. |
130
|
|
|
* |
131
|
|
|
* @return $this |
132
|
|
|
*/ |
133
|
|
|
public function removeOperator($name) |
134
|
|
|
{ |
135
|
|
|
unset($this->operators[$name]); |
136
|
|
|
|
137
|
|
|
// Clear the static cache when an operator is removed. |
138
|
|
|
unset($this->compiledRegex); |
139
|
|
|
|
140
|
|
|
return $this; |
141
|
|
|
} |
142
|
|
|
|
143
|
|
|
/** |
144
|
|
|
* Registers a constant with the lexer. |
145
|
|
|
* |
146
|
|
|
* @param string $name |
147
|
|
|
* The name of the constant. |
148
|
|
|
* @param int $value |
149
|
|
|
* The value of the constant. |
150
|
|
|
* |
151
|
|
|
* @return $this |
152
|
|
|
*/ |
153
|
|
|
public function addConstant($name, $value) |
154
|
|
|
{ |
155
|
|
|
$this->constants[$name] = $value; |
156
|
|
|
return $this; |
157
|
|
|
} |
158
|
|
|
|
159
|
|
|
/** |
160
|
|
|
* Removes a constant from the constant registry. |
161
|
|
|
* |
162
|
|
|
* @param string $name |
163
|
|
|
* The name of the constant to remove. |
164
|
|
|
* |
165
|
|
|
* @return $this |
166
|
|
|
*/ |
167
|
|
|
public function removeConstant($name) |
168
|
|
|
{ |
169
|
|
|
unset($this->operators[$name]); |
170
|
|
|
return $this; |
171
|
|
|
} |
172
|
|
|
|
173
|
|
|
/** |
174
|
|
|
* Generates a token stream from a mathematical expression. |
175
|
|
|
* |
176
|
|
|
* @param string $input |
177
|
|
|
* The mathematical expression to tokenize. |
178
|
|
|
* |
179
|
|
|
* @return array |
180
|
|
|
* The generated token stream. |
181
|
|
|
*/ |
182
|
|
|
public function tokenize($input) |
183
|
|
|
{ |
184
|
|
|
$matches = []; |
185
|
|
|
$regex = $this->getCompiledTokenRegex(); |
186
|
|
|
|
187
|
|
|
if (preg_match_all($regex, $input, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE) === FALSE) { |
188
|
|
|
// There was a failure when evaluating the regular expression. |
189
|
|
|
throw new \LogicException(); |
190
|
|
|
}; |
191
|
|
|
|
192
|
|
|
$types = [ |
193
|
|
|
'number', 'operator', 'function', 'open', 'close', 'comma', 'constant', 'variable', |
194
|
|
|
]; |
195
|
|
|
|
196
|
|
|
// Traverse over all matches and create the corresponding tokens. |
197
|
|
|
return array_map(function ($match) use ($types) { |
198
|
|
|
foreach ($types as $type) { |
199
|
|
|
if (!empty($match[$type][0])) { |
200
|
|
|
return $this->createToken($type, $match[$type][0], $match[$type][1], $match); |
201
|
|
|
} |
202
|
|
|
} |
203
|
|
|
|
204
|
|
|
// There was a match outside of one of the token types. |
205
|
|
|
throw new \LogicException(); |
206
|
|
|
}, $matches); |
207
|
|
|
} |
208
|
|
|
|
209
|
|
|
/** |
210
|
|
|
* Reorganizes a list of tokens into reverse polish (postfix) notation. |
211
|
|
|
* |
212
|
|
|
* Uses an implementation of the Shunting-yard algorithm. |
213
|
|
|
* |
214
|
|
|
* http://en.wikipedia.org/wiki/Shunting-yard_algorithm |
215
|
|
|
* |
216
|
|
|
* @param \Fubhy\Math\Token\TokenInterface[] $tokens |
217
|
|
|
* The tokens to be reorganized into reverse polish (postfix) notation. |
218
|
|
|
* |
219
|
|
|
* @return \Fubhy\Math\Token\TokenInterface[] |
220
|
|
|
* The given tokens in reverse polish (postfix) notation. |
221
|
|
|
* |
222
|
|
|
* @throws \Fubhy\Math\Exception\IncorrectParenthesisException |
223
|
|
|
* @throws \Fubhy\Math\Exception\IncorrectExpressionException |
224
|
|
|
*/ |
225
|
|
|
public function postfix($tokens) |
226
|
|
|
{ |
227
|
|
|
$output = []; |
228
|
|
|
$stack = []; |
229
|
|
|
|
230
|
|
|
foreach ($tokens as $token) { |
231
|
|
|
if ($token instanceof NumberToken || $token instanceof VariableToken) { |
232
|
|
|
$output[] = $token; |
233
|
|
|
} |
234
|
|
|
elseif ($token instanceof FunctionToken) { |
235
|
|
|
array_push($stack, $token); |
236
|
|
|
} |
237
|
|
|
elseif ($token instanceof ParenthesisOpenToken) { |
238
|
|
|
array_push($stack, $token); |
239
|
|
|
} |
240
|
|
|
elseif ($token instanceof CommaToken) { |
241
|
|
View Code Duplication |
while (($current = array_pop($stack)) && (!$current instanceof ParenthesisOpenToken)) { |
|
|
|
|
242
|
|
|
$output[] = $current; |
243
|
|
|
|
244
|
|
|
if (empty($stack)) { |
245
|
|
|
throw new IncorrectExpressionException(); |
246
|
|
|
} |
247
|
|
|
} |
248
|
|
|
} |
249
|
|
|
elseif ($token instanceof ParenthesisCloseToken) { |
250
|
|
|
while (($current = array_pop($stack)) && !($current instanceof ParenthesisOpenToken)) { |
251
|
|
|
$output[] = $current; |
252
|
|
|
} |
253
|
|
|
|
254
|
|
|
if (!empty($stack) && ($stack[count($stack) - 1] instanceof FunctionToken)) { |
255
|
|
|
$output[] = array_pop($stack); |
256
|
|
|
} |
257
|
|
|
} |
258
|
|
|
elseif ($token instanceof OperatorTokenInterface) { |
259
|
|
|
while (!empty($stack)) { |
260
|
|
|
$last = end($stack); |
261
|
|
|
if (!($last instanceof OperatorTokenInterface)) { |
262
|
|
|
break; |
263
|
|
|
} |
264
|
|
|
|
265
|
|
|
$associativity = $token->getAssociativity(); |
266
|
|
|
$precedence = $token->getPrecedence(); |
267
|
|
|
$last_precedence = $last->getPrecedence(); |
268
|
|
|
if (!( |
269
|
|
|
($associativity === OperatorTokenInterface::ASSOCIATIVITY_LEFT && $precedence <= $last_precedence) || |
270
|
|
|
($associativity === OperatorTokenInterface::ASSOCIATIVITY_RIGHT && $precedence < $last_precedence) |
271
|
|
|
)) { |
272
|
|
|
break; |
273
|
|
|
} |
274
|
|
|
|
275
|
|
|
$output[] = array_pop($stack); |
276
|
|
|
} |
277
|
|
|
|
278
|
|
|
array_push($stack, $token); |
279
|
|
|
} |
280
|
|
|
} |
281
|
|
|
|
282
|
|
View Code Duplication |
while (!empty($stack)) { |
|
|
|
|
283
|
|
|
$token = array_pop($stack); |
284
|
|
|
if ($token instanceof ParenthesisOpenToken || $token instanceof ParenthesisCloseToken) { |
285
|
|
|
throw new IncorrectParenthesisException(); |
286
|
|
|
} |
287
|
|
|
|
288
|
|
|
$output[] = $token; |
289
|
|
|
} |
290
|
|
|
|
291
|
|
|
return $output; |
292
|
|
|
} |
293
|
|
|
|
294
|
|
|
/** |
295
|
|
|
* Creates a token object of the given type. |
296
|
|
|
* |
297
|
|
|
* @param string $type |
298
|
|
|
* The type of the token. |
299
|
|
|
* @param string $value |
300
|
|
|
* The matched string. |
301
|
|
|
* @param int $offset |
302
|
|
|
* The offset of the matched string. |
303
|
|
|
* @param $match |
304
|
|
|
* The full match as returned by preg_match_all(). |
305
|
|
|
* |
306
|
|
|
* @return \Fubhy\Math\Token\TokenInterface |
307
|
|
|
* The created token object. |
308
|
|
|
* |
309
|
|
|
* @throws \Fubhy\Math\Exception\UnknownConstantException |
310
|
|
|
* @throws \Fubhy\Math\Exception\UnknownFunctionException |
311
|
|
|
* @throws \Fubhy\Math\Exception\UnknownOperatorException |
312
|
|
|
* @throws \Fubhy\Math\Exception\UnknownTokenException |
313
|
|
|
*/ |
314
|
|
|
protected function createToken($type, $value, $offset, $match) |
315
|
|
|
{ |
316
|
|
|
switch ($type) { |
317
|
|
|
case 'number': |
318
|
|
|
return new NumberToken($offset, $value); |
319
|
|
|
|
320
|
|
|
case 'open': |
321
|
|
|
return new ParenthesisOpenToken($offset, $value); |
322
|
|
|
|
323
|
|
|
case 'close': |
324
|
|
|
return new ParenthesisCloseToken($offset, $value); |
325
|
|
|
|
326
|
|
|
case 'comma': |
327
|
|
|
return new CommaToken($offset, $value); |
328
|
|
|
|
329
|
|
|
case 'operator': |
330
|
|
|
foreach ($this->operators as $id => $operator) { |
331
|
|
|
if (!empty($match["op_$id"][0])) { |
332
|
|
|
return new $operator($offset, $value); |
333
|
|
|
} |
334
|
|
|
} |
335
|
|
|
throw new UnknownOperatorException($offset, $value); |
336
|
|
|
|
337
|
|
|
case 'function': |
338
|
|
|
if (isset($this->functions[$value])) { |
339
|
|
|
return new FunctionToken($offset, $this->functions[$value]); |
340
|
|
|
} |
341
|
|
|
throw new UnknownFunctionException($offset, $value); |
342
|
|
|
|
343
|
|
|
case 'constant': |
344
|
|
|
$constant = substr($value, 1); |
345
|
|
|
if (isset($this->constants[$constant])) { |
346
|
|
|
return new NumberToken($offset, $this->constants[$constant]); |
347
|
|
|
} |
348
|
|
|
throw new UnknownConstantException($offset, $constant); |
349
|
|
|
|
350
|
|
|
case 'variable': |
351
|
|
|
$variable = substr($value, 1, -1); |
352
|
|
|
if (!in_array($variable, $this->variables)) { |
353
|
|
|
$this->variables[] = $variable; |
354
|
|
|
} |
355
|
|
|
return new VariableToken($offset, $variable); |
356
|
|
|
} |
357
|
|
|
|
358
|
|
|
throw new UnknownTokenException($offset, $value); |
359
|
|
|
} |
360
|
|
|
|
361
|
|
|
/** |
362
|
|
|
* Builds a concatenated regular expression for all available operators. |
363
|
|
|
* |
364
|
|
|
* @return string |
365
|
|
|
* The regular expression for matching all available operators. |
366
|
|
|
*/ |
367
|
|
|
protected function getOperatorRegex() |
368
|
|
|
{ |
369
|
|
|
$operators = []; |
370
|
|
|
foreach ($this->operators as $id => $operator) { |
371
|
|
|
$pattern = call_user_func([$operator, 'getRegexPattern']); |
372
|
|
|
$operators[] = "(?P<op_$id>{$pattern})"; |
373
|
|
|
} |
374
|
|
|
return implode('|', $operators); |
375
|
|
|
} |
376
|
|
|
|
377
|
|
|
/** |
378
|
|
|
* Compiles the regular expressions of all token types. |
379
|
|
|
* |
380
|
|
|
* @return string |
381
|
|
|
* The compiled regular expression. |
382
|
|
|
*/ |
383
|
|
|
protected function getCompiledTokenRegex() |
384
|
|
|
{ |
385
|
|
|
if (isset($this->compiledRegex)) { |
386
|
|
|
return $this->compiledRegex; |
387
|
|
|
} |
388
|
|
|
|
389
|
|
|
$regex = [ |
390
|
|
|
sprintf('(?P<number>%s)', '\-?\d+\.?\d*(E-?\d+)?'), |
391
|
|
|
sprintf('(?P<function>%s)', '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*'), |
392
|
|
|
sprintf('(?P<constant>%s)', '\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*'), |
393
|
|
|
sprintf('(?P<variable>%s)', '\[[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\]'), |
394
|
|
|
sprintf('(?P<open>%s)', '\('), |
395
|
|
|
sprintf('(?P<close>%s)', '\)'), |
396
|
|
|
sprintf('(?P<comma>%s)', '\,'), |
397
|
|
|
sprintf('(?P<operator>%s)', $this->getOperatorRegex()), |
398
|
|
|
]; |
399
|
|
|
|
400
|
|
|
$regex = implode('|', $regex); |
401
|
|
|
return $this->compiledRegex = "/$regex/i"; |
402
|
|
|
} |
403
|
|
|
|
404
|
|
|
/** |
405
|
|
|
* Return the parsed variables identifiers. |
406
|
|
|
* |
407
|
|
|
* @return array |
408
|
|
|
*/ |
409
|
|
|
public function getVariables() |
410
|
|
|
{ |
411
|
|
|
return $this->variables; |
412
|
|
|
} |
413
|
|
|
|
414
|
|
|
} |
415
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.