1 | <?php |
||
15 | final class Tokenizer implements TokenizerInterface |
||
16 | { |
||
/**
 * Names of the built-in token types.
 *
 * Each value is used in two places in this class: as the name of the capture
 * group wrapped around the token's pattern in the assembled regex, and as the
 * suffix appended to `__NAMESPACE__ . '\Tokens\Token'` to form the class that
 * tokenize() instantiates for a match.
 *
 * NOTE(review): the *_PARENTHESIS constants carry the values
 * 'OpeningParentheses' / 'ClosingParentheses'. Because the value becomes part
 * of a class name, presumably the token classes are spelled that way too;
 * confirm before "correcting" the spelling.
 */
17 | const TOKEN_AND = 'And'; |
||
18 | const TOKEN_OR = 'Or'; |
||
19 | const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict'; |
||
20 | const TOKEN_NOT_EQUAL = 'NotEqual'; |
||
21 | const TOKEN_EQUAL_STRICT = 'EqualStrict'; |
||
22 | const TOKEN_EQUAL = 'Equal'; |
||
23 | const TOKEN_IN = 'In'; |
||
24 | const TOKEN_BOOL = 'Bool'; |
||
25 | const TOKEN_NULL = 'Null'; |
||
26 | const TOKEN_METHOD = 'Method'; |
||
27 | const TOKEN_FUNCTION = 'Function'; |
||
28 | const TOKEN_VARIABLE = 'Variable'; |
||
29 | const TOKEN_FLOAT = 'Float'; |
||
30 | const TOKEN_INTEGER = 'Integer'; |
||
31 | const TOKEN_ENCAPSED_STRING = 'EncapsedString'; |
||
32 | const TOKEN_SMALLER_EQUAL = 'SmallerEqual'; |
||
33 | const TOKEN_GREATER_EQUAL = 'GreaterEqual'; |
||
34 | const TOKEN_SMALLER = 'Smaller'; |
||
35 | const TOKEN_GREATER = 'Greater'; |
||
36 | const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses'; |
||
37 | const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses'; |
||
38 | const TOKEN_OPENING_ARRAY = 'OpeningArray'; |
||
39 | const TOKEN_CLOSING_ARRAY = 'ClosingArray'; |
||
40 | const TOKEN_COMMA = 'Comma'; |
||
41 | const TOKEN_REGEX = 'Regex'; |
||
42 | const TOKEN_COMMENT = 'Comment'; |
||
43 | const TOKEN_NEWLINE = 'Newline'; |
||
44 | const TOKEN_SPACE = 'Space'; |
||
45 | const TOKEN_UNKNOWN = 'Unknown'; |
||
46 | |||
// Registered token definitions, keyed by token name. registerToken() stores
// one stdClass per name with the fields `class`, `regex` and `priority`.
47 | private $internalTokens = []; |
||
48 | |||
// Combined pattern built by getRegex(); stays '' until the first assembly.
49 | private $regex = ''; |
||
50 | |||
// Raised by registerToken() and lowered by getRegex() after it rebuilds the
// combined pattern, so the pattern is only reassembled after a registration.
51 | private $regexRequiresReassembly = false; |
||
52 | |||
/**
 * Registers the built-in token definitions.
 *
 * Every row of the table below is [token name, regex fragment, priority] and
 * is handed to registerToken() in the order listed. Priorities run from 145
 * down to 5 in steps of five, except that TOKEN_VARIABLE (10) is listed after
 * TOKEN_SPACE (15) and before TOKEN_UNKNOWN (5).
 */
public function __construct()
{
    $builtIns = [
        [self::TOKEN_AND, '&&', 145],
        [self::TOKEN_OR, '\|\|', 140],
        [self::TOKEN_NOT_EQUAL_STRICT, '!==', 135],
        [self::TOKEN_NOT_EQUAL, '<>|!=', 130],
        [self::TOKEN_EQUAL_STRICT, '===', 125],
        [self::TOKEN_EQUAL, '==', 120],
        [self::TOKEN_IN, '\bin\b', 115],
        [self::TOKEN_BOOL, '\b(?:true|false)\b', 110],
        [self::TOKEN_NULL, '\bnull\b', 105],
        [self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100],
        [self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95],
        [self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90],
        [self::TOKEN_INTEGER, '-?\d+', 85],
        [self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80],
        [self::TOKEN_SMALLER_EQUAL, '<=', 75],
        [self::TOKEN_GREATER_EQUAL, '>=', 70],
        [self::TOKEN_SMALLER, '<', 65],
        [self::TOKEN_GREATER, '>', 60],
        [self::TOKEN_OPENING_PARENTHESIS, '\(', 55],
        [self::TOKEN_CLOSING_PARENTHESIS, '\)', 50],
        [self::TOKEN_OPENING_ARRAY, '\[', 45],
        [self::TOKEN_CLOSING_ARRAY, '\]', 40],
        [self::TOKEN_COMMA, ',', 35],
        [self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30],
        [self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25],
        [self::TOKEN_NEWLINE, '\r?\n', 20],
        [self::TOKEN_SPACE, '\s+', 15],
        [self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10],
        [self::TOKEN_UNKNOWN, '.', 5],
    ];

    foreach ($builtIns as list($name, $pattern, $priority)) {
        $this->registerToken($name, $pattern, $priority);
    }
}
|
85 | |||
86 | |||
/**
 * Breaks the input into tokens and collects them on a new Stack.
 *
 * The combined pattern from getRegex() is applied at the current byte offset.
 * For each match, the name returned by getMatchedToken() selects the class
 * `<namespace>\Tokens\Token<name>`, which is constructed with the matched
 * text, the offset at which the match started, and the stack itself. The
 * offset then advances by the byte length of the whole match.
 *
 * Scanning ends as soon as preg_match() returns a falsy result (no match at
 * the offset, or an error).
 *
 * @param string $string Text to tokenize.
 *
 * @return Stack Stack holding the tokens in the order they were matched.
 */
public function tokenize(string $string) : Stack
{
    $stack = new Stack();
    $pattern = $this->getRegex();
    $classPrefix = __NAMESPACE__ . '\\Tokens\\Token';

    for ($offset = 0; preg_match($pattern, $string, $matches, 0, $offset); $offset += strlen($matches[0])) {
        $token = $this->getMatchedToken($matches);
        $className = $classPrefix . $token;

        $instance = new $className($matches[$token], $offset, $stack);
        $stack->attach($instance);
    }

    return $stack;
}
||
109 | |||
/**
 * Stores a token definition under $class, replacing any definition already
 * stored under that name, and flags the combined regex for reassembly.
 *
 * @param string   $class    Token name; used as the key of the definition and
 *                           as its `class` field.
 * @param string   $regex    Regex fragment for this token.
 * @param int|null $priority Priority to store. When null, getPriority($class)
 *                           supplies the value, so re-registering an existing
 *                           name keeps the priority it already had.
 */
public function registerToken(string $class, string $regex, int $priority = null)
{
    // Resolve the fallback before the old definition is overwritten below.
    if ($priority === null) {
        $priority = $this->getPriority($class);
    }

    $this->regexRequiresReassembly = true;
    $this->internalTokens[$class] = (object) [
        'class' => $class,
        'regex' => $regex,
        'priority' => $priority,
    ];
}
|
120 | |||
/**
 * Picks the name of the capture group that produced the match.
 *
 * Walks the preg_match() result in order and returns the first string key
 * whose captured text is not empty. Integer keys (positional groups) are
 * ignored.
 *
 * @param array $matches Match array as filled in by preg_match().
 *
 * @return string Name of the matching group, or 'Unknown' if there is none.
 */
private function getMatchedToken(array $matches) : string
{
    foreach ($matches as $group => $text) {
        if (is_int($group) || $text === '') {
            continue;
        }

        return $group;
    }

    return 'Unknown';
}
||
131 | |||
132 | |||
/**
 * Returns the combined tokenizer pattern, rebuilding it when necessary.
 *
 * The cached pattern is reused when it is non-empty and no token has been
 * registered since it was built. Otherwise every definition yielded by
 * getQueue() is wrapped in a named group `(?<class>regex)`, the groups are
 * joined with `|`, and the result is enclosed in `~( ... )~As`.
 *
 * @return string The complete pattern, delimiters and modifiers included.
 */
private function getRegex() : string
{
    $cacheIsFresh = $this->regex && !$this->regexRequiresReassembly;

    if ($cacheIsFresh) {
        return $this->regex;
    }

    $groups = [];

    foreach ($this->getQueue() as $definition) {
        $groups[] = '(?<' . $definition->class . '>' . $definition->regex . ')';
    }

    $this->regex = sprintf('~(%s)~As', implode('|', $groups));
    $this->regexRequiresReassembly = false;

    return $this->regex;
}
||
148 | |||
149 | |||
150 | 228 | private function getQueue() : SplPriorityQueue |
|
160 | |||
/**
 * Looks up the priority stored for a token name.
 *
 * @param string $class Token name to look up.
 *
 * @return int The stored priority, or 10 when the name is not registered or
 *             its stored priority is null.
 */
private function getPriority(string $class) : int
{
    if (isset($this->internalTokens[$class]->priority)) {
        return $this->internalTokens[$class]->priority;
    }

    return 10;
}
||
165 | } |
||
166 |