1 | <?php |
||
2 | |||
3 | namespace Bavix\Lexer; |
||
4 | |||
5 | use Bavix\Exceptions; |
||
6 | |||
/**
 * Template lexer.
 *
 * Splits a template source into tag descriptors for the three tag kinds
 * (`{! … !}`, `{% … %}`, `{{ … }}`) plus the literal sections that were cut
 * out of the source before tokenizing. Tokenizing itself is delegated to
 * PHP's own tokenizer (token_get_all).
 */
class Lexer
{

    /** Tag kind for `{! … !}`. */
    const RAW = 1;

    /** Tag kind for `{% … %}`. */
    const OPERATOR = 2;

    /** Tag kind for `{{ … }}`. */
    const PRINTER = 4;

    /** Storage key under which extracted literal sections are returned. */
    const LITERAL = 8;

    /**
     * Pattern fragment (no delimiters) matching the opening literal tag.
     *
     * @var string
     */
    protected $openLiteralRegExp = "\{%[ \t\n\r \v]*literal[ \t\n\r \v]*%\}";

    /**
     * Pattern fragment (no delimiters) matching the closing literal tag.
     *
     * @var string
     */
    protected $closeLiteralRegExp = "\{%[ \t\n\r \v]*endliteral[ \t\n\r \v]*%\}";

    /**
     * Literal sections collected by filter(), keyed by their placeholder.
     * Handed over to the result of analysis() and then reset.
     *
     * @var array
     */
    protected $literals = [];

    /**
     * Initial value of the `print` flag for each tag kind.
     *
     * @var array
     */
    protected $prints = [
        self::OPERATOR => false,
        self::RAW => true,
        self::PRINTER => true,
    ];

    /**
     * Value of the `escape` flag for each tag kind.
     *
     * @var array
     */
    protected $escaping = [
        self::OPERATOR => false,
        self::RAW => false,
        self::PRINTER => true,
    ];

    /**
     * PHP open/close tags and the HTML comment delimiters that replace them
     * in filter().
     *
     * @var array
     */
    protected $phpTags = [
        '<?php' => '<!--',
        '<?=' => '<!--',
        '<?' => '<!--',
        '?>' => '-->',
    ];

    /**
     * Tells whether the previous token is an identifier that is directly
     * followed by the given delimiter.
     *
     * @param Token|null $last  previously collected token, if any
     * @param mixed      $data  text of the token being examined
     * @param string     $equal delimiter the current token must be equal to
     *
     * @return bool
     */
    protected function last($last, $data, $equal = '.'): bool
    {
        // there must be a previous token at all
        if (!$last) {
            return false;
        }

        // ... and it has to be a string or a variable token
        if (!in_array($last->type, [\T_STRING, \T_VARIABLE], true)) {
            return false;
        }

        // ... the current token has to be the requested delimiter
        if ($data !== $equal) {
            return false;
        }

        // ... and the previous token has to contain identifier characters
        return preg_match('~[a-z_]+~i', $last->token) === 1;
    }

    /**
     * Walks the PHP token stream and groups the tokens of every template tag.
     *
     * Each closed tag is appended to the bucket of its kind as an array with
     * the keys `type`, `print`, `escape`, `name`, `code`, `fragment` and
     * `tokens`. The literal sections gathered by filter() are added under
     * self::LITERAL and the internal literal buffer is cleared.
     *
     * @param array $tokens result of token_get_all()
     *
     * @return array buckets keyed by self::RAW, self::OPERATOR, self::PRINTER
     *               and self::LITERAL
     *
     * @throws \ParseError        when a quoted fragment inside a tag is never closed
     * @throws Exceptions\Runtime on a dangling dot or a mismatched closing tag
     * @throws Exceptions\Logic   on a nested opening tag or a stray closing tag
     * @throws Exceptions\Blank   when a tag contains no tokens
     */
    protected function analysis(array $tokens): array
    {
        $queue = new Queue($tokens);
        $queue->pop(); // remove open <?php

        // two-character sequences that open a tag
        $open = [
            '{!' => self::RAW,
            '{%' => self::OPERATOR,
            '{{' => self::PRINTER,
        ];

        // two-character sequences that close a tag
        $close = [
            '!}' => self::RAW,
            '%}' => self::OPERATOR,
            '}}' => self::PRINTER,
        ];

        // tag kind => opening sequence, used in error messages
        $begin = array_flip($open);

        // tag kind => first character of its closing sequence
        $end = [
            self::RAW => '!',
            self::OPERATOR => '%',
            self::PRINTER => '}',
        ];

        $storage = [
            self::RAW => [],
            self::OPERATOR => [],
            self::PRINTER => [],
        ];

        $isOpen = false;   // a tag is currently open
        $anyType = null;   // type of the previously seen token (whitespace included)
        $lastChar = null;  // text of the previously seen token
        $type = null;      // kind of the currently open tag
        $mixed = [];       // tokens collected for the current tag
        $last = null;      // last token pushed to $mixed
        $dot = null;       // a dot was appended to $last and awaits its right side
        $code = '';        // raw text collected for the current tag
        $print = null;     // print flag of the current tag

        while (!$queue->isEmpty()) {
            $read = $queue->pop();

            $_type = Validator::getValue($read);
            $data = $read[1] ?? $read;

            // open/close tags were injected by lexemes(); they carry no template content
            if ($_type === \T_OPEN_TAG || $_type === \T_OPEN_TAG_WITH_ECHO || $_type === \T_CLOSE_TAG) {
                continue;
            }

            // Inline HTML inside a tag: glue everything up to the next identical
            // inline fragment into a single piece of data.
            if ($type && $_type === \T_INLINE_HTML) {
                $lvl = 1;
                $rEnd = $data;

                do {
                    $read = $queue->pop();

                    $_type = Validator::getValue($read);
                    $_data = $read[1] ?? $read;

                    if ($_type === \T_OPEN_TAG || $_type === \T_OPEN_TAG_WITH_ECHO || $_type === \T_CLOSE_TAG) {
                        continue;
                    }

                    // NOTE(review): a backslash token raises the level, presumably so
                    // that the delimiter following it does not end the fragment; confirm.
                    if ($_type === \T_NS_SEPARATOR) {
                        $lvl++;
                    }

                    if ($_data === $rEnd) {
                        $lvl--;
                    }

                    $data .= $_data;

                    if ($queue->isEmpty()) {
                        throw new \ParseError('Error code `' . $code . $data . '`');
                    }

                } while ($lvl);
            }

            if ($_type === \T_STRING) {
                $isVar = preg_match('~[a-z_]+[\w_]*~i', $data);

                $_type = Validator::getType($data, $isVar ? \T_VARIABLE : \T_STRING, $type);

                if (\defined($data)) {
                    $_type = Validator::T_CONSTANT;
                }

                if ($isVar && !empty($mixed)) {
                    $mix = current($mixed);

                    // `in` inside a tag whose first token is T_FOR gets its own type
                    if ($mix->type === \T_FOR && $data === 'in') {
                        $_type = Validator::get('T_FOR_IN');
                    }
                }
            }

            // $i++, --$i, $i += 1, $i.=1...
            // any of these operators switches the print flag of the tag off
            $print = $print && !in_array($_type, [
                \T_INC, // i++, ++i
                \T_DEC, // i--, --i
                \T_PLUS_EQUAL, // i+=1
                \T_MINUS_EQUAL, // i-=1
                \T_MUL_EQUAL, // i*=1
                \T_DIV_EQUAL, // i/=1
                \T_CONCAT_EQUAL, // i.=1
                \T_SR_EQUAL, // i >>= 1
                \T_SL_EQUAL, // i <<= 1
                \T_XOR_EQUAL, // i^=1
                \T_OR_EQUAL, // i|=1
                \T_AND_EQUAL, // i&=1
                \T_MOD_EQUAL, // i%=1
            ], true);

            $code .= $data;

            // a dot must be followed by its right side without whitespace
            if ($dot && $anyType === \T_WHITESPACE) {
                throw new Exceptions\Runtime('Undefined dot `' . implode(' ', $mixed) . ' ' . $data . '`');
            }

            if ($_type === \T_WHITESPACE) {
                $lastChar = $data;
                $anyType = $_type;
                continue;
            }

            $anyType = $_type;

            // outside of a tag a `{` restarts the collected code
            if (!$type && $data === '{' && $code !== '{{') {
                $code = $data;
            }

            // previous token text + current token text: candidate tag delimiter
            $index = $lastChar . $data;

            if ((!$isOpen && isset($open[$index]) && $type) || (isset($close[$index]) && !$type)) {
                throw new Exceptions\Logic('Syntax error `' . $lastChar . $data . '`');
            }

            if (!$isOpen && isset($open[$index])) {
                if ($dot) {
                    throw new Exceptions\Runtime('Undefined dot');
                }

                $isOpen = true;
                $type = $open[$index];
                $print = $this->prints[$type];
            } else if (isset($close[$index])) {
                if ($dot) {
                    throw new Exceptions\Runtime('Undefined dot `' . \implode(' ', $mixed) . '`');
                }

                if ($type !== $close[$index]) {
                    throw new Exceptions\Runtime(
                        'Undefined syntax code `' . $begin[$type] . ' ' . \implode(' ', $mixed) . $data . '`');
                }

                if (empty($mixed)) {
                    throw new Exceptions\Blank('Empty tokens `' . $code . '`');
                }

                $token = current($mixed);
                $name = $token->name;
                $fragment = \preg_replace('~[ \t\n\r\v]{2,}~', ' ', $code);

                $storage[$type][] = [
                    'type' => $type,
                    'print' => $print,
                    'escape' => $this->escaping[$type],
                    'name' => $name,
                    'code' => $code,
                    // tag body without the two-character delimiters
                    'fragment' => \trim(\mb_substr($fragment, 2, -2)),
                    'tokens' => $mixed
                ];

                // reset the per-tag state
                $isOpen = false;
                $mixed = [];
                $type = null;
                $last = null;
                $code = '';
            } else if ($type) {
                if ($end[$type] !== $data) {
                    if ($this->last($last, $data, '(')) {
                        // identifier followed by `(` is a call
                        $last->type = \T_FUNCTION;
                    } else if ($this->last($last, $data, '.') || $dot) {
                        // dotted path: append the dot (or the part after it) to the
                        // previous token instead of creating a new one
                        $dot = !$dot;
                        $last->token .= $data;

                        continue;
                    }

                    $mixed[] = $last = new Token($data, $_type);
                } else {
                    // The current token is the first character of the closing
                    // sequence; keep it as a regular token unless `}` follows.
                    $_next = $queue->next();

                    if ($_next) {
                        $_nextToken = $_next[1] ?? $_next;

                        if ($_nextToken !== '}') {
                            $mixed[] = $last = new Token($data, $_type);
                        }
                    }
                }
            }

            $lastChar = $data;
        }

        // set literal & cleanup literals
        $storage[self::LITERAL] = $this->literals;
        $this->literals = [];

        return $storage;
    }

    /**
     * preg_replace_callback() handler: stores the body of a literal section
     * and returns the placeholder that replaces the whole section.
     *
     * The placeholder is derived from the CRC32 of the body, so equal bodies
     * share one placeholder (and two different bodies with the same CRC32
     * would do so as well).
     *
     * @param array $matches regex matches; index 1 holds the literal body
     *
     * @return string
     */
    protected function literal(array $matches): string
    {
        // hash from matches
        $hash = '[!' . __FUNCTION__ . '::read(' . \crc32($matches[1]) . ')!]';

        // save hash and value to literals array
        $this->literals[$hash] = $matches[1];

        // return hash value for replace
        return $hash;
    }

    /**
     * Filters the source in place and returns its lexemes.
     *
     * @param string $source template source; replaced by the filtered template
     *
     * @return array
     * @deprecated use fragments
     */
    public function tokens(&$source): array
    {
        $source = $this->filter($source);
        return $this->lexemes($source);
    }

    /**
     * Prepares a template for tokenizing: literal sections are swapped for
     * placeholders, `{* … *}` comments are removed and PHP tags are turned
     * into HTML comment delimiters.
     *
     * @param string $source
     *
     * @return string
     *
     * @throws Exceptions\Logic   when a literal tag has no matching counterpart
     * @throws Exceptions\Runtime when PCRE fails on the source (for example
     *                            because it is not valid UTF-8)
     */
    public function filter(string $source): string
    {
        // literal from source to array
        $filter = \preg_replace_callback(
            "~{$this->openLiteralRegExp}(\X*?){$this->closeLiteralRegExp}~u",
            [$this, 'literal'],
            $source
        );

        // null means PCRE gave up; do not let it degrade into an empty template
        if ($filter === null) {
            throw new Exceptions\Runtime(
                'Unable to extract literals, PCRE error code ' . \preg_last_error()
            );
        }

        // if check literal open then throw
        if (\preg_match("~{$this->openLiteralRegExp}~u", $filter)) {
            throw new Exceptions\Logic('Literal isn\'t closed');
        }

        // if check literal close then throw
        if (\preg_match("~{$this->closeLiteralRegExp}~u", $filter)) {
            throw new Exceptions\Logic('Literal isn\'t open');
        }

        // remove comments
        $filter = \preg_replace('~\{(?<q>\*)\X*?(\k<q>)\}~u', '', $filter);

        if ($filter === null) {
            throw new Exceptions\Runtime(
                'Unable to remove comments, PCRE error code ' . \preg_last_error()
            );
        }

        return \strtr($filter, $this->phpTags); // remove php tags
    }

    /**
     * Tokenizes an already filtered template and analyses the tokens.
     *
     * Quote characters and comment openers are wrapped into a PHP close/open
     * tag pair first, so the PHP tokenizer does not treat them as the start
     * of a string or a comment.
     *
     * @param string $source filtered template, see filter()
     *
     * @return array see analysis()
     *
     * @throws Exceptions\Runtime when PCRE fails on the source
     */
    public function lexemes(string $source): array
    {
        $lexCode = \preg_replace('~("|\'|#|\/{2}|\/\*)~u', '?>$1<?php ', $source);

        if ($lexCode === null) {
            throw new Exceptions\Runtime(
                'Unable to prepare source for tokenizer, PCRE error code ' . \preg_last_error()
            );
        }

        // analysis tokens
        return $this->analysis(
            // source progress with helped tokenizer
            \token_get_all('<?php' . PHP_EOL . $lexCode)
        );
    }

    /**
     * Runs the whole pipeline and wraps the original source, the filtered
     * template and the lexemes into a LexerObject.
     *
     * @param string $source
     *
     * @return LexerObject
     */
    public function lexerObject(string $source): LexerObject
    {
        $template = $this->filter($source);
        $lexemes = $this->lexemes($template);
        return new LexerObject($source, $template, $lexemes);
    }

}
||
390 |