bavix /
lexer
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Bavix\Lexer; |
||
| 4 | |||
| 5 | use Bavix\Exceptions; |
||
| 6 | |||
/**
 * Template lexer.
 *
 * Splits a template source into "lexemes": the `{! ... !}` (raw),
 * `{% ... %}` (operator) and `{{ ... }}` (printer) tags found in it, plus the
 * `{% literal %} ... {% endliteral %}` blocks that are cut out before
 * tokenizing. Tokenizing itself is delegated to PHP's own `token_get_all()`.
 */
class Lexer
{

    /** Lexeme type for `{! ... !}` tags. */
    const RAW = 1;

    /** Lexeme type for `{% ... %}` tags. */
    const OPERATOR = 2;

    /** Lexeme type for `{{ ... }}` tags. */
    const PRINTER = 4;

    /** Storage key under which the extracted literal blocks are returned. */
    const LITERAL = 8;

    /**
     * Regular expression fragment (without delimiters) that matches the
     * opening tag of a literal block.
     *
     * @var string
     */
    protected $openLiteralRegExp = "\{%[ \t\n\r \v]*literal[ \t\n\r \v]*%\}";

    /**
     * Regular expression fragment (without delimiters) that matches the
     * closing tag of a literal block.
     *
     * @var string
     */
    protected $closeLiteralRegExp = "\{%[ \t\n\r \v]*endliteral[ \t\n\r \v]*%\}";

    /**
     * Literal blocks collected by filter(): placeholder hash => original text.
     * Emptied again at the end of analysis().
     *
     * @var array
     */
    protected $literals = [];

    /**
     * Initial value of the `print` flag of a lexeme, per tag type.
     *
     * @var array
     */
    protected $prints = [
        self::OPERATOR => false,
        self::RAW      => true,
        self::PRINTER  => true,
    ];

    /**
     * Value of the `escape` flag of a lexeme, per tag type.
     *
     * @var array
     */
    protected $escaping = [
        self::OPERATOR => false,
        self::RAW      => false,
        self::PRINTER  => true,
    ];

    /**
     * Replacement map applied by filter(): PHP open/close tags found in the
     * template are turned into HTML comment delimiters.
     *
     * @var array
     */
    protected $phpTags = [
        '<?php' => '<!--',
        '<?='   => '<!--',
        '<?'    => '<!--',
        '?>'    => '-->',
    ];

    /**
     * Tells whether the current piece of data directly follows an
     * identifier-like token and equals the expected delimiter.
     *
     * analysis() calls it with '(' to detect a function call and with '.'
     * to detect a dotted path.
     *
     * @param Token|null $last  previously collected token, if any
     * @param mixed      $data  text of the token being processed
     * @param string     $equal delimiter the data has to be identical to
     *
     * @return bool
     */
    protected function last($last, $data, $equal = '.'): bool
    {
        return
            // a previous token exists
            $last &&

            // ... and it is a string or a variable
            in_array($last->type, [\T_STRING, \T_VARIABLE], true) &&

            // ... and the current data is the expected delimiter
            $data === $equal &&

            // ... and the previous token contains an identifier character
            preg_match('~[a-z_]+~i', $last->token);
    }

    /**
     * Walks over the tokens produced by token_get_all() and groups them into
     * lexemes.
     *
     * The result is indexed by the type constants: RAW, OPERATOR and PRINTER
     * each hold a list of lexemes (arrays with the keys `type`, `print`,
     * `escape`, `name`, `code`, `fragment` and `tokens`); LITERAL holds the
     * literal blocks collected by filter().
     *
     * @param array $tokens output of token_get_all()
     *
     * @return array
     *
     * @throws \ParseError        when a quoted string / comment started inside
     *                            a tag reaches the end of the source
     * @throws Exceptions\Logic   on a nested open tag or a close tag without
     *                            an open one
     * @throws Exceptions\Runtime on a dangling dot or mismatching close tag
     * @throws Exceptions\Blank   on a tag without any token inside
     */
    protected function analysis(array $tokens): array
    {
        $queue = new Queue($tokens);
        $queue->pop(); // remove open <?php

        // two-character sequences that open a tag
        $open = [
            '{!' => self::RAW,
            '{%' => self::OPERATOR,
            '{{' => self::PRINTER,
        ];

        // two-character sequences that close a tag
        $close = [
            '!}' => self::RAW,
            '%}' => self::OPERATOR,
            '}}' => self::PRINTER,
        ];

        // type => opening sequence, used in error messages only
        $begin = array_flip($open);

        // first character of the closing sequence, per type
        $end = [
            self::RAW      => '!',
            self::OPERATOR => '%',
            self::PRINTER  => '}',
        ];

        $storage = [
            self::RAW      => [],
            self::OPERATOR => [],
            self::PRINTER  => [],
        ];

        $isOpen   = false; // a tag is currently open
        $anyType  = null;  // type of the previously processed token
        $lastChar = null;  // text of the previously processed token
        $type     = null;  // type of the tag being collected
        $mixed    = [];    // tokens collected for the current tag
        $last     = null;  // last token pushed to $mixed
        $dot      = null;  // a dotted path is waiting for its next part
        $code     = '';    // raw source of the current tag
        $print    = null;  // `print` flag of the current tag

        while (!$queue->isEmpty()) {
            $read = $queue->pop();

            $_type = Validator::getValue($read);
            $data  = $read[1] ?? $read;

            // PHP tags are injected by lexemes(); they carry no template data
            if ($_type === \T_OPEN_TAG || $_type === \T_OPEN_TAG_WITH_ECHO || $_type === \T_CLOSE_TAG) {
                continue;
            }

            // Inside a tag, inline HTML is a string/comment delimiter that
            // lexemes() isolated. Everything up to the matching delimiter is
            // glued into one piece of data.
            if ($type && $_type === \T_INLINE_HTML) {
                $lvl  = 1;
                $rEnd = $data;

                do {
                    $read = $queue->pop();

                    $_type = Validator::getValue($read);
                    $_data = $read[1] ?? $read;

                    $isTag = $_type === \T_OPEN_TAG
                        || $_type === \T_OPEN_TAG_WITH_ECHO
                        || $_type === \T_CLOSE_TAG;

                    if (!$isTag) {
                        // presumably a backslash escapes the next delimiter,
                        // so one more delimiter is needed to get back to 0
                        if ($_type === \T_NS_SEPARATOR) {
                            $lvl++;
                        }

                        if ($_data === $rEnd) {
                            $lvl--;
                        }

                        $data .= $_data;
                    }

                    // Checked for tag tokens as well: otherwise a trailing
                    // PHP tag would make the next iteration pop an empty queue.
                    if ($queue->isEmpty()) {
                        throw new \ParseError('Error code `' . $code . $data . '`');
                    }

                } while ($lvl);
            }

            if ($_type === \T_STRING) {
                $isVar = preg_match('~[a-z_]+[\w_]*~i', $data);

                $_type = Validator::getType($data, $isVar ? \T_VARIABLE : \T_STRING, $type);

                if (\defined($data)) {
                    $_type = Validator::T_CONSTANT;
                }

                if ($isVar && !empty($mixed)) {
                    $mix = current($mixed);

                    // `in` right after a leading `for` gets its own type
                    if ($mix->type === \T_FOR && $data === 'in') {
                        $_type = Validator::get('T_FOR_IN');
                    }
                }
            }

            // $i++, --$i, $i += 1, $i.=1...
            // a tag that contains one of these operators is not printed
            $print = $print && !in_array($_type, [
                \T_INC,          // i++, ++i
                \T_DEC,          // i--, --i
                \T_PLUS_EQUAL,   // i+=1
                \T_MINUS_EQUAL,  // i-=1
                \T_MUL_EQUAL,    // i*=1
                \T_DIV_EQUAL,    // i/=1
                \T_CONCAT_EQUAL, // i.=1
                \T_SR_EQUAL,     // i >>= 1
                \T_SL_EQUAL,     // i <<= 1
                \T_XOR_EQUAL,    // i^=1
                \T_OR_EQUAL,     // i|=1
                \T_AND_EQUAL,    // i&=1
                \T_MOD_EQUAL,    // i%=1
            ], true);

            $code .= $data;

            // a dot is pending and the previous token was whitespace
            if ($dot && $anyType === \T_WHITESPACE) {
                throw new Exceptions\Runtime('Undefined dot `' . implode(' ', $mixed) . ' ' . $data . '`');
            }

            if ($_type === \T_WHITESPACE) {
                $lastChar = $data;
                $anyType  = $_type;
                continue;
            }

            $anyType = $_type;

            // outside of a tag every '{' restarts the raw code buffer,
            // unless the buffer already is the '{{' opening sequence
            if (!$type && $data === '{' && $code !== '{{') {
                $code = $data;
            }

            $index = $lastChar . $data;

            if ((!$isOpen && isset($open[$index]) && $type) || (isset($close[$index]) && !$type)) {
                throw new Exceptions\Logic('Syntax error `' . $lastChar . $data . '`');
            }

            if (!$isOpen && isset($open[$index])) {
                if ($dot) {
                    throw new Exceptions\Runtime('Undefined dot');
                }

                $isOpen = true;
                $type   = $open[$index];
                $print  = $this->prints[$type];
            } elseif (isset($close[$index])) {
                if ($dot) {
                    throw new Exceptions\Runtime('Undefined dot `' . \implode(' ', $mixed) . '`');
                }

                if ($type !== $close[$index]) {
                    throw new Exceptions\Runtime(
                        'Undefined syntax code `' . $begin[$type] . ' ' . \implode(' ', $mixed) . $data . '`');
                }

                if (empty($mixed)) {
                    throw new Exceptions\Blank('Empty tokens `' . $code . '`');
                }

                $token    = current($mixed);
                $name     = $token->name;
                $fragment = \preg_replace('~[ \t\n\r\v]{2,}~', ' ', $code);

                $storage[$type][] = [
                    'type'     => $type,
                    'print'    => $print,
                    'escape'   => $this->escaping[$type],
                    'name'     => $name,
                    'code'     => $code,
                    // tag content without the two-character open/close marks
                    'fragment' => \trim(\mb_substr($fragment, 2, -2)),
                    'tokens'   => $mixed
                ];

                $isOpen = false;
                $mixed  = [];
                $type   = null;
                $last   = null;
                $code   = '';
            } elseif ($type) {
                if ($end[$type] !== $data) {
                    if ($this->last($last, $data, '(')) {
                        // identifier followed by '(' is a function call
                        $last->type = \T_FUNCTION;
                    } elseif ($this->last($last, $data, '.') || $dot) {
                        // dotted path: glue the dot (and then the part after
                        // it) onto the previous token instead of adding a
                        // new one
                        $dot = !$dot;
                        $last->token .= $data;

                        continue;
                    }

                    $mixed[] = $last = new Token($data, $_type);
                } else {
                    // The data is the first character of the closing
                    // sequence; it is an ordinary token unless a '}' follows.
                    $_next = $queue->next();

                    if ($_next) {
                        $_nextToken = $_next[1] ?? $_next;

                        if ($_nextToken !== '}') {
                            $mixed[] = $last = new Token($data, $_type);
                        }
                    }
                }
            }

            $lastChar = $data;
        }

        // set literal & cleanup literals
        $storage[self::LITERAL] = $this->literals;
        $this->literals = [];

        return $storage;
    }

    /**
     * preg_replace_callback() handler: stores the content of a literal block
     * and returns the placeholder that takes its place in the template.
     *
     * @param array $matches regular expression matches, [1] is the content
     *
     * @return string
     */
    protected function literal(array $matches): string
    {
        // placeholder derived from the checksum of the content
        $hash = '[!' . __FUNCTION__ . '::read(' . \crc32($matches[1]) . ')!]';

        // remember which text the placeholder stands for
        $this->literals[$hash] = $matches[1];

        // the placeholder replaces the whole literal block
        return $hash;
    }

    /**
     * Filters the source and returns its lexemes.
     *
     * The argument is taken by reference and is overwritten with the
     * filtered source.
     *
     * @param string $source
     *
     * @return array
     * @deprecated use fragments
     */
    public function tokens(&$source): array
    {
        $source = $this->filter($source);
        return $this->lexemes($source);
    }

    /**
     * Prepares a template for tokenizing: literal blocks are replaced with
     * placeholders, `{* ... *}` comments are removed and PHP tags are turned
     * into HTML comment delimiters.
     *
     * @param string $source
     *
     * @return string
     *
     * @throws Exceptions\Logic   when a literal block is not opened / closed
     * @throws Exceptions\Runtime when the regular expression engine fails,
     *                            e.g. because the source is not valid UTF-8
     */
    public function filter(string $source): string
    {
        // literal from source to array
        $filter = $this->pcreResult(
            \preg_replace_callback(
                "~{$this->openLiteralRegExp}(\X*?){$this->closeLiteralRegExp}~u",
                [$this, 'literal'],
                $source
            ),
            'extract literals'
        );

        // an open tag is left over: its block was never closed
        if (\preg_match("~{$this->openLiteralRegExp}~u", $filter)) {
            throw new Exceptions\Logic('Literal isn\'t closed');
        }

        // a close tag is left over: its block was never opened
        if (\preg_match("~{$this->closeLiteralRegExp}~u", $filter)) {
            throw new Exceptions\Logic('Literal isn\'t open');
        }

        // remove comments
        $filter = $this->pcreResult(
            \preg_replace('~\{(?<q>\*)\X*?(\k<q>)\}~u', '', $filter),
            'remove comments'
        );

        return \strtr($filter, $this->phpTags); // remove php tags
    }

    /**
     * Turns a NULL result of a preg_replace*() call into an exception, so a
     * failure of the regular expression engine does not silently end up as
     * an empty template.
     *
     * @param string|null $result return value of the preg_replace*() call
     * @param string      $stage  what the call was doing, for the message
     *
     * @return string
     *
     * @throws Exceptions\Runtime
     */
    private function pcreResult($result, string $stage): string
    {
        if ($result === null) {
            throw new Exceptions\Runtime(
                'Unable to ' . $stage . ' (PCRE error code ' . \preg_last_error() . ')'
            );
        }

        return $result;
    }

    /**
     * Tokenizes a filtered template and returns its lexemes.
     *
     * Quotes and comment starters are wrapped into a PHP close/open tag pair
     * first, so token_get_all() reports each of them as a separate inline
     * HTML token instead of interpreting it.
     *
     * @param string $source
     *
     * @return array see analysis()
     */
    public function lexemes(string $source): array
    {
        $lexCode = \preg_replace('~("|\'|#|\/{2}|\/\*)~u', '?>$1<?php ', $source);

        // analysis tokens
        return $this->analysis(
            // source progress with helped tokenizer
            \token_get_all('<?php' . PHP_EOL . $lexCode)
        );
    }

    /**
     * Filters and tokenizes the source and wraps the original source, the
     * filtered template and the lexemes into a LexerObject.
     *
     * @param string $source
     *
     * @return LexerObject
     */
    public function lexerObject(string $source): LexerObject
    {
        $template = $this->filter($source);
        $lexemes  = $this->lexemes($template);
        return new LexerObject($source, $template, $lexemes);
    }

}
||
| 390 |