| 1 | <?php |
||
| 32 | abstract class Language |
||
| 33 | { |
||
| 34 | /** |
||
| 35 | * Tokenizer rules |
||
| 36 | * |
||
| 37 | * @var Rule[] |
||
| 38 | */ |
||
| 39 | private $_rules; |
||
| 40 | |||
| 41 | /** |
||
| 42 | * @var array |
||
| 43 | */ |
||
| 44 | protected $_options = []; |
||
| 45 | |||
| 46 | /** |
||
| 47 | * Language constructor. |
||
| 48 | * |
||
| 49 | * @param array $options |
||
| 50 | */ |
||
| 51 | 22 | public function __construct(array $options = []) { |
|
| 58 | |||
/**
 * Defines the tokenization rules of this language.
 *
 * Concrete languages must implement this method.
 *
 * @return array
 */
abstract public function getRules();
||
| 65 | |||
/**
 * Parses source and removes wrong tokens.
 *
 * A string is tokenized first with tokenize(); a TokenIterator is consumed
 * as given. The first token of the iterator opens the result, the remaining
 * tokens are walked in order:
 *  - tokens rejected by Token::isValid() are dropped,
 *  - a starting LanguageToken delegates parsing to its injected language
 *    and the tokens returned from there are merged into the result,
 *  - any other starting token is remembered as "open" in the context,
 *  - a closing LanguageToken that belongs to this language ends parsing:
 *    still-open tokens get a synthetic end token at its position,
 *  - any other closing token is paired with its start token (or, when it
 *    has none, with an open token of the same name) and kept.
 *
 * @param TokenIterator|string $tokens     Source string or already tokenized source.
 * @param array                $additional Forwarded to tokenize() when $tokens is a string.
 * @param bool                 $embedded   Forwarded to tokenize() when $tokens is a string.
 *
 * @return TokenIterator Iterator over the kept tokens, bound to the source of $tokens.
 *
 * @throws \InvalidArgumentException When $tokens is neither a string nor a TokenIterator.
 */
public function parse($tokens = null, $additional = [], $embedded = false)
{
    if (is_string($tokens)) {
        // tokenize() expects ($source, $additional, $offset, $embedded). The
        // offset has to be passed explicitly, otherwise $embedded lands in the
        // $offset slot and the embedded flag is never forwarded.
        $tokens = $this->tokenize($tokens, $additional, 0, $embedded);
    } elseif (!$tokens instanceof TokenIterator) {
        // Todo: Own Exceptions
        throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
    }

    // The current token of the iterator always becomes $result[0].
    $start = $tokens->current();

    // Names of currently open tokens, keyed by the object hash of the start token.
    $context = [];

    /** @var Token[] $result */
    $result = [$start];
    /** @var Token[] $all */
    $all = [];

    /** @var Token $token */
    for ($tokens->next(); $tokens->valid(); $tokens->next()) {
        $token = $tokens->current();

        if (!$token->isValid($this, $context)) {
            continue;
        }

        if ($token->isStart()) {
            if ($token instanceof LanguageToken) {
                // The injected language continues on the very same iterator,
                // so this loop resumes where the nested parse stopped.
                /** @var LanguageToken $token */
                $result = array_merge(
                    $result,
                    $token->getInjected()->parse($tokens)->getTokens()
                );
            } else {
                $all[spl_object_hash($token)] = $result[] = $token;
                $context[spl_object_hash($token)] = $token->name;
            }
        } else {
            $start = $token->getStart();

            /** @noinspection PhpUndefinedMethodInspection bug */
            if ($token instanceof LanguageToken && $token->getLanguage() === $this) {
                $result[0]->setEnd($token);

                if ($result[0]->getRule()->postProcess) {
                    // Re-tokenize only the range covered by this language and
                    // parse it again, replacing the result collected so far.
                    $source = substr($tokens->getSource(), $result[0]->pos, $result[0]->getLength());

                    $tokens = $this->tokenize($source, $result, $result[0]->pos, true);
                    $result = $this->parse($tokens)->getTokens();
                }

                // closing unclosed tokens, most recently opened first
                foreach (array_reverse($context) as $hash => $name) {
                    $end = new Token([$name, 'pos' => $token->pos]);
                    $all[$hash]->setEnd($end);
                    $result[] = $end;
                }

                $result[] = $token;
                break;
            } else {
                if ($start) {
                    unset($context[spl_object_hash($start)]);
                } else {
                    // No start token attached: look for an open token with the
                    // same name, newest first.
                    // NOTE(review): ArrayHelper::find presumably returns the
                    // matching key or false - confirm against its definition.
                    /** @noinspection PhpUnusedParameterInspection */
                    $start = ArrayHelper::find(array_reverse($context), function ($k, $v) use ($token) {
                        return $v === $token->name;
                    });

                    if ($start !== false) {
                        $token->setStart($all[$start]);
                        unset($context[$start]);
                    }
                }

                $result[] = $token;
            }
        }
    }

    return new TokenIterator($result, $tokens->getSource());
}
||
| 157 | |||
| 158 | /** |
||
| 159 | * Tokenize source |
||
| 160 | * |
||
| 161 | * @param $source |
||
| 162 | * |
||
| 163 | * @param int $offset |
||
| 164 | * @param array $additional |
||
| 165 | * |
||
| 166 | * @param bool $embedded |
||
| 167 | * |
||
| 168 | * @return TokenList |
||
| 169 | */ |
||
| 170 | 12 | private function _tokens($source, $offset = 0, $additional = [], $embedded = false) |
|
| 184 | |||
| 185 | 12 | public function tokenize($source, $additional = [], $offset = 0, $embedded = false) |
|
| 190 | |||
| 191 | /** |
||
| 192 | * @param bool $embedded |
||
| 193 | * |
||
| 194 | * @return Rule[] |
||
| 195 | */ |
||
| 196 | 12 | private function _rules($embedded = false) { |
|
| 226 | |||
/**
 * Returns the unique identifier of the language, for example 'php'.
 *
 * Concrete languages must implement this method.
 *
 * @return string
 */
abstract public function getIdentifier();
||
| 233 | |||
| 234 | /** |
||
| 235 | * Language range Rule(s) |
||
| 236 | * |
||
| 237 | * @return Rule|Rule[] |
||
| 238 | */ |
||
| 239 | 16 | public function getOpenClose() |
|
| 251 | |||
| 252 | /** |
||
| 253 | * @return Language[] |
||
| 254 | */ |
||
| 255 | 13 | public function getEmbedded() { |
|
| 258 | |||
| 259 | /** |
||
| 260 | * @param Language $lang |
||
| 261 | */ |
||
| 262 | 1 | public function embed(Language $lang) { |
|
| 265 | |||
| 266 | 1 | public function __get($name) { |
|
| 269 | |||
| 270 | 1 | public function __set($name, $value) { |
|
| 273 | } |
||
| 274 |