1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | /** |
||
6 | * Highlighter |
||
7 | * |
||
8 | * Copyright (C) 2016, Some rights reserved. |
||
9 | * |
||
10 | * @author Kacper "Kadet" Donat <[email protected]> |
||
11 | * |
||
12 | * Contact with author: |
||
13 | * Xmpp: [email protected] |
||
14 | * E-mail: [email protected] |
||
15 | * |
||
16 | * From Kadet with love. |
||
17 | */ |
||
18 | |||
19 | namespace Kadet\Highlighter\Language; |
||
20 | |||
21 | use Kadet\Highlighter\Matcher\WholeMatcher; |
||
22 | use Kadet\Highlighter\Parser\Context; |
||
23 | use Kadet\Highlighter\Parser\Result; |
||
24 | use Kadet\Highlighter\Parser\Rule; |
||
25 | use Kadet\Highlighter\Parser\Rules; |
||
26 | use Kadet\Highlighter\Parser\Token\LanguageToken; |
||
27 | use Kadet\Highlighter\Parser\TokenFactory; |
||
28 | use Kadet\Highlighter\Parser\TokenIterator; |
||
29 | use Kadet\Highlighter\Parser\Tokens; |
||
30 | use Kadet\Highlighter\Parser\UnprocessedTokens; |
||
31 | use Kadet\Highlighter\Parser\Validator\Validator; |
||
32 | |||
/**
 * Greedy Language
 *
 * Implements greedy syntax highlighting: every rule (own and embedded) is
 * matched against the whole source up front, the collected tokens are sorted
 * and then processed one after another.
 *
 * @package Kadet\Highlighter\Language
 */
abstract class GreedyLanguage extends Language
{
    /**
     * Language options. The 'embedded' key holds the list of embedded languages.
     *
     * @var array
     */
    protected $_options = [];

    /**
     * Tokenizer rules
     *
     * @var Rules
     */
    public $rules;

    /**
     * Language constructor.
     *
     * Options are merged in this order (later entries win): built-in defaults,
     * options predefined on the class, options passed by the caller.
     * Afterwards the rule set is created and filled by setupRules().
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        $this->_options = array_merge([
            'embedded' => [],
        ], $this->_options, $options);

        $this->rules = new Rules($this);
        $this->setupRules();
    }

    /**
     * Tokenization rules setup
     *
     * Called once from the constructor, after $this->rules has been created.
     */
    abstract public function setupRules();

    /**
     * Parses source and removes wrong tokens.
     *
     * A string is tokenized first; an already built TokenIterator is processed
     * as it is.
     *
     * @param TokenIterator|string $tokens
     *
     * @param array                $additional Extra tokens, used only when $tokens is a string
     * @param bool                 $embedded   Forwarded to tokenize() when $tokens is a string
     *
     * @return Tokens
     * @throws \InvalidArgumentException When $tokens is neither a string nor a TokenIterator
     */
    public function parse($tokens = null, $additional = [], $embedded = false)
    {
        if (is_string($tokens)) {
            // tokenize() expects the offset as its third argument, so it has to be
            // passed explicitly - otherwise $embedded would be taken as the offset
            // and the actual embedded flag would never reach the tokenizer.
            $tokens = $this->tokenize($tokens, $additional, 0, $embedded);
        } elseif (!$tokens instanceof TokenIterator) {
            // Todo: Own Exceptions
            throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
        }

        return $this->_process($tokens);
    }

    /**
     * Runs every token through its own process() method.
     *
     * The token the iterator currently points at seeds the result; the loop
     * starts with the following one and stops as soon as a token's process()
     * returns a falsy value.
     *
     * @param TokenIterator $tokens
     *
     * @return Result
     */
    private function _process(TokenIterator $tokens): Result
    {
        $context = new Context($this);
        $result  = new Result($tokens->getSource(), $tokens->current());

        for ($tokens->next(); $tokens->valid(); $tokens->next()) {
            if (!$tokens->current()->process($context, $this, $result, $tokens)) {
                break;
            }
        }

        return $result;
    }

    /**
     * Matches all rules against the source and wraps the sorted tokens in an iterator.
     *
     * @param string             $source
     * @param array|\Traversable $additional Extra tokens batched into the result
     * @param int                $offset     Offset handed to every added token and to the iterator
     * @param bool               $embedded
     *
     * @return TokenIterator
     */
    public function tokenize($source, $additional = [], $offset = 0, $embedded = false)
    {
        return new TokenIterator(
            $this->_tokens($source, $offset, $additional, $embedded)->sort()->toArray(),
            $source,
            $offset
        );
    }

    /**
     * Tokenize source
     *
     * @param                    $source
     *
     * @param int                $offset
     * @param array|\Traversable $additional
     *
     * @param bool               $embedded
     *
     * @return UnprocessedTokens
     */
    private function _tokens($source, $offset = 0, $additional = [], $embedded = false)
    {
        $result = new UnprocessedTokens();

        /** @var Rule $rule */
        foreach ($this->_rules($embedded) as $rule) {
            foreach ($rule->match($source) as $token) {
                $result->add($token, $offset);
            }
        }

        return $result->batch($additional);
    }

    /**
     * Yields own rules followed by the rules of all embedded languages.
     *
     * Works on a clone of the rule set, so the language range rule added here
     * is not stored in $this->rules.
     *
     * @param bool $embedded
     *
     * @return \Generator<Rule>
     */
    private function _rules($embedded = false): \Generator
    {
        $rules = clone $this->rules;
        if (is_bool($embedded)) {
            $rules->addMany(['language.' . $this->getIdentifier() => $this->getEnds($embedded)]);
        }

        foreach ($rules->all() as $rule) {
            yield $rule;
        }

        // todo: interface
        /** @var Language $language */
        foreach ($this->getEmbedded() as $language) {
            // Greedy languages contribute their whole rule set, any other language only its range rule(s).
            foreach ($language instanceof GreedyLanguage ? $language->_rules(true) : $language->getEnds(true) as $rule) {
                yield $rule;
            }
        }
    }

    /**
     * Language range Rule(s)
     *
     * This implementation returns a single rule built on WholeMatcher and does
     * not use $embedded.
     *
     * @param $embedded
     *
     * @return Rule|\Kadet\Highlighter\Parser\Rule[]
     */
    public function getEnds($embedded = false)
    {
        return new Rule(
            new WholeMatcher(),
            [
                'priority' => 10000,
                'factory'  => new TokenFactory(LanguageToken::class),
                'inject'   => $this,
                'language' => null,
                'context'  => Validator::everywhere(),
            ]
        );
    }

    /**
     * @return Language[]
     */
    public function getEmbedded()
    {
        return $this->_options['embedded'];
    }

    /**
     * Appends a language to the list of embedded languages.
     *
     * @param Language $lang
     */
    public function embed(Language $lang)
    {
        $this->_options['embedded'][] = $lang;
    }

    /**
     * Reads an option by name.
     *
     * @param string $name
     *
     * @return mixed|null Option value; null when the option is not set (or is set to null)
     */
    public function __get($name)
    {
        return $this->_options[$name] ?? null;
    }

    /**
     * Stores an option under the given name.
     *
     * @param string $name
     * @param mixed  $value
     */
    public function __set($name, $value)
    {
        $this->_options[$name] = $value;
    }
}
||
219 |