1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @author: Viskov Sergey |
4
|
|
|
* @date: 3/10/16 |
5
|
|
|
* @time: 1:33 PM |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
namespace LTDBeget\sphinx; |
9
|
|
|
|
10
|
|
|
|
11
|
|
|
use BadMethodCallException; |
12
|
|
|
use LTDBeget\ascii\AsciiChar; |
13
|
|
|
use LTDBeget\stringstream\StringStream; |
14
|
|
|
|
15
|
|
|
/**
 * Class Tokenizer
 *
 * Turns the plain text of a Sphinx configuration into a list of section tokens.
 *
 * Every token is an array with a 'type' key and, when present in the input,
 * 'name', 'inheritance' and 'options' keys; 'options' is a list of
 * ['name' => string, 'value' => string] pairs. Keys whose value is empty are omitted.
 *
 * @package LTDBeget\sphinx
 */
final class Tokenizer
{
    /**
     * Character stream over the comment-free configuration text
     * @var StringStream
     */
    private $stream;

    /**
     * Result of tokenize input string
     * @var array
     */
    private $tokens = [];

    /**
     * temporary storage of tokens for one section
     * @var array
     */
    private $currentSection = [
        'type'        => '',
        'name'        => '',
        'inheritance' => '',
        'options'     => []
    ];

    /**
     * temporary storage of tokens for one option
     * @var array
     */
    private $currentOption = [
        'name'  => '',
        'value' => ''
    ];

    /**
     * parse and tokenize input string
     *
     * @param string $plainData text of the configuration
     * @throws SyntaxErrorException when the text does not follow the expected section syntax
     * @throws BadMethodCallException
     * @return array list of section tokens, empty when there is nothing to tokenize
     * @throws \Hoa\Ustring\Exception
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    public static function tokenize(string $plainData) : array
    {
        // empty() is kept on purpose: both '' and '0' have always produced an empty token list.
        if (empty($plainData)) {
            return [];
        }

        return (new self($plainData))->tokenizeInternal()->tokens;
    }

    /**
     * Tokenizer constructor.
     *
     * Strips comments from the given text and wraps the remainder in a stream.
     *
     * @internal
     * @param string $string
     * @throws BadMethodCallException
     * @throws \Hoa\Ustring\Exception
     * @throws \InvalidArgumentException when comments cannot be stripped
     */
    private function __construct(string $string)
    {
        $this->stream = new StringStream($this->removeComments($string));
    }

    /**
     * Replaces every comment with a single line feed.
     *
     * A comment starts with '#' at the beginning of a line or right after a space or a tab
     * (that space or tab is removed as well) and runs to the end of the line. A comment on
     * the last line is removed even when the text has no trailing line feed.
     *
     * @internal
     * @param string $string
     * @return string
     * @throws \InvalidArgumentException when the regular expression engine reports an error
     */
    private function removeComments(string $string) : string
    {
        $withoutComments = preg_replace('/(^#|[ \t]#).*(?:\n|\z)/m', "\n", $string);

        // preg_replace() returns null on failure, which must not leak through the string return type.
        if ($withoutComments === null) {
            throw new \InvalidArgumentException(
                'Unable to strip comments from configuration, PCRE error code: ' . preg_last_error()
            );
        }

        return $withoutComments;
    }

    /**
     * Reads sections one after another until the stream is exhausted.
     *
     * Leading whitespace is skipped first, so a text that holds nothing but whitespace
     * (for example one that consisted of comments only) yields no tokens.
     *
     * @internal
     * @return Tokenizer
     * @throws \LogicException
     * @throws \InvalidArgumentException
     * @throws SyntaxErrorException
     */
    private function tokenizeInternal() : Tokenizer
    {
        $this->stream->ignoreWhitespace();

        while (!$this->stream->isEnd()) {
            $this->extractSection();
            $this->saveCurrentSection();
        }

        return $this;
    }

    /**
     * Reads one complete section: its type, for 'source' and 'index' also the name and
     * the optional parent, and finally the option block. Trailing whitespace is skipped.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractSection()
    {
        $this->stream->ignoreWhitespace();

        $this->extractSectionType();

        switch ($this->currentSection['type']) {
            case 'source':
            case 'index':
                // only these two section types carry a name and may inherit
                $this->stream->ignoreHorizontalSpace();
                $this->extractSectionName();
                $this->extractInheritance();
                break;
            case 'indexer':
            case 'searchd':
            case 'common':
                break;
            default:
                throw new SyntaxErrorException($this->stream);
        }

        $this->stream->ignoreWhitespace();

        $this->extractOptions();

        $this->stream->ignoreWhitespace();
    }

    /**
     * Appends the run of letters at the current position to the section type.
     * The run must be followed by a whitespace character.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractSectionType()
    {
        $char = $this->stream->currentAscii();

        while ($char->isLetter()) {
            $this->currentSection['type'] .= $this->stream->current();
            $this->stream->next();
            $char = $this->stream->currentAscii();
        }

        if (!$char->isWhiteSpace()) {
            throw new SyntaxErrorException($this->stream);
        }
    }

    /**
     * Reads the section name. It must not be empty and must be followed by
     * a whitespace character or a colon.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractSectionName()
    {
        $this->currentSection['name'] .= $this->readIdentifier();

        $char         = $this->stream->currentAscii();
        $isTerminated = $char->isWhiteSpace() || $char->is(AsciiChar::COLON);

        // strict comparison instead of empty(): a name such as '0' is a valid identifier
        if (!$isTerminated || $this->currentSection['name'] === '') {
            throw new SyntaxErrorException($this->stream);
        }
    }

    /**
     * Reads the optional ": parent" part that may follow a section name.
     * Nothing is consumed when the name is followed by a line break or an opening brace.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractInheritance()
    {
        $this->stream->ignoreHorizontalSpace();

        $char = $this->stream->currentAscii();

        if ($char->isVerticalSpace() || $char->is(AsciiChar::OPENING_BRACE)) {
            return;
        }

        if (!$char->is(AsciiChar::COLON)) {
            throw new SyntaxErrorException($this->stream);
        }

        $this->stream->next();
        $this->stream->ignoreHorizontalSpace();
        $this->extractInheritanceName();
    }

    /**
     * Reads the name of the parent section. It must not be empty and must be
     * followed by a whitespace character.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractInheritanceName()
    {
        $this->currentSection['inheritance'] .= $this->readIdentifier();

        // strict comparison instead of empty(): a parent named '0' is a valid identifier
        if (!$this->stream->currentAscii()->isWhiteSpace() || $this->currentSection['inheritance'] === '') {
            throw new SyntaxErrorException($this->stream);
        }
    }

    /**
     * Reads the "{ ... }" block of a section, one option at a time, and consumes the closing brace.
     *
     * @internal
     * @throws SyntaxErrorException when the block is not opened or is never closed
     * @throws \LogicException
     * @throws \InvalidArgumentException
     */
    private function extractOptions()
    {
        if (!$this->stream->currentAscii()->is(AsciiChar::OPENING_BRACE)) {
            throw new SyntaxErrorException($this->stream);
        }

        $this->stream->next();

        while (true) {
            $this->stream->ignoreWhitespace();

            // reaching the end here means the closing brace is missing
            if ($this->stream->isEnd()) {
                throw new SyntaxErrorException($this->stream);
            }

            if ($this->stream->currentAscii()->is(AsciiChar::CLOSING_BRACE)) {
                $this->stream->next();

                return;
            }

            $this->extractOption();
        }
    }

    /**
     * Reads one "name = value" option and stores it in the current section.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractOption()
    {
        $this->extractOptionName();
        $this->stream->ignoreHorizontalSpace();

        if (!$this->stream->currentAscii()->is(AsciiChar::EQUALS)) {
            throw new SyntaxErrorException($this->stream);
        }

        $this->stream->next();
        $this->stream->ignoreHorizontalSpace();

        $this->extractOptionValue();
        $this->saveCurrentOption();
    }

    /**
     * Reads the option name. It must be followed by a horizontal space.
     *
     * @internal
     * @throws SyntaxErrorException
     * @throws \InvalidArgumentException
     * @throws \LogicException
     */
    private function extractOptionName()
    {
        $this->currentOption['name'] .= $this->readIdentifier();

        if (!$this->stream->currentAscii()->isHorizontalSpace()) {
            throw new SyntaxErrorException($this->stream);
        }
    }

    /**
     * Reads the option value up to, but not including, the line break that ends it.
     *
     * A backslash directly followed by a line break continues the value on the next line;
     * the backslash and the line break are both kept in the value. Any other backslash is
     * kept as an ordinary character.
     *
     * @internal
     * @throws SyntaxErrorException when the text ends before the value is terminated
     * @throws \LogicException
     * @throws \InvalidArgumentException
     */
    private function extractOptionValue()
    {
        while (true) {
            // check for the end before touching the current character
            if ($this->stream->isEnd()) {
                throw new SyntaxErrorException($this->stream);
            }

            $char = $this->stream->currentAscii();

            if ($char->isPrintableChar() || $char->isHorizontalSpace()) {
                if (!$char->is(AsciiChar::BACKSLASH)) {
                    $this->currentOption['value'] .= $this->stream->current();
                    $this->stream->next();
                    continue;
                }

                // look at the character behind the backslash to tell a continuation from a literal
                $this->stream->next();
                $continuesOnNextLine = $this->stream->currentAscii()->isVerticalSpace();

                $this->currentOption['value'] .= chr(AsciiChar::BACKSLASH);

                if ($continuesOnNextLine) {
                    $lineBreak = $this->stream->current();
                    $this->currentOption['value'] .= $lineBreak;
                    $this->stream->next();

                    // NOTE(review): assumes isVerticalSpace() covers "\r"; if so, CRLF must be
                    // consumed as one line break or the value would end right after the "\r".
                    if ($lineBreak === "\r" && !$this->stream->isEnd() && $this->stream->current() === "\n") {
                        $this->currentOption['value'] .= "\n";
                        $this->stream->next();
                    }
                }

                continue;
            }

            if ($char->isVerticalSpace()) {
                return;
            }

            throw new SyntaxErrorException($this->stream);
        }
    }

    /**
     * Consumes the run of letters, digits and underscores at the current position.
     *
     * Shared by the section name, inheritance name and option name readers; the caller
     * decides which character may legally follow the identifier.
     *
     * @internal
     * @return string the consumed characters, '' when the current character is not part of an identifier
     */
    private function readIdentifier() : string
    {
        $identifier = '';
        $char       = $this->stream->currentAscii();

        while ($char->isLetter() || $char->isDigit() || $char->is(AsciiChar::UNDERSCORE)) {
            $identifier .= $this->stream->current();
            $this->stream->next();
            $char = $this->stream->currentAscii();
        }

        return $identifier;
    }

    /**
     * Moves the collected section into the result, without its empty parts,
     * and resets the temporary section storage.
     *
     * @internal
     */
    private function saveCurrentSection()
    {
        // an explicit callback keeps values such as '0', which a bare array_filter() would drop
        $this->tokens[] = array_filter(
            $this->currentSection,
            static function ($value) {
                return $value !== '' && $value !== [];
            }
        );

        $this->currentSection = $this->getEmptySectionData();
    }

    /**
     * Appends the collected option to the current section and resets the temporary option storage.
     *
     * @internal
     */
    private function saveCurrentOption()
    {
        $this->currentSection['options'][] = $this->currentOption;
        $this->currentOption               = $this->getEmptyOptionData();
    }

    /**
     * @internal
     * @return array blank section storage
     */
    private function getEmptySectionData() : array
    {
        return [
            'type'        => '',
            'name'        => '',
            'inheritance' => '',
            'options'     => []
        ];
    }

    /**
     * @internal
     * @return array blank option storage
     */
    private function getEmptyOptionData() : array
    {
        return [
            'name'  => '',
            'value' => ''
        ];
    }
}
406
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.