1
|
|
|
<?php |
2
|
|
|
namespace Thunder\Shortcode\Parser; |
3
|
|
|
|
4
|
|
|
use Thunder\Shortcode\Shortcode\ParsedShortcode; |
5
|
|
|
use Thunder\Shortcode\Shortcode\Shortcode; |
6
|
|
|
use Thunder\Shortcode\Syntax\CommonSyntax; |
7
|
|
|
use Thunder\Shortcode\Syntax\SyntaxInterface; |
8
|
|
|
use Thunder\Shortcode\Utility\RegexBuilderUtility; |
9
|
|
|
|
10
|
|
|
/** |
11
|
|
|
* @author Tomasz Kowalczyk <[email protected]> |
12
|
|
|
*/ |
13
|
|
|
final class RegularParser implements ParserInterface |
14
|
|
|
{ |
15
|
|
|
/** @var string */ |
16
|
|
|
private $lexerRegex; |
17
|
|
|
/** @var string */ |
18
|
|
|
private $nameRegex; |
19
|
|
|
/** @psalm-var list<array{0:int,1:string,2:int}> */ |
20
|
|
|
private $tokens = array(); |
21
|
|
|
/** @var int */ |
22
|
|
|
private $tokensCount = 0; |
23
|
|
|
/** @var int */ |
24
|
|
|
private $position = 0; |
25
|
|
|
/** @var int[] */ |
26
|
|
|
private $backtracks = array(); |
27
|
|
|
/** @var int */ |
28
|
|
|
private $lastBacktrack = 0; |
29
|
|
|
|
30
|
|
|
const TOKEN_OPEN = 1; |
31
|
|
|
const TOKEN_CLOSE = 2; |
32
|
|
|
const TOKEN_MARKER = 3; |
33
|
|
|
const TOKEN_SEPARATOR = 4; |
34
|
|
|
const TOKEN_DELIMITER = 5; |
35
|
|
|
const TOKEN_STRING = 6; |
36
|
|
|
const TOKEN_WS = 7; |
37
|
|
|
|
38
|
15 |
|
/**
 * Builds the two regular expressions this parser works with: the lexer
 * expression derived from the given syntax and the shortcode name expression.
 *
 * @param SyntaxInterface|null $syntax CommonSyntax is used when no syntax is passed
 */
public function __construct(SyntaxInterface $syntax = null)
{
    if(null === $syntax) {
        $syntax = new CommonSyntax();
    }

    $this->lexerRegex = $this->prepareLexer($syntax);
    $this->nameRegex = '~^'.RegexBuilderUtility::buildNameRegex().'$~us';
}
43
|
|
|
|
44
|
|
|
/**
 * Finds all shortcodes in the given text.
 *
 * The text is split into tokens, then the token stream is scanned for opening
 * tags. Every opening tag is tried with the shortcode() rule; when the rule
 * fails the parser rewinds to that tag, consumes a single token and carries on.
 *
 * @param string $text
 *
 * @return ParsedShortcode[]
 *
 * @throws \RuntimeException when the lexer fails, see tokenize()
 */
public function parse($text)
{
    // Tokenize before touching the ini setting: tokenize() throws on PCRE failure and
    // must not leave the Xdebug nesting limit disabled for the rest of the request.
    $this->tokens = $this->tokenize($text);
    $this->tokensCount = \count($this->tokens);
    $this->backtracks = array();
    $this->lastBacktrack = 0;
    $this->position = 0;

    // shortcode() calls itself for nested shortcodes, so the limit is lifted while parsing
    $nestingLevel = ini_set('xdebug.max_nesting_level', '-1');

    $shortcodes = array();
    while($this->position < $this->tokensCount) {
        // skip every token in front of the next opening tag
        while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_OPEN)) {
            $this->position++;
        }
        $names = array();
        $this->beginBacktrack();
        $matches = $this->shortcode($names);
        if(false === $matches) {
            // not a shortcode: go back to where the rule started and step over one token
            $this->backtrack();
            $this->match(null, true);
            continue;
        }
        if(\is_array($matches)) {
            foreach($matches as $shortcode) {
                $shortcodes[] = $shortcode;
            }
        }
    }

    // ini_set() returns false when the option could not be changed,
    // in that case there is no previous value to put back
    if(false !== $nestingLevel) {
        ini_set('xdebug.max_nesting_level', $nestingLevel);
    }

    return $shortcodes;
}
81
|
|
|
|
82
|
|
|
/**
 * Wraps the parsed pieces of a single shortcode in a ParsedShortcode instance.
 *
 * @param string $name
 * @psalm-param array<string,string|null> $parameters
 * @param string|null $bbCode
 * @param int $offset
 * @param string|null $content
 * @param string $text
 *
 * @return ParsedShortcode
 */
private function getObject($name, $parameters, $bbCode, $offset, $content, $text)
{
    $shortcode = new Shortcode($name, $parameters, $content, $bbCode);

    return new ParsedShortcode($shortcode, $text, $offset);
}
96
|
|
|
|
97
|
|
|
/* --- RULES ----------------------------------------------------------- */ |
98
|
|
|
|
99
|
|
|
/**
 * Tries to read one shortcode starting at the current position.
 *
 * Callers are expected to call beginBacktrack() right before invoking this
 * rule; the marker they push is consumed here on every non-false return.
 *
 * Results of match() and close() are compared strictly against '' / false
 * rather than tested for truthiness, because PHP treats the string "0" as
 * false and a token with that text would otherwise look like a failed match.
 *
 * @param string[] $names names of the shortcodes that are currently open, outermost first
 * @psalm-param list<string> $names
 * FIXME: investigate the reason Psalm complains about references
 * @psalm-suppress ReferenceConstraintViolation
 *
 * @return ParsedShortcode[]|string|false list of shortcodes on success, name of the
 *         closing tag when it closes one of the enclosing shortcodes instead of this
 *         one, false when the tokens do not form a shortcode
 */
private function shortcode(array &$names)
{
    if('' === $this->match(self::TOKEN_OPEN, false)) { return false; }
    // the opening tag was the token just consumed, its third element is the text offset
    $offset = $this->tokens[$this->position - 1][2];
    $this->match(self::TOKEN_WS, false);
    if('' === $name = $this->match(self::TOKEN_STRING, false)) { return false; }
    if($this->lookahead(self::TOKEN_STRING)) { return false; }
    if(1 !== preg_match($this->nameRegex, $name)) { return false; }
    $this->match(self::TOKEN_WS, false);
    // bbCode
    $bbCode = ('' !== $this->match(self::TOKEN_SEPARATOR, true)) ? $this->value() : null;
    if(false === $bbCode) { return false; }
    // parameters
    if(false === ($parameters = $this->parameters())) { return false; }

    // self-closing
    if('' !== $this->match(self::TOKEN_MARKER, true)) {
        if('' === $this->match(self::TOKEN_CLOSE, false)) { return false; }

        return array($this->getObject($name, $parameters, $bbCode, $offset, null, $this->getBacktrack()));
    }

    // just-closed or with-content
    if('' === $this->match(self::TOKEN_CLOSE, false)) { return false; }
    // marks the first token of the content
    $this->beginBacktrack();
    $names[] = $name;

    // begin inlined content()
    $shortcodes = array();
    $closingName = null;

    while($this->position < $this->tokensCount) {
        // consume plain content up to the next opening tag
        while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_OPEN)) {
            $this->match(null, true);
        }

        $this->beginBacktrack();
        /** @psalm-suppress MixedArgumentTypeCoercion */
        $contentMatchedShortcodes = $this->shortcode($names);
        if(\is_string($contentMatchedShortcodes)) {
            // the nested rule ran into a closing tag of one of the enclosing shortcodes
            $closingName = $contentMatchedShortcodes;
            break;
        }
        if(\is_array($contentMatchedShortcodes)) {
            foreach($contentMatchedShortcodes as $matchedShortcode) {
                $shortcodes[] = $matchedShortcode;
            }
            continue;
        }
        $this->backtrack();

        $this->beginBacktrack();
        if(false !== ($closingName = $this->close($names))) {
            // rewind in front of the closing tag, it is consumed again below or by an enclosing shortcode
            $this->backtrack();
            $shortcodes = array();
            break;
        }
        $closingName = null;
        $this->backtrack();

        // opening tag that starts neither a shortcode nor a closing tag, treat it as content
        $this->match(null, false);
    }
    $reachedEnd = $this->position >= $this->tokensCount;
    // end inlined content()

    if(null !== $closingName && $closingName !== $name) {
        array_pop($names);
        // drop the content marker pushed above and the marker pushed by the caller
        array_pop($this->backtracks);
        array_pop($this->backtracks);

        return $closingName;
    }
    if($reachedEnd || $closingName !== $name) {
        // no closing tag: report this shortcode without content, followed by the ones found after it
        $this->backtrack(false);
        $text = $this->backtrack(false);
        array_pop($names);

        return array_merge(array($this->getObject($name, $parameters, $bbCode, $offset, null, $text)), $shortcodes);
    }
    // tokens between the content marker and the closing tag
    $content = $this->getBacktrack();
    if(false === $this->close($names)) { return false; }
    array_pop($names);

    return array($this->getObject($name, $parameters, $bbCode, $offset, $content, $this->getBacktrack()));
}

/**
 * Tries to read a closing tag (opening tag, marker, name, closing tag) at the
 * current position. The position is advanced past every token that matched,
 * also when the rule fails part-way, so callers rewind with backtrack().
 *
 * @param string[] $names names of the shortcodes that are currently open
 *
 * @return string|false name of the closing tag when it is one of $names, false otherwise
 */
private function close(array &$names)
{
    if('' === $this->match(self::TOKEN_OPEN, true)) { return false; }
    if('' === $this->match(self::TOKEN_MARKER, true)) { return false; }
    // strict comparison: a name token with the text "0" is a falsy string
    if('' === ($closingName = $this->match(self::TOKEN_STRING, true))) { return false; }
    if('' === $this->match(self::TOKEN_CLOSE, false)) { return false; }

    return \in_array($closingName, $names, true) ? $closingName : false;
}
208
|
|
|
|
209
|
|
|
/**
 * Reads parameters up to, but not including, the closing tag marker or the
 * closing tag. A parameter without a separator gets the value null.
 *
 * match() results are compared against '' instead of being tested for
 * truthiness, so a parameter whose name token is "0" is not taken for a
 * failed match.
 *
 * @psalm-return array<string,string|null>|false false when a token other than a
 *               parameter name is found or a value cannot be read
 */
private function parameters()
{
    $parameters = array();

    while(true) {
        $this->match(self::TOKEN_WS, false);
        if($this->lookahead(self::TOKEN_MARKER) || $this->lookahead(self::TOKEN_CLOSE)) { break; }
        if('' === ($name = $this->match(self::TOKEN_STRING, true))) { return false; }
        if('' === $this->match(self::TOKEN_SEPARATOR, true)) { $parameters[$name] = null; continue; }
        if(false === ($value = $this->value())) { return false; }
        $this->match(self::TOKEN_WS, false);

        $parameters[$name] = $value;
    }

    return $parameters;
}
227
|
|
|
|
228
|
|
|
/**
 * Reads a value at the current position: either everything between two
 * delimiter tokens or a run of consecutive string tokens.
 *
 * @return false|string false when a delimited value is not terminated or when
 *         there is no string token to read
 */
private function value()
{
    if($this->match(self::TOKEN_DELIMITER, false)) {
        $delimited = '';
        while($this->position < $this->tokensCount && !$this->lookahead(self::TOKEN_DELIMITER)) {
            $delimited .= $this->match(null, false);
        }
        if($this->match(self::TOKEN_DELIMITER, false)) {
            return $delimited;
        }

        return false;
    }

    // match() returns '' as soon as the next token is not a string token
    $plain = '';
    while('' !== ($chunk = $this->match(self::TOKEN_STRING, false))) {
        $plain .= $chunk;
    }

    return '' === $plain ? false : $plain;
}
252
|
|
|
|
253
|
|
|
/* --- PARSER ---------------------------------------------------------- */ |
254
|
|
|
|
255
|
|
|
/**
 * Remembers the current position: pushes it on the backtrack stack and
 * stores it as the most recent backtrack point.
 *
 * @return void
 */
private function beginBacktrack()
{
    $this->lastBacktrack = $this->position;
    $this->backtracks[] = $this->lastBacktrack;
}
261
|
|
|
|
262
|
|
|
/**
 * Pops the latest backtrack point and returns the text of all tokens between
 * that point and the current position. The position itself is left untouched.
 *
 * @return string
 */
private function getBacktrack()
{
    $index = array_pop($this->backtracks);

    $text = '';
    while($index < $this->position) {
        $text .= $this->tokens[$index][1];
        $index++;
    }

    return $text;
}
273
|
|
|
|
274
|
|
|
/**
 * Pops the latest backtrack point, makes it the most recent backtrack point
 * and returns the text of the tokens between it and the previous most recent
 * backtrack point.
 *
 * @param bool $modifyPosition whether the parser position is moved back to the popped point
 *
 * @return string
 */
private function backtrack($modifyPosition = true)
{
    $start = array_pop($this->backtracks);
    $end = $this->lastBacktrack;
    $this->lastBacktrack = $start;

    if($modifyPosition) {
        $this->position = $start;
    }

    $text = '';
    $index = $start;
    while($index < $end) {
        $text .= $this->tokens[$index][1];
        $index++;
    }

    return $text;
}
294
|
|
|
|
295
|
|
|
/**
 * Checks the type of the token at the current position without consuming it.
 *
 * @param int $type one of the TOKEN_* constants
 *
 * @return bool false when there are no tokens left
 */
private function lookahead($type)
{
    if($this->position >= $this->tokensCount) {
        return false;
    }

    return $type === $this->tokens[$this->position][0];
}
304
|
|
|
|
305
|
|
|
/**
 * Consumes the token at the current position when it has the requested type.
 *
 * @param int|null $type one of the TOKEN_* constants, an empty value accepts any token
 * @param bool $ws whether one whitespace token directly after the match is consumed as well
 *
 * @return string text of the consumed token, empty string when nothing was consumed
 */
private function match($type, $ws)
{
    if($this->position >= $this->tokensCount) {
        return '';
    }

    list($tokenType, $tokenText) = $this->tokens[$this->position];
    if($type && $tokenType !== $type) {
        return '';
    }

    $this->position++;
    if($ws && $this->lookahead(self::TOKEN_WS)) {
        $this->position++;
    }

    return $tokenText;
}
329
|
|
|
|
330
|
|
|
/* --- LEXER ----------------------------------------------------------- */ |
331
|
|
|
|
332
|
|
|
/**
 * Splits the text into tokens using the lexer expression. Every token is a
 * tuple of its type (TOKEN_* constant), its text and its offset, where the
 * offset is the summed character length of all preceding tokens.
 *
 * @param string $text
 *
 * @psalm-return list<array{0:int,1:string,2:int}>
 *
 * @throws \RuntimeException when PCRE reports a failure or a match belongs to no known group
 */
private function tokenize($text)
{
    $count = preg_match_all($this->lexerRegex, $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if(false === $count || preg_last_error() !== PREG_NO_ERROR) {
        throw new \RuntimeException(sprintf('PCRE failure `%s`.', preg_last_error()));
    }

    // named groups in the order they are checked, the first one with a real offset wins
    $groups = array(
        'string' => self::TOKEN_STRING,
        'ws' => self::TOKEN_WS,
        'marker' => self::TOKEN_MARKER,
        'delimiter' => self::TOKEN_DELIMITER,
        'separator' => self::TOKEN_SEPARATOR,
        'open' => self::TOKEN_OPEN,
        'close' => self::TOKEN_CLOSE,
    );

    $tokens = array();
    $offset = 0;
    foreach($matches as $match) {
        $type = null;
        $token = null;
        foreach($groups as $group => $groupType) {
            // with PREG_OFFSET_CAPTURE a group that did not take part has the offset -1
            if(-1 !== $match[$group][1]) {
                $type = $groupType;
                $token = $match[$group][0];
                break;
            }
        }
        if(null === $type) {
            throw new \RuntimeException('Invalid token.');
        }

        $tokens[] = array($type, $token, $offset);
        $offset += mb_strlen($token, 'utf-8');
    }

    return $tokens;
}
364
|
|
|
|
365
|
|
|
/**
 * Builds the lexer expression for the given syntax.
 *
 * The expression is an alternation of named groups: `string` (a backslash
 * followed by any character, or a run of characters at which neither a syntax
 * token nor whitespace starts), `ws`, `marker`, `delimiter`, `separator`,
 * `open` and `close`.
 *
 * Syntax tokens are escaped with preg_quote(). Prefixing every character with
 * a backslash is not safe, because a backslash in front of a letter or digit
 * forms a regex escape sequence (for example `\d` or `\1`) instead of the
 * literal character.
 *
 * @return string
 */
private function prepareLexer(SyntaxInterface $syntax)
{
    // FIXME: for some reason Psalm does not understand the `@psalm-var callable() $var` annotation
    /** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
    $quote = function($text) {
        // `~` is the delimiter of the expression returned below
        return preg_quote((string)$text, '~');
    };
    /** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
    $group = function($text, $group) use ($quote) {
        return '(?<'.(string)$group.'>'.$quote($text).')';
    };

    $rules = array(
        '(?<string>\\\\.|(?:(?!'.implode('|', array(
            $quote($syntax->getOpeningTag()),
            $quote($syntax->getClosingTag()),
            $quote($syntax->getClosingTagMarker()),
            $quote($syntax->getParameterValueSeparator()),
            $quote($syntax->getParameterValueDelimiter()),
            '\s+',
        )).').)+)',
        '(?<ws>\s+)',
        $group($syntax->getClosingTagMarker(), 'marker'),
        $group($syntax->getParameterValueDelimiter(), 'delimiter'),
        $group($syntax->getParameterValueSeparator(), 'separator'),
        $group($syntax->getOpeningTag(), 'open'),
        $group($syntax->getClosingTag(), 'close'),
    );

    return '~('.implode('|', $rules).')~us';
}
397
|
|
|
} |
398
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.