1
|
|
|
<?php |
2
|
|
|
namespace Thunder\Shortcode\Parser; |
3
|
|
|
|
4
|
|
|
use Thunder\Shortcode\Shortcode\ParsedShortcode; |
5
|
|
|
use Thunder\Shortcode\Shortcode\Shortcode; |
6
|
|
|
use Thunder\Shortcode\Syntax\CommonSyntax; |
7
|
|
|
use Thunder\Shortcode\Syntax\SyntaxInterface; |
8
|
|
|
use Thunder\Shortcode\Utility\RegexBuilderUtility; |
9
|
|
|
|
10
|
|
|
/**
 * Recursive descent shortcode parser operating on a token stream.
 *
 * The input text is first split into tokens with a single regular expression
 * built from the configured syntax. The grammar rules (shortcode, content,
 * close, bbCode, parameters, value) then consume tokens through match() and
 * lookahead(). Every consumed token is recorded in all currently open
 * backtrack frames, which lets a rule either rewind the position or recover
 * the exact source text it has consumed.
 *
 * @author Tomasz Kowalczyk <[email protected]>
 */
final class RegularParser implements ParserInterface
{
    /** @var string tokenizer regular expression, built once in the constructor */
    private $lexerRegex;
    /** @var array[] tokens of the current text, each one is array(type, text, offset) */
    private $tokens;
    /** @var int number of elements in $tokens */
    private $tokensCount;
    /** @var int index of the next token to be consumed */
    private $position;
    /** @var array[] stack of backtrack frames, each frame is a list of consumed tokens */
    private $backtracks;

    const TOKEN_OPEN = 1;
    const TOKEN_CLOSE = 2;
    const TOKEN_MARKER = 3;
    const TOKEN_SEPARATOR = 4;
    const TOKEN_DELIMITER = 5;
    const TOKEN_STRING = 6;
    const TOKEN_WS = 7;

    /**
     * @param SyntaxInterface|null $syntax syntax used to build the tokenizer,
     *                                     CommonSyntax is used when omitted
     */
    public function __construct(SyntaxInterface $syntax = null)
    {
        $this->lexerRegex = $this->getTokenizerRegex($syntax ?: new CommonSyntax());
    }

    /**
     * Finds all shortcodes in given text.
     *
     * @param string $text
     *
     * @return ParsedShortcode[]
     */
    public function parse($text)
    {
        $this->tokens = $this->tokenize($text);
        $this->backtracks = array();
        $this->position = 0;
        $this->tokensCount = count($this->tokens);

        $shortcodes = array();
        while($this->position < $this->tokensCount) {
            // skip plain text, no backtrack frame is open at this point
            while($this->position < $this->tokensCount && !$this->lookahead(self::TOKEN_OPEN)) {
                $this->position++;
            }

            $names = array();
            $this->beginBacktrack();
            $matches = $this->shortcode($names);
            if(false === $matches) {
                // The candidate was not a shortcode and its frame is still on
                // the stack: drop it, rewind to the opening tag and step over
                // that single token. Without the rewind a malformed candidate
                // (for example an unterminated quoted value) would consume
                // the rest of the text and hide every valid shortcode after it.
                $this->backtrack();
                $this->match(null);
                continue;
            }
            if(is_array($matches)) {
                foreach($matches as $shortcode) {
                    $shortcodes[] = $shortcode;
                }
            }
        }

        return $shortcodes;
    }

    /**
     * Wraps parsed pieces in a ParsedShortcode carrying its source text and offset.
     */
    private function getObject($name, $parameters, $bbCode, $offset, $content, $text)
    {
        return new ParsedShortcode(new Shortcode($name, $parameters, $content, $bbCode), $text, $offset);
    }

    /* --- RULES ----------------------------------------------------------- */

    /**
     * Parses one shortcode starting at the current position.
     *
     * The caller must open a backtrack frame before calling this method. When
     * false is returned that frame is left on the stack for the caller to
     * handle, in the two other cases it has already been removed.
     *
     * @param string[] $names names of the shortcodes currently being parsed
     *
     * @return ParsedShortcode[]|string|false list of parsed shortcodes, name
     *         of an enclosing shortcode whose closing tag was met, or false
     *         when the tokens do not form a shortcode
     */
    private function shortcode(array &$names)
    {
        $name = null;
        $offset = null;

        $setName = function(array $token) use(&$name) { $name = $token[1]; };
        $setOffset = function(array $token) use(&$offset) { $offset = $token[2]; };

        if(!$this->match(self::TOKEN_OPEN, $setOffset, true)) { return false; }
        if(!$this->match(self::TOKEN_STRING, $setName, false)) { return false; }
        if($this->lookahead(self::TOKEN_STRING, null)) { return false; }
        if(!preg_match('~^'.RegexBuilderUtility::buildNameRegex().'$~us', $name)) { return false; }
        $this->match(self::TOKEN_WS);
        if(false === ($bbCode = $this->bbCode())) { return false; }
        if(false === ($parameters = $this->parameters())) { return false; }

        // self-closing
        if($this->match(self::TOKEN_MARKER, null, true)) {
            if(!$this->match(self::TOKEN_CLOSE)) { return false; }

            return array($this->getObject($name, $parameters, $bbCode, $offset, null, $this->getBacktrack()));
        }

        // just-closed or with-content
        if(!$this->match(self::TOKEN_CLOSE)) { return false; }
        $this->beginBacktrack();
        $names[] = $name;
        list($content, $shortcodes, $closingName) = $this->content($names);
        if(null !== $closingName && $closingName !== $name) {
            // closing tag belongs to an enclosing shortcode: drop the content
            // frame and the caller's frame, then report the name upwards
            array_pop($names);
            array_pop($this->backtracks);
            array_pop($this->backtracks);

            return $closingName;
        }
        if(false === $content || $closingName !== $name) {
            // no closing tag: the first frame is the content, the second one
            // is the opening tag itself, the position is not rewound
            $this->backtrack(false);
            $text = $this->backtrack(false);

            return array_merge(array($this->getObject($name, $parameters, $bbCode, $offset, null, $text)), $shortcodes);
        }
        $content = $this->getBacktrack();
        if(!$this->close($names)) { return false; }

        return array($this->getObject($name, $parameters, $bbCode, $offset, $content, $this->getBacktrack()));
    }

    /**
     * Consumes shortcode content up to a closing tag or the end of tokens.
     *
     * @param string[] $names names of the shortcodes currently being parsed
     *
     * @return array array(content, shortcodes, closingName) where content is
     *         false when all tokens were consumed and closingName is null
     *         when no closing tag was found
     */
    private function content(array &$names)
    {
        $content = null;
        $shortcodes = array();
        $closingName = null;
        $appendContent = function(array $token) use(&$content) { $content .= $token[1]; };

        while($this->position < $this->tokensCount) {
            while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_OPEN)) {
                $this->match(null, $appendContent);
            }

            // nested shortcode
            $this->beginBacktrack();
            $matchedShortcodes = $this->shortcode($names);
            if(is_string($matchedShortcodes)) {
                $closingName = $matchedShortcodes;
                break;
            }
            if(false !== $matchedShortcodes) {
                $shortcodes = array_merge($shortcodes, $matchedShortcodes);
                continue;
            }
            $this->backtrack();

            // closing tag, only peeked at: the position is always rewound
            $this->beginBacktrack();
            if(false !== ($closingName = $this->close($names))) {
                if(null === $content) { $content = ''; }
                $this->backtrack();
                $shortcodes = array();
                break;
            }
            $closingName = null;
            $this->backtrack();
            if($this->position < $this->tokensCount) {
                $shortcodes = array();
                break;
            }

            $this->match(null, $appendContent);
        }

        return array($this->position < $this->tokensCount ? $content : false, $shortcodes, $closingName);
    }

    /**
     * Consumes a closing tag.
     *
     * @param string[] $names names of the shortcodes currently being parsed
     *
     * @return string|false name from the closing tag when it is one of
     *         $names, false otherwise
     */
    private function close(array &$names)
    {
        $closingName = null;
        $setName = function(array $token) use(&$closingName) { $closingName = $token[1]; };

        if(!$this->match(self::TOKEN_OPEN, null, true)) { return false; }
        if(!$this->match(self::TOKEN_MARKER, null, true)) { return false; }
        if(!$this->match(self::TOKEN_STRING, $setName, true)) { return false; }
        if(!$this->match(self::TOKEN_CLOSE)) { return false; }

        // strict comparison, loose one treats numeric names like "10" and "1e1" as equal
        return in_array($closingName, $names, true) ? $closingName : false;
    }

    /**
     * Consumes the optional value placed right after the shortcode name.
     *
     * @return string|null|false value, null when there is no separator or
     *         false when the separator is not followed by a valid value
     */
    private function bbCode()
    {
        return $this->match(self::TOKEN_SEPARATOR, null, true) ? $this->value() : null;
    }

    /**
     * Consumes parameters up to (not including) the marker or closing token.
     *
     * @return array|false map of names to values (null for a parameter
     *         without a value) or false on invalid syntax
     */
    private function parameters()
    {
        $parameters = array();
        $setName = function(array $token) use(&$name) { $name = $token[1]; };

        while(true) {
            $name = null;

            $this->match(self::TOKEN_WS);
            if($this->lookahead(array(self::TOKEN_MARKER, self::TOKEN_CLOSE))) { break; }
            if(!$this->match(self::TOKEN_STRING, $setName, true)) { return false; }
            if(!$this->match(self::TOKEN_SEPARATOR, null, true)) { $parameters[$name] = null; continue; }
            if(false === ($value = $this->value())) { return false; }
            $this->match(self::TOKEN_WS);

            $parameters[$name] = $value;
        }

        return $parameters;
    }

    /**
     * Consumes a delimited value or a run of string tokens.
     *
     * @return string|false value without delimiters, false when there is no
     *         value or the closing delimiter is missing
     */
    private function value()
    {
        $value = '';
        $appendValue = function(array $token) use(&$value) { $value .= $token[1]; };

        if($this->match(self::TOKEN_DELIMITER)) {
            while($this->position < $this->tokensCount && !$this->lookahead(self::TOKEN_DELIMITER)) {
                $this->match(null, $appendValue);
            }

            return $this->match(self::TOKEN_DELIMITER) ? $value : false;
        }

        if($this->match(self::TOKEN_STRING, $appendValue)) {
            while($this->match(self::TOKEN_STRING, $appendValue)) {
                continue;
            }

            return $value;
        }

        return false;
    }

    /* --- PARSER ---------------------------------------------------------- */

    /**
     * Opens a new backtrack frame that records tokens consumed from now on.
     */
    private function beginBacktrack()
    {
        $this->backtracks[] = array();
    }

    /**
     * Removes the innermost frame and returns the text of its tokens. Neither
     * the position nor the other frames are changed.
     *
     * @return string
     */
    private function getBacktrack()
    {
        // switch from array_map() to array_column() when dropping support for PHP <5.5
        return implode('', array_map(function(array $token) { return $token[1]; }, array_pop($this->backtracks)));
    }

    /**
     * Removes the innermost frame and takes its tokens out of all remaining
     * frames.
     *
     * @param bool $modifyPosition whether to move the position back by the
     *                             number of tokens in the removed frame
     *
     * @return string text of the tokens from the removed frame
     */
    private function backtrack($modifyPosition = true)
    {
        $tokens = array_pop($this->backtracks);
        $count = count($tokens);
        if($modifyPosition) {
            $this->position -= $count;
        }

        foreach($this->backtracks as &$backtrack) {
            // array_pop() in loop is much faster than array_slice() because
            // it operates directly on the passed array
            for($i = 0; $i < $count; $i++) {
                array_pop($backtrack);
            }
        }
        // break the reference left behind by the loop above
        unset($backtrack);

        return implode('', array_map(function(array $token) { return $token[1]; }, $tokens));
    }

    /**
     * Checks the type of the current token without consuming it.
     *
     * @param int|int[]|null $type     accepted type(s), null accepts any token
     * @param callable|null  $callback called with the token when it is accepted
     *
     * @return bool
     */
    private function lookahead($type, $callback = null)
    {
        if($this->position >= $this->tokensCount) {
            return false;
        }

        $type = (array)$type;
        $token = $this->tokens[$this->position];
        if(!empty($type) && !in_array($token[0], $type, true)) {
            return false;
        }

        /** @var $callback callable */
        $callback && $callback($token);

        return true;
    }

    /**
     * Consumes the current token when it has an accepted type and records it
     * in every open backtrack frame.
     *
     * @param int|int[]|null           $type      accepted type(s), null accepts any token
     * @param callable|callable[]|null $callbacks called with the consumed token
     * @param bool                     $ws        whether to also consume a whitespace
     *                                            token following the matched one
     *
     * @return bool
     */
    private function match($type, $callbacks = null, $ws = false)
    {
        if($this->position >= $this->tokensCount) {
            return false;
        }

        $type = (array)$type;
        $token = $this->tokens[$this->position];
        if(!empty($type) && !in_array($token[0], $type, true)) {
            return false;
        }
        foreach($this->backtracks as &$backtrack) {
            $backtrack[] = $token;
        }
        // break the reference left behind by the loop above
        unset($backtrack);

        $this->position++;
        foreach((array)$callbacks as $callback) {
            $callback($token);
        }

        $ws && $this->match(self::TOKEN_WS);

        return true;
    }

    /* --- LEXER ----------------------------------------------------------- */

    /**
     * Splits text into tokens using the regular expression built from syntax.
     *
     * @param string $text
     *
     * @return array[] list of array(type, text, offset) where offset is
     *         counted in UTF-8 characters from the first token
     */
    private function tokenize($text)
    {
        preg_match_all($this->lexerRegex, $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

        // named groups in the order in which they are tested, a group that
        // did not take part in the match is reported with offset -1
        $groups = array(
            'open' => self::TOKEN_OPEN,
            'close' => self::TOKEN_CLOSE,
            'marker' => self::TOKEN_MARKER,
            'separator' => self::TOKEN_SEPARATOR,
            'delimiter' => self::TOKEN_DELIMITER,
            'ws' => self::TOKEN_WS,
        );

        $tokens = array();
        $position = 0;
        foreach($matches as $match) {
            $type = null;
            $token = null;
            foreach($groups as $group => $groupType) {
                if(-1 !== $match[$group][1]) {
                    $type = $groupType;
                    $token = $match[$group][0];
                    break;
                }
            }
            if(null === $type) {
                $type = self::TOKEN_STRING;
                $token = $match['string'][0];
            }

            $tokens[] = array($type, $token, $position);
            $position += mb_strlen($token, 'utf-8');
        }

        return $tokens;
    }

    /**
     * Builds the tokenizer regular expression with one named group per token
     * type.
     *
     * @return string
     */
    private function getTokenizerRegex(SyntaxInterface $syntax)
    {
        // preg_quote() escapes only characters special to PCRE (and the "~"
        // delimiter); prefixing every character with a backslash would turn
        // letters and digits into escape sequences such as \d or \1
        $quote = function($text, $group) {
            return '(?<'.$group.'>'.preg_quote($text, '~').')';
        };

        $rules = array(
            $quote($syntax->getOpeningTag(), 'open'),
            $quote($syntax->getClosingTag(), 'close'),
            $quote($syntax->getClosingTagMarker(), 'marker'),
            $quote($syntax->getParameterValueSeparator(), 'separator'),
            $quote($syntax->getParameterValueDelimiter(), 'delimiter'),
            '(?<ws>\s+)',
            '(?<string>[\w-]+|\\\\.|.)',
        );

        return '~('.implode('|', $rules).')~us';
    }
}
345
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.