1 | <?php |
||
2 | |||
3 | namespace petitparser; |
||
4 | use OutOfRangeException; |
||
5 | |||
/**
 * Abstract base class of all parsers.
 *
 * Concrete parsers implement {@link parseOn()} and {@link copy()}; every other
 * method here is either an entry point for running the parser ({@link parse()},
 * {@link accept()}, {@link matches()}, ...), a combinator that wraps the receiver
 * in a new parser ({@link star()}, {@link seq()}, {@link map()}, ...), or part of
 * the structural reflection API ({@link getChildren()}, {@link isEqualTo()}, ...).
 *
 * Note: examples in the doc-blocks use the `[:expression:]` pseudo-notation
 * inherited from the original documentation; in PHP the reserved-word operators
 * are spelled `and_()`, `or_()`, `not_()` and `end_()`.
 */
abstract class Parser
{
    /**
     * @var int marker value meaning "no upper limit" for repetition counts
     */
    const UNBOUNDED = -1;

    /**
     * Primitive method doing the actual parsing.
     *
     * The method is overridden in concrete subclasses to implement the
     * parser specific logic. The method takes a parse [context] and
     * returns the resulting context, which is either a [Success] or
     * [Failure] context.
     *
     * @param Context $context
     *
     * @return Result
     */
    abstract public function parseOn(Context $context);

    /**
     * Returns the parse result of the [input] buffer.
     *
     * The implementation creates a default parse context (at position 0) on the
     * input and calls the internal parsing logic of the receiving parser.
     *
     * For example, [:letter().plus().parse('abc'):] results in an instance of
     * [Success], where [Result.position] is [:3:] and [Success.value] is
     * [:[a, b, c]:].
     *
     * Similarly, [:letter().plus().parse('123'):] results in an instance of
     * [Failure], where [Result.position] is [:0:] and [Failure.message] is
     * ['letter expected'].
     *
     * Note that, if a string is given, the input is assumed to be in the current PHP internal
     * character encoding, e.g. as defined by {@link mb_internal_encoding()} - if this is not
     * the case (such as when parsing the contents of a file with a known encoding), either
     * call {@link Buffer::create()} explicitly indicating the encoding, or use one of the
     * two convenience methods {@link parseUTF8()} or {@link parseISO()}.
     *
     * @param Buffer|string $input input Buffer or string (see notes about strings above)
     *
     * @return Result
     *
     * @see mb_internal_encoding()
     * @see parseUTF8()
     * @see parseISO()
     */
    public function parse($input)
    {
        // Plain strings are wrapped in a Buffer; anything else is passed through as-is.
        return $this->parseOn(new Context(is_string($input) ? Buffer::create($input) : $input, 0));
    }

    /**
     * Parses a UTF-8 encoded string, regardless of the PHP internal encoding.
     *
     * @see parse()
     *
     * @param string $string UTF-8 encoded string to parse
     *
     * @return Result
     */
    public function parseUTF8($string)
    {
        return $this->parse(Buffer::fromUTF8($string));
    }

    /**
     * Parses an ISO-8859-1 encoded string, regardless of the PHP internal encoding.
     *
     * @see parse()
     *
     * @param string $string ISO-8859-1 encoded string to parse
     *
     * @return Result
     */
    public function parseISO($string)
    {
        return $this->parse(Buffer::fromISO($string));
    }

    /**
     * Tests if the [input] can be successfully parsed.
     *
     * For example, [:letter().plus().accept('abc'):] returns [:true:], and
     * [:letter().plus().accept('123'):] returns [:false:].
     *
     * @param Buffer $input input Buffer; unlike {@link parse()}, a plain string
     *                      must be wrapped first (e.g. with Buffer::create())
     *
     * @return bool
     */
    public function accept(Buffer $input)
    {
        return $this->parse($input)->isSuccess();
    }

    /**
     * Returns a list of all successful overlapping parses of the [input].
     *
     * For example, [:letter().plus().matches('abc de'):] results in the list
     * [:[['a', 'b', 'c'], ['b', 'c'], ['c'], ['d', 'e'], ['e']]:]. See
     * [Parser.matchesSkipping] to retrieve non-overlapping parse results.
     *
     * @param Buffer $input
     *
     * @return array list of the values produced by each successful parse
     */
    public function matches(Buffer $input)
    {
        $list = array();

        // At every position: try the receiver as a look-ahead (and_() does not
        // consume), record its value, then step over one token with any(). If the
        // receiver fails, or_(any()) still advances by one token. The composite's
        // own result is discarded; only the collected $list matters.
        $this->and_()
            ->map(
                function ($each) use (&$list) {
                    $list[] = $each;
                }
            )
            ->seq(any())
            ->or_(any())
            ->star()
            ->parse($input);

        return $list;
    }

    /**
     * Returns a list of all successful non-overlapping parses of the input.
     *
     * For example, [:letter().plus().matchesSkipping('abc de'):] results in the
     * list [:[['a', 'b', 'c'], ['d', 'e']]:]. See [Parser.matches] to retrieve
     * overlapping parse results.
     *
     * @param Buffer $input
     *
     * @return array list of the values produced by each successful parse
     */
    public function matchesSkipping(Buffer $input)
    {
        $list = array();

        // Unlike matches(), the receiver consumes its match here, so the next
        // attempt starts after it; or_(any()) skips a single token on failure.
        $this
            ->map(
                function ($each) use (&$list) {
                    $list[] = $each;
                }
            )
            ->or_(any())
            ->star()
            ->parse($input);

        return $list;
    }

    /**
     * Returns new parser that accepts the receiver, if possible. The resulting
     * parser returns the result of the receiver, or [:null:] if not applicable.
     * The returned value can be provided as an optional argument [otherwise].
     *
     * For example, the parser [:letter().optional():] accepts a letter as input
     * and returns that letter. When given something else the parser succeeds as
     * well, does not consume anything and returns [:null:].
     *
     * @param mixed $otherwise value to return when the receiver does not match
     *
     * @return Parser
     */
    public function optional($otherwise = null)
    {
        return new OptionalParser($this, $otherwise);
    }

    /**
     * Returns a parser that accepts the receiver zero or more times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().star():] accepts the empty string or
     * any sequence of letters and returns a possibly empty list of the parsed
     * letters.
     *
     * @return Parser
     */
    public function star()
    {
        return $this->repeat(0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver zero or more times until it
     * reaches a [limit]. This is a greedy non-blind implementation of the
     * [Parser.star] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function starGreedy(Parser $limit)
    {
        return $this->repeatGreedy($limit, 0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver zero or more times until it
     * reaches a [limit]. This is a lazy non-blind implementation of the
     * [Parser.star] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function starLazy(Parser $limit)
    {
        return $this->repeatLazy($limit, 0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that accepts the receiver one or more times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().plus():] accepts any sequence of
     * letters and returns a list of the parsed letters.
     *
     * @return Parser
     */
    public function plus()
    {
        return $this->repeat(1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver one or more times until it
     * reaches [limit]. This is a greedy non-blind implementation of the
     * [Parser.plus] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function plusGreedy(Parser $limit)
    {
        return $this->repeatGreedy($limit, 1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver one or more times until it
     * reaches a [limit]. This is a lazy non-blind implementation of the
     * [Parser.plus] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function plusLazy(Parser $limit)
    {
        return $this->repeatLazy($limit, 1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that accepts the receiver between [min] and [max] times.
     * The resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().repeat(2, 4):] accepts a sequence of
     * two, three, or four letters and returns the accepted letters as a list.
     *
     * @param int $min minimum number of repetitions
     * @param int $max maximum number of repetitions, or {@link UNBOUNDED}
     *
     * @return Parser
     */
    public function repeat($min, $max)
    {
        return new PossessiveRepeatingParser($this, $min, $max);
    }

    /**
     * Returns a parser that parses the receiver at least [min] and at most [max]
     * times until it reaches a [limit]. This is a greedy non-blind implementation of
     * the [Parser.repeat] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     * @param int    $min minimum number of repetitions
     * @param int    $max maximum number of repetitions, or {@link UNBOUNDED}
     *
     * @return Parser
     */
    public function repeatGreedy(Parser $limit, $min, $max)
    {
        return new GreedyRepeatingParser($this, $limit, $min, $max);
    }

    /**
     * Returns a parser that parses the receiver at least [min] and at most [max]
     * times until it reaches a [limit]. This is a lazy non-blind implementation of
     * the [Parser.repeat] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     * @param int    $min minimum number of repetitions
     * @param int    $max maximum number of repetitions, or {@link UNBOUNDED}
     *
     * @return Parser
     */
    public function repeatLazy(Parser $limit, $min, $max)
    {
        return new LazyRepeatingParser($this, $limit, $min, $max);
    }

    /**
     * Returns a parser that accepts the receiver exactly [count] times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * For example, the parser [:letter().times(2):] accepts two letters and
     * returns a list of the two parsed letters.
     *
     * @param int $count
     *
     * @return Parser
     */
    public function times($count)
    {
        return $this->repeat($count, $count);
    }

    /**
     * Returns a parser that accepts the receiver followed by [other]. The
     * resulting parser returns a list of the parse result of the receiver
     * followed by the parse result of [other]. Calling this method on an
     * existing sequence does not nest this sequence into a new one, but
     * instead augments the existing sequence with [other].
     *
     * For example, the parser [:letter().seq(digit()).seq(letter()):] accepts a
     * letter followed by a digit and another letter. The parse result of the
     * input string [:'a1b':] is the list [:['a', '1', 'b']:].
     *
     * @param Parser $other
     *
     * @return Parser
     */
    public function seq(Parser $other)
    {
        return new SequenceParser(array($this, $other));
    }

    /**
     * Returns a parser that accepts the receiver or [other]. The resulting
     * parser returns the parse result of the receiver, if the receiver fails
     * it returns the parse result of [other] (exclusive ordered choice).
     *
     * For example, the parser [:letter().or(digit()):] accepts a letter or a
     * digit. An example where the order matters is the following choice between
     * overlapping parsers: [:letter().or(char('a')):]. In the example the parser
     * [:char('a'):] will never be activated, because the input is always consumed
     * by [:letter():]. This can be problematic if the author intended to attach a
     * production action to [:char('a'):].
     *
     * TODO find a better name for this method
     *
     * @param Parser $other
     *
     * @return Parser
     */
    public function or_(Parser $other)
    {
        return new ChoiceParser(array($this, $other));
    }

    /**
     * Returns a parser (logical and-predicate) that succeeds whenever the
     * receiver does, but never consumes input.
     *
     * For example, the parser [:char('_').and().seq(identifier):] accepts
     * identifiers that start with an underscore character. Since the predicate
     * does not consume accepted input, the parser [:identifier:] is given the
     * ability to process the complete identifier.
     *
     * @return AndParser
     */
    public function and_()
    {
        return new AndParser($this);
    }

    /**
     * Returns a parser (logical not-predicate) that succeeds whenever the
     * receiver fails, but never consumes input.
     *
     * For example, the parser [:char('_').not().seq(identifier):] accepts
     * identifiers that do not start with an underscore character. If the parser
     * [:char('_'):] accepts the input, the negation and subsequently the
     * complete parser fails. Otherwise the parser [:identifier:] is given the
     * ability to process the complete identifier.
     *
     * @param string|null $message optional failure message, passed on to NotParser
     *
     * @return Parser
     */
    public function not_($message = null)
    {
        return new NotParser($this, $message);
    }

    /**
     * Returns a parser that consumes any input token (character), but the
     * receiver.
     *
     * For example, the parser [:letter().neg():] accepts any input but a letter.
     * The parser fails for inputs like [:'a':] or [:'Z':], but succeeds for
     * input like [:'1':], [:'_':] or [:'$':].
     *
     * @param string|null $message optional failure message, passed on to {@link not_()}
     *
     * @return Parser
     */
    public function neg($message = null)
    {
        // Sequence of [not-predicate result, consumed token]; pick(1) keeps the token.
        return $this->not_($message)->seq(any())->pick(1);
    }

    /**
     * Returns a parser that discards the result of the receiver, and returns
     * a sub-string of the consumed range in the string/list being parsed.
     *
     * For example, the parser [:letter().plus().flatten():] returns [:'abc':]
     * for the input [:'abc':]. In contrast, the parser [:letter().plus():] would
     * return [:['a', 'b', 'c']:] for the same input instead.
     *
     * @return Parser
     */
    public function flatten()
    {
        return new FlattenParser($this);
    }

    /**
     * Returns a parser that returns a [Token]. The token carries the parsed
     * values of the receiver [Token.value], as well as the consumed range from
     * [Token.start] to [Token.stop] of the string/list being parsed.
     *
     * For example, the parser [:letter().plus().token():] returns the token
     * [:Token[start: 0, stop: 3, value: abc]:] for the input [:'abc':].
     *
     * @return Parser
     */
    public function token()
    {
        return new TokenParser($this);
    }

    /**
     * Returns a parser that consumes input before and after the receiver. The
     * optional argument is a parser that consumes the excess input. By default
     * `whitespace()` is used. Two arguments can be provided to have different
     * parsers on the [left] and [right] side.
     *
     * For example, the parser `letter().plus().trim()` returns `['a', 'b']`
     * for the input `' ab\n'` and consumes the complete input string.
     *
     * @param Parser|null $left  parser for leading excess input (default: `whitespace()`)
     * @param Parser|null $right parser for trailing excess input (default: same as $left)
     *
     * @return Parser
     */
    public function trim(Parser $left = null, Parser $right = null)
    {
        $left = $left ?: whitespace();
        $right = $right ?: $left;

        return new TrimmingParser($this, $left, $right);
    }

    /**
     * Returns a parser that succeeds only if the receiver consumes the complete
     * input, otherwise return a failure with the optional [message].
     *
     * For example, the parser [:letter().end():] succeeds on the input [:'a':]
     * and fails on [:'ab':]. In contrast the parser [:letter():] alone would
     * succeed on both inputs, but not consume everything for the second input.
     *
     * @param string $message
     *
     * @return Parser
     */
    public function end_($message = 'end of input expected')
    {
        return new EndOfInputParser($this, $message);
    }

    /**
     * Returns a parser that points to the receiver, but can be changed to point
     * to something else at a later point in time.
     *
     * For example, the parser [:letter().settable():] behaves exactly the same
     * as [:letter():], but it can be replaced with another parser using
     * [SettableParser.set].
     *
     * @return SettableParser
     */
    public function settable()
    {
        return new SettableParser($this);
    }

    /**
     * Returns a parser that evaluates [function] as action handler on success
     * of the receiver.
     *
     * For example, the parser [:digit().map((char) => int.parse(char)):] returns
     * the number [:1:] for the input string [:'1':].
     *
     * @param callable $function
     *
     * @return Parser
     */
    public function map($function)
    {
        return new ActionParser($this, $function);
    }

    /**
     * Returns a parser that transforms a successful parse result by returning
     * the element at [index] of a list. A negative index can be used to access
     * the elements from the back of the list.
     *
     * For example, the parser [:letter().star().pick(-1):] returns the last
     * letter parsed. For the input [:'abc':] it returns [:'c':].
     *
     * @param int $index
     *
     * @return Parser
     */
    public function pick($index)
    {
        return $this
            ->map(
                function ($list) use ($index) {
                    // Negative indexes count from the end of the list.
                    return $list[$index < 0 ? count($list) + $index : $index];
                }
            );
    }

    /**
     * Returns a parser that transforms a successful parse result by returning
     * the permuted elements at [indexes] of a list. Negative indexes can be
     * used to access the elements from the back of the list.
     *
     * For example, the parser [:letter().star().permute([0, -1]):] returns the
     * first and last letter parsed. For the input [:'abc':] it returns
     * [:['a', 'c']:].
     *
     * @param int[] $indexes
     *
     * @return Parser
     */
    public function permute($indexes)
    {
        return $this
            ->map(
                function ($list) use ($indexes) {
                    return array_map(
                        function ($index) use ($list) {
                            // Negative indexes count from the end of the list.
                            return $list[$index < 0 ? count($list) + $index : $index];
                        },
                        $indexes
                    );
                }
            );
    }

    /**
     * Returns a parser that consumes the receiver one or more times separated
     * by the [separator] parser. The resulting parser returns a flat list of
     * the parse results of the receiver interleaved with the parse result of the
     * separator parser.
     *
     * If the optional argument [includeSeparators] is set to [:false:], then the
     * separators are not included in the parse result. If the optional argument
     * [optionalSeparatorAtEnd] is set to [:true:] the parser also accepts an
     * optional separator at the end.
     *
     * For example, the parser [:digit().separatedBy(char('-')):] returns a parser
     * that consumes input like [:'1-2-3':] and returns a list of the elements and
     * separators: [:['1', '-', '2', '-', '3']:].
     *
     * @param Parser $separator
     * @param bool   $includeSeparators
     * @param bool   $optionalSeparatorAtEnd
     *
     * @return Parser
     */
    public function separatedBy(Parser $separator, $includeSeparators = true, $optionalSeparatorAtEnd = false)
    {
        // (separator receiver)* - each repetition yields a [separator, element] tuple
        $repeater = new SequenceParser(array($separator, $this));
        $repeater = $repeater->star();

        // The separator parser object itself is passed as the "otherwise" value of
        // optional(), so it serves as a sentinel for "no trailing separator".
        $parser = new SequenceParser($optionalSeparatorAtEnd
            ? array($this, $repeater, $separator->optional($separator))
            : array($this, $repeater));

        return $parser->map(
            function ($list) use ($includeSeparators, $optionalSeparatorAtEnd, $separator) {
                $result = array();
                $result[] = $list[0];

                foreach ($list[1] as $tuple) {
                    if ($includeSeparators) {
                        $result[] = $tuple[0];
                    }
                    $result[] = $tuple[1];
                }

                // Append the trailing separator only if one was actually parsed
                // (i.e. the slot does not hold the sentinel).
                if ($includeSeparators && $optionalSeparatorAtEnd && $list[2] !== $separator) {
                    $result[] = $list[2];
                }

                return $result;
            }
        );
    }

    /**
     * Returns a shallow copy of the receiver.
     *
     * @return Parser
     */
    abstract public function copy();

    /**
     * Recursively tests for the equality of two parsers.
     *
     * The code automatically deals with recursive parsers and parsers that
     * refer to other parsers. This code is supposed to be overridden by parsers
     * that add other state.
     *
     * @param Parser   $other
     * @param Parser[] $seen parsers already visited on the current path (used to
     *                       terminate on cyclic parser graphs)
     *
     * @return bool
     */
    public function isEqualTo(Parser $other, $seen = array())
    {
        // Identical instance, or a parser already being compared further up:
        // treat as equal so that cycles terminate.
        if ($this === $other || in_array($this, $seen, true)) {
            return true;
        }

        $seen[] = $this;

        return get_class($this) === get_class($other)
            && $this->hasEqualProperties($other)
            && $this->hasEqualChildren($other, $seen);
    }

    /**
     * Compare the properties of two parsers. Normally this method should not be
     * called directly, instead use {@link isEqualTo()}.
     *
     * Override this method in all subclasses that add new state. This base
     * implementation has no state to compare and always returns true.
     *
     * @param Parser $other
     *
     * @return bool
     */
    public function hasEqualProperties(
        /** @noinspection PhpUnusedParameterInspection */
        Parser $other)
    {
        return true;
    }

    /**
     * Compare the children of two parsers pairwise, in order. Normally this
     * method should not be called directly, instead use {@link isEqualTo()}.
     *
     * @param Parser   $other
     * @param Parser[] $seen parsers already visited (forwarded to {@link isEqualTo()})
     *
     * @return bool true if both parsers have the same number of children and
     *              every pair of children is equal
     */
    public function hasEqualChildren(Parser $other, $seen = array())
    {
        $thisChildren = $this->getChildren();
        $otherChildren = $other->getChildren();

        // Computed once rather than in the loop condition on every iteration.
        // NOTE(review): length() is not a PHP built-in; presumably a helper
        // function declared in the petitparser namespace - confirm.
        $numChildren = length($thisChildren);

        if ($numChildren !== length($otherChildren)) {
            return false;
        }

        for ($i = 0; $i < $numChildren; $i++) {
            if (! $thisChildren[$i]->isEqualTo($otherChildren[$i], $seen)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns a list of directly referenced parsers.
     *
     * For example, [:letter().children:] returns the empty collection [:[]:],
     * because the letter parser is a primitive or leaf parser that does not
     * depend or call any other parser.
     *
     * In contrast, [:letter().or(digit()).children:] returns a collection
     * containing both the [:letter():] and [:digit():] parser.
     *
     * @return Parser[] a list of directly referenced parsers (empty in this
     *                  base implementation).
     */
    public function getChildren()
    {
        return array();
    }

    /**
     * Returns the directly referenced parser at the given position.
     *
     * @param int $index zero-based index into {@link getChildren()}
     *
     * @return Parser the nth directly referenced parser.
     *
     * @throws OutOfRangeException if there is no child at $index
     */
    public function getChild($index)
    {
        $children = $this->getChildren();

        if (!isset($children[$index])) {
            // The subtraction must be parenthesised: before PHP 8, "." and "-"
            // had equal precedence and were left-associative, so the unparenthesised
            // form evaluated as ("valid range is: 0 to N") - 1.
            throw new OutOfRangeException(
                count($children)
                    ? "valid range is: 0 to " . (count($children) - 1)
                    : "Parser has no children"
            );
        }

        return $children[$index];
    }

    /**
     * Changes the receiver by replacing [source] with [target]. Does nothing
     * if [source] does not exist in [Parser.children].
     *
     * The following example creates a letter parser and then defines a parser
     * called [:example:] that accepts one or more letters. Eventually the parser
     * [:example:] is modified by replacing the [:letter:] parser with a new
     * parser that accepts a digit. The resulting [:example:] parser accepts one
     * or more digits.
     *
     *     $letter = $this->letter();
     *     $example = $letter->plus();
     *     $example->replace($letter, $this->digit());
     *
     * @param Parser $source
     * @param Parser $target
     *
     * @return void
     */
    public function replace(Parser $source, Parser $target)
    {
        // no children, nothing to do
    }

    /**
     * Returns the class name of the receiver.
     *
     * @return string
     */
    public function __toString()
    {
        return get_class($this);
    }
}
||
768 |
If you have a function call in the test part of a `for` loop, that function is executed on every iteration. Such a call can often be moved to the initialization part of the loop so that its result is computed once and cached.