Issues (12)

src/_functions.php (1 issue)

1
<?php
2
3
namespace petitparser;
4
5
use Closure;
6
use InvalidArgumentException;
7
use RuntimeException;
8
9
/**
 * Determines the length of a string, an array, or a length-aware object.
 *
 * Strings are measured in characters via mb_strlen() (using the current
 * mbstring internal encoding), arrays via count(). Objects are supported
 * when they offer a getLength() method or, failing that, a "length" property.
 *
 * @param mixed $value string, array, or object exposing getLength()/length
 *
 * @throws \RuntimeException when no length can be derived from $value
 * @return int
 */
function length($value)
{
    if (is_string($value)) {
        return mb_strlen($value); // TODO QA
    }

    if (is_array($value)) {
        return count($value);
    }

    if (is_object($value)) {
        // Prefer the accessor when it exists ...
        if (method_exists($value, 'getLength')) {
            return $value->getLength();
        }

        // ... and only fall back to the property; calling getLength() on an
        // object that merely has a "length" property would be a fatal error.
        if (property_exists($value, 'length')) {
            return $value->length;
        }
    }

    throw new RuntimeException("unable to obtain length of given value");
}
31
32
/**
 * Internal method to convert an element to a character code.
 *
 * Integers are returned unchanged. A one-character string is converted from
 * the current mbstring internal encoding to big-endian UTF-32 and decoded as
 * a single unsigned 32-bit value.
 *
 * @param int|string $element single character (in the mbstring internal encoding); or a 32-bit Unicode character code
 *
 * @return int 32-bit Unicode character code
 *
 * @throws InvalidArgumentException when $element is neither an integer nor a one-character string
 */
function toCharCode($element)
{
    if (is_int($element)) {
        return $element;
    }

    if (is_string($element) && mb_strlen($element) === 1) {
        // Request the byte order explicitly so the unpack() format below
        // is guaranteed to match.
        $utf32 = mb_convert_encoding($element, 'UTF-32BE');

        // Unicode code points never exceed 0x10FFFF, so the unsigned 32-bit
        // value fits into a PHP integer even on 32-bit builds.
        $decoded = unpack('N', $utf32);

        return $decoded[1];
    }

    // Arrays and objects without __toString() cannot be interpolated safely;
    // describe them by type so the documented exception is always the one thrown.
    $printable = !is_array($element)
        && (!is_object($element) || method_exists($element, '__toString'));

    $description = $printable ? (string) $element : gettype($element);

    throw new InvalidArgumentException("'$description' is not a character");
}
61
62
/**
 * Internal method turning a Unicode character code into a string.
 *
 * The code is packed as an unsigned 32-bit big-endian value and then
 * converted from UCS-4BE into the requested encoding.
 *
 * @param int $ord Unicode character code
 * @param string|null $encoding target encoding (empty/NULL selects mb_internal_encoding())
 *
 * @return string
 */
function fromCharCode($ord, $encoding = null)
{
    $targetEncoding = $encoding ? $encoding : mb_internal_encoding();
    $bigEndianBytes = pack("N", $ord);

    return mb_convert_encoding($bigEndianBytes, $targetEncoding, 'UCS-4BE');
}
73
74
/**
 * Builds a parser that accepts exactly one specific character.
 *
 * @param int|string $element the character to accept
 * @param string $message failure message; when empty, "\"<element>\" expected" is used
 *
 * @return Parser
 */
function char($element, $message = null)
{
    if (!$message) {
        $message = "\"{$element}\" expected";
    }

    $matcher = new SingleCharacterPredicate($element);

    return new CharacterParser($matcher, $message);
}
88
89
/**
 * Builds a parser that accepts any digit character.
 *
 * The DigitCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'digit expected' is used
 *
 * @return Parser
 */
function digit($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new DigitCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'digit expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
108
109
/**
 * Builds a parser that accepts any letter character.
 *
 * The LetterCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'letter expected' is used
 *
 * @return Parser
 */
function letter($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new LetterCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'letter expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
128
129
/**
 * Builds a parser that accepts any lowercase character.
 *
 * The LowercaseCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'lowercase letter expected' is used
 *
 * @return Parser
 */
function lowercase($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new LowercaseCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'lowercase letter expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
148
149
/**
 * Builds a parser that accepts the given character class pattern.
 *
 * The pattern is itself parsed by a small grammar assembled on first use and
 * kept in a static variable: one or more items, each either "x-y" (a range)
 * or a single character, optionally preceded by "^" to negate the class.
 *
 * @param string $element the character class pattern
 * @param string $message failure message; when empty, "[<element>] expected" is used
 *
 * @return Parser
 */
function pattern($element, $message = null)
{
    static $grammar;

    if (!isset($grammar)) {
        // a lone character becomes a one-element range
        $toSingleRange = function ($character) {
            return new RangeCharacterPredicate($character, $character);
        };

        // "x-y": index 0 holds the start, index 2 the stop (index 1 is the dash)
        $toSpanRange = function ($parts) {
            return new RangeCharacterPredicate($parts[0], $parts[2]);
        };

        $toOptimized = function ($ranges) {
            return _optimizedRanges($ranges);
        };

        // index 0 is the optional "^"; NULL means the class is not negated
        $applyNegation = function ($parts) {
            if ($parts[0] === null) {
                return $parts[1];
            }

            return new NotCharacterPredicate($parts[1]);
        };

        $single = any()->map($toSingleRange);
        $multiple = any()->seq(char('-'))->seq(any())->map($toSpanRange);
        $positive = $multiple->or_($single)->plus()->map($toOptimized);

        $grammar = char('^')->optional()->seq($positive)->map($applyNegation);
    }

    $predicate = $grammar->parse($element)->getValue();

    if (!$message) {
        $message = "[{$element}] expected";
    }

    return new CharacterParser($predicate, $message);
}
191
192
/**
 * Builds a parser that accepts any character in the range between [start] and [stop].
 *
 * @param int    $start first character of the range
 * @param int    $stop  last character of the range
 * @param string $message failure message; when empty, "<start>..<stop> expected" is used
 *
 * @return Parser
 */
function range($start, $stop, $message = null)
{
    if (!$message) {
        $message = "{$start}..{$stop} expected";
    }

    $matcher = new RangeCharacterPredicate($start, $stop);

    return new CharacterParser($matcher, $message);
}
207
208
/**
 * Builds a parser that accepts any uppercase character.
 *
 * The UppercaseCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'uppercase letter expected' is used
 *
 * @return Parser
 */
function uppercase($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new UppercaseCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'uppercase letter expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
227
228
/**
 * Builds a parser that accepts any whitespace character.
 *
 * The WhitespaceCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'whitespace expected' is used
 *
 * @return Parser
 */
function whitespace($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new WhitespaceCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'whitespace expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
247
248
/**
 * Builds a parser that accepts any word character.
 *
 * The WordCharacterPredicate is created on the first call and shared by
 * every parser returned afterwards.
 *
 * @param string $message failure message; when empty, 'letter or digit expected' is used
 *
 * @return Parser
 */
function word($message = null)
{
    static $sharedPredicate;

    if (!isset($sharedPredicate)) {
        $sharedPredicate = new WordCharacterPredicate();
    }

    $failureMessage = $message ? $message : 'letter or digit expected';

    return new CharacterParser($sharedPredicate, $failureMessage);
}
267
268
/**
 * Builds a parser that consumes nothing and succeeds.
 *
 * For example, [:char('a').or(epsilon()):] is equivalent to
 * [:char('a').optional():].
 *
 * @param mixed $result value handed to the EpsilonParser constructor
 *
 * @return EpsilonParser
 */
function epsilon($result = null)
{
    $parser = new EpsilonParser($result);

    return $parser;
}
278
279
/**
 * Builds a parser that consumes nothing and fails.
 *
 * For example, [:failure():] always fails, no matter what input it is given.
 *
 * @param string $message failure message reported by the parser
 *
 * @return Parser
 */
function failure($message = 'unable to parse')
{
    $parser = new FailureParser($message);

    return $parser;
}
292
293
/**
 * Builds a parser that is not defined yet, but that can be set at a later
 * point in time. It is a failure() parser wrapped through settable().
 *
 * For example, the following code sets up a parser that points to itself
 * and that accepts a sequence of a's ended with the letter b.
 *
 *     var p = undefined();
 *     p.set(char('a').seq(p).or(char('b')));
 *
 * @param string $message failure message used while the parser is still undefined
 *
 * @return SettableParser
 */
function undefined($message = 'undefined parser')
{
    $placeholder = failure($message);

    return $placeholder->settable();
}
311
312
/**
 * Builds a parser that accepts any input element.
 *
 * For example, [:any():] succeeds and consumes any given letter. It only
 * fails for an empty input.
 *
 * @param string $message failure message; when empty, 'input expected' is used
 *
 * @return Parser
 */
function any($message = null)
{
    if (!$message) {
        $message = 'input expected';
    }

    return new AnyParser($message);
}
326
327
/**
 * Builds a parser that accepts any of the [elements].
 *
 * For example, [:anyIn('ab'):] succeeds and consumes either the letter
 * [:'a':] or the letter [:'b':]. For any other input the parser fails.
 *
 * A string argument is first turned into an array via Buffer::create()->toArray().
 *
 * @param array|string $elements the accepted elements
 * @param string $message failure message; when empty, "any of <a, b, ...> expected" is used
 *
 * @return Parser
 */
function anyIn($elements, $message = null)
{
    if (is_string($elements)) {
        $elements = Buffer::create($elements)->toArray();
    }

    // membership is deliberately tested with loose comparison
    $isMember = function ($candidate) use ($elements) {
        return in_array($candidate, $elements);
    };

    if (!$message) {
        $message = 'any of ' . implode(', ', $elements) . ' expected';
    }

    return predicate(1, $isMember, $message);
}
352
353
/**
 * Builds a parser that accepts any of the specified characters.
 *
 * @param string $string the accepted characters
 * @param string $message failure message; when empty, "any of \"<string>\" expected" is used
 *
 * @return Parser
 */
function anyOf($string, $message = null)
{
    $matcher = _optimizedString($string);

    if (!$message) {
        $message = "any of \"{$string}\" expected";
    }

    return new CharacterParser($matcher, $message);
}
365
366
/**
 * Builds a parser that accepts none of the specified characters.
 *
 * @param string $string the rejected characters
 * @param string $message failure message; when empty, "none of \"<string>\" expected" is used
 *
 * @return Parser
 */
function noneOf($string, $message = null)
{
    $excluded = new NotCharacterPredicate(_optimizedString($string));

    if (!$message) {
        $message = "none of \"{$string}\" expected";
    }

    return new CharacterParser($excluded, $message);
}
380
381
/**
 * Builds a character predicate matching any character of the given string.
 *
 * Each character code read from the Buffer becomes a one-element range; the
 * collected ranges are then handed to _optimizedRanges().
 *
 * @param string $string the characters to match
 *
 * @return CharacterPredicate
 */
function _optimizedString($string)
{
    $ranges = array();

    $buffer = Buffer::create($string);

    // The buffer is not modified inside the loop, so its length is read once
    // instead of on every iteration.
    for ($offset = 0, $length = $buffer->getLength(); $offset < $length; $offset++) {
        $value = $buffer->charCodeAt($offset);

        $ranges[] = new RangeCharacterPredicate($value, $value);
    }

    return _optimizedRanges($ranges);
}
400
401
/**
 * Collapses a list of character ranges into the simplest equivalent predicate.
 *
 * The ranges are sorted by start (then stop), adjacent or overlapping ranges
 * are merged, and the result is returned as a SingleCharacterPredicate (one
 * range covering a single code), the merged RangeCharacterPredicate itself
 * (exactly one range), or a RangesCharacterPredicate otherwise.
 *
 * @param RangeCharacterPredicate[] $ranges
 *
 * @return CharacterPredicate
 */
function _optimizedRanges($ranges)
{
    // 1. sort the ranges:

    $sortedRanges = $ranges;

    usort($sortedRanges, function ($first, $second) {
        return $first->start != $second->start
            ? $first->start - $second->start
            : $first->stop - $second->stop;
    });

    // 2. merge adjacent or overlapping ranges:

    $mergedRanges = array();

    foreach ($sortedRanges as $thisRange) {
        $lastIndex = count($mergedRanges) - 1;

        if ($lastIndex >= 0 && $mergedRanges[$lastIndex]->stop + 1 >= $thisRange->start) {
            $lastRange = $mergedRanges[$lastIndex];

            // A range fully contained in the previous one (e.g. "c-d" after
            // "a-z") must not shrink it, so keep the larger upper bound.
            $mergedRanges[$lastIndex] = new RangeCharacterPredicate(
                $lastRange->start,
                max($lastRange->stop, $thisRange->stop)
            );
        } else {
            $mergedRanges[] = $thisRange;
        }
    }

    // 3. build the best resulting predicates:

    if (count($mergedRanges) === 1) {
        return $mergedRanges[0]->start === $mergedRanges[0]->stop
            ? new SingleCharacterPredicate($mergedRanges[0]->start)
            : $mergedRanges[0];
    }

    return new RangesCharacterPredicate(
        count($mergedRanges),
        array_map(function (RangeCharacterPredicate $range) {
            return $range->start;
        }, $mergedRanges),
        array_map(function (RangeCharacterPredicate $range) {
            return $range->stop;
        }, $mergedRanges)
    );
}
455
456
/**
 * Builds a parser that accepts the string [element].
 *
 * For example, [:string('foo'):] succeeds and consumes the input string
 * [:'foo':]. Fails for any other input.
 *
 * @param string $element the exact string to accept
 * @param string $message failure message; when empty, "<element> expected" is used
 *
 * @return Parser
 */
function string($element, $message = null)
{
    $matchesExactly = function ($candidate) use ($element) {
        return $candidate === $element;
    };

    if (!$message) {
        $message = "{$element} expected";
    }

    // input of as many characters as $element has is read (TODO QA)
    return predicate(mb_strlen($element), $matchesExactly, $message);
}
477
478
/**
 * Builds a parser that accepts the string [element] ignoring the case.
 *
 * For example, [:stringIgnoreCase('foo'):] succeeds and consumes the input
 * string [:'Foo':] or [:'FOO':]. Fails for any other input.
 *
 * Both sides are lowercased with mb_convert_case() before being compared.
 *
 * @param string $element the string to accept, in any letter case
 * @param string $message failure message; when empty, "<element> expected" is used
 *
 * @return Parser
 */
function stringIgnoreCase($element, $message = null)
{
    $expected = mb_convert_case($element, MB_CASE_LOWER);

    $matchesIgnoringCase = function ($candidate) use ($expected) {
        return mb_convert_case($candidate, MB_CASE_LOWER) === $expected;
    };

    if (!$message) {
        $message = "{$element} expected";
    }

    return predicate(mb_strlen($element), $matchesIgnoringCase, $message);
}
501
502
/**
503
 * A generic predicate function returning [true] or [false] for a given
504
 * [input] argument.
505
 *
506
 * TODO add typedef when supported by php-doc
507
 */
508
//typedef bool Predicate(input);
509
510
/**
 * Builds a parser that reads input of the specified [length], accepts
 * it if the [predicate] matches, or fails with the given [message].
 *
 * @param int      $length    amount of input to read
 * @param callable $predicate function($value) : bool
 * @param string   $message   failure message
 *
 * @return Parser
 */
function predicate($length, $predicate, $message)
{
    $parser = new PredicateParser($length, $predicate, $message);

    return $parser;
}
524
525
/**
 * Builds a lazy iterable over all parsers reachable from a [root]. Do
 * not modify the grammar while iterating over it, otherwise you might
 * get unexpected results.
 *
 * @param Parser $root parser at which the traversal starts
 *
 * @return ParserIterable|Parser[]
 */
function allParser(Parser $root)
{
    $reachable = new ParserIterable($root);

    return $reachable;
}
538
539
/**
 * Transforms all parsers reachable from [parser] with the given [handler].
 * The identity function returns a copy of the incoming parser.
 *
 * Works in two phases: every reachable parser is copied and passed through
 * the handler, with the result stored under the original's object hash; the
 * resulting grammar is then walked, and every child that still refers to an
 * original parser is replaced by its transformed counterpart.
 *
 * @param Parser $parser root of the grammar to transform
 * @param Closure $handler function (Parser $parser): Parser
 *
 * @return Parser the transformed counterpart of $parser
 */
function transformParser(Parser $parser, Closure $handler)
{
    // Phase 1: original object hash => transformed copy
    $replacements = array();

    foreach (allParser($parser) as $original) {
        $replacements[spl_object_hash($original)] = $handler($original->copy());
    }

    // Phase 2: rewire the children of everything reachable in the new grammar
    $visited = array_values($replacements);
    $pending = array_values($replacements);

    while (!empty($pending)) {
        /** @var Parser $current */
        $current = array_pop($pending);

        foreach ($current->getChildren() as $child) {
            $childHash = spl_object_hash($child);

            if (isset($replacements[$childHash])) {
                // still pointing at an original parser: swap in its replacement
                $current->replace($child, $replacements[$childHash]);
                continue;
            }

            if (!in_array($child, $visited, true)) {
                $visited[] = $child;
                $pending[] = $child;
            }
        }
    }

    return $replacements[spl_object_hash($parser)];
}
579
580
/**
 * Produces a copy of the given Parser with all settable parsers removed.
 *
 * Each SettableParser met during the transformation is replaced by its
 * first child, repeatedly, until a non-settable parser is reached.
 *
 * @param Parser $parser root of the grammar
 *
 * @return Parser
 */
function removeSettables(Parser $parser)
{
    $unwrap = function (Parser $candidate) {
        for (; $candidate instanceof SettableParser; $candidate = $candidate->getChild(0)) {
            // keep descending through nested settable wrappers
        }

        return $candidate;
    };

    return transformParser($parser, $unwrap);
}
599
600
/**
 * Produces a copy of the given Parser in which parsers that compare equal
 * (via isEqualTo()) are collapsed onto the first such parser encountered.
 *
 * @param Parser $parser root of the grammar
 *
 * @return Parser
 */
function removeDuplicates(Parser $parser)
{
    // parsers kept so far; shared by reference across handler invocations
    $known = array();

    $deduplicate = function (Parser $source) use (&$known) {
        $match = null;

        foreach ($known as $candidate) {
            if ($source !== $candidate && $source->isEqualTo($candidate)) {
                $match = $candidate;
                break;
            }
        }

        if ($match !== null) {
            return $match;
        }

        if (!in_array($source, $known, true)) {
            $known[] = $source;
        }

        return $source;
    };

    return transformParser($parser, $deduplicate);
}
630
631
// TODO implement these functions
632
633
///**
634
// * Adds debug handlers to each parser reachable from [root].
635
// */
636
//Parser debug(Parser root) {
637
//  var level = 0;
638
//  return transformParser(root, (parser) {
639
//    return new _ContinuationParser(parser, (context, continuation) {
640
//      print('${_repeat(level, '  ')}${parser}');
641
//      level++;
642
//      var result = continuation(context);
643
//      level--;
644
//      print('${_repeat(level, '  ')}${result}');
645
//      return result;
646
//     });
647
//  });
648
//}
649
//
650
//String _repeat(int count, String value) {
651
//  var result = new StringBuffer();
652
//  for (var i = 0; i < count; i++) {
653
//    result.write(value);
654
//  }
655
//  return result.toString();
656
//}
657
//
658
///**
659
// * Adds progress handlers to each parser reachable from [root].
660
// */
661
//Parser progress(Parser root) {
662
//  return transformParser(root, (parser) {
663
//    return new _ContinuationParser(parser, (context, continuation) {
664
//      print('${_repeat(context.position, '*')} $parser');
665
//      return continuation(context);
666
//    });
667
//  });
668
//}
669
//
670
///**
671
// * Adds profiling handlers to each parser reachable from [root].
672
// */
673
//Parser profile(Parser root) {
674
//  var count = new Map();
675
//  var watch = new Map();
676
//  var parsers = new List();
677
//  return new _ContinuationParser(transformParser(root, (parser) {
678
//    parsers.add(parser);
679
//    return new _ContinuationParser(parser, (context, continuation) {
680
//      count[parser]++;
681
//      watch[parser].start();
682
//      var result = continuation(context);
683
//      watch[parser].stop();
684
//      return result;
685
//     });
686
//  }), (context, continuation) {
687
//    parsers.forEach((parser) {
688
//      count[parser] = 0;
689
//      watch[parser] = new Stopwatch();
690
//    });
691
//    var result = continuation(context);
692
//    parsers.forEach((parser) {
693
//      print('${count[parser]}\t'
694
//        '${watch[parser].elapsedMicroseconds}\t'
695
//        '${parser}');
696
//    });
697
//    return result;
698
//  });
699
//}
700