mindplay-dk /
petitparserphp
| 1 | <?php |
||
| 2 | |||
| 3 | namespace petitparser; |
||
| 4 | use OutOfRangeException; |
||
| 5 | |||
/**
 * Abstract base class of all parsers.
 *
 * Concrete parsers implement {@link parseOn()} and {@link copy()}. The remaining
 * methods provide parsing entry points (parse, accept, matches), combinators that
 * wrap the receiver in a new parser object, and structural helpers (equality,
 * children, replacement).
 */
abstract class Parser
{
    /**
     * @type int value passed as [max] by star(), plus() and their greedy/lazy
     *           variants to request an unbounded upper limit
     */
    const UNBOUNDED = -1;

    /**
     * Primitive method doing the actual parsing.
     *
     * The method is overridden in concrete subclasses to implement the
     * parser specific logic. The method takes a parse [context] and
     * returns the resulting context, which is either a [Success] or
     * [Failure] context.
     *
     * @param Context $context
     *
     * @return Result
     */
    abstract public function parseOn(Context $context);

    /**
     * Returns the parse result of the [input] buffer.
     *
     * The implementation creates a default parse context on the input (at
     * position 0) and calls the internal parsing logic of the receiving parser.
     *
     * For example, [:letter().plus().parse('abc'):] results in an instance of
     * [Success], where [Result.position] is [:3:] and [Success.value] is
     * [:[a, b, c]:].
     *
     * Similarly, [:letter().plus().parse('123'):] results in an instance of
     * [Failure], where [Result.position] is [:0:] and [Failure.message] is
     * ['letter expected'].
     *
     * Note that, if a string is given, the input is assumed to be in the current PHP internal
     * character encoding, e.g. as defined by {@link mb_internal_encoding()} - if this is not
     * the case (such as when parsing the contents of a file with a known encoding), either
     * call {@link Buffer::create()} explicitly indicating the encoding, or use one of the
     * two convenience methods {@link parseUTF8()} or {@link parseISO()}.
     *
     * @param Buffer|string $input input Buffer or string (see notes about strings above)
     *
     * @return Result
     *
     * @see mb_internal_encoding()
     * @see parseUTF8()
     * @see parseISO()
     */
    public function parse($input)
    {
        // Strings are wrapped in a Buffer; anything else is passed through as-is.
        return $this->parseOn(new Context(is_string($input) ? Buffer::create($input) : $input, 0));
    }

    /**
     * @see parse()
     *
     * @param string $string UTF-8 encoded string to parse
     *
     * @return Result
     */
    public function parseUTF8($string)
    {
        return $this->parse(Buffer::fromUTF8($string));
    }

    /**
     * @see parse()
     *
     * @param string $string ISO-8859-1 encoded string to parse
     *
     * @return Result
     */
    public function parseISO($string)
    {
        return $this->parse(Buffer::fromISO($string));
    }

    /**
     * Tests if the [input] can be successfully parsed.
     *
     * For example, [:letter().plus().accept('abc'):] returns [:true:], and
     * [:letter().plus().accept('123'):] returns [:false:].
     *
     * @param Buffer $input
     *
     * @return bool
     */
    public function accept(Buffer $input)
    {
        return $this->parse($input)->isSuccess();
    }

    /**
     * Returns a list of all successful overlapping parses of the [input].
     *
     * For example, [:letter().plus().matches('abc de'):] results in the list
     * [:[['a', 'b', 'c'], ['b', 'c'], ['c'], ['d', 'e'], ['e']]:]. See
     * [Parser.matchesSkipping] to retrieve non-overlapping parse results.
     *
     * @param Buffer $input
     *
     * @return Result[]
     */
    public function matches(Buffer $input)
    {
        $list = array();

        // The and-predicate does not consume input, so after recording a match
        // exactly one token is consumed by any() before the next attempt; this
        // is what makes the collected matches overlap.
        $this->and_()
            ->map(
                function ($each) use (&$list) {
                    $list[] = $each;
                }
            )
            ->seq(any())
            ->or_(any())
            ->star()
            ->parse($input);

        return $list;
    }

    /**
     * Returns a list of all successful non-overlapping parses of the input.
     *
     * For example, [:letter().plus().matchesSkipping('abc de'):] results in the
     * list [:[['a', 'b', 'c'], ['d', 'e']]:]. See [Parser.matches] to retrieve
     * overlapping parse results.
     *
     * @param Buffer $input
     *
     * @return Result[]
     */
    public function matchesSkipping(Buffer $input)
    {
        $list = array();

        // Either the receiver matches (and is recorded), or a single token is
        // skipped by any(); repeated until neither alternative succeeds.
        $this
            ->map(
                function ($each) use (&$list) {
                    $list[] = $each;
                }
            )
            ->or_(any())
            ->star()
            ->parse($input);

        return $list;
    }

    /**
     * Returns new parser that accepts the receiver, if possible. The resulting
     * parser returns the result of the receiver, or [:null:] if not applicable.
     * The returned value can be provided as an optional argument [otherwise].
     *
     * For example, the parser [:letter().optional():] accepts a letter as input
     * and returns that letter. When given something else the parser succeeds as
     * well, does not consume anything and returns [:null:].
     *
     * @param mixed $otherwise
     *
     * @return Parser
     */
    public function optional($otherwise = null)
    {
        return new OptionalParser($this, $otherwise);
    }

    /**
     * Returns a parser that accepts the receiver zero or more times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().star():] accepts the empty string or
     * any sequence of letters and returns a possibly empty list of the parsed
     * letters.
     *
     * @return Parser
     */
    public function star()
    {
        return $this->repeat(0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver zero or more times until it
     * reaches a [limit]. This is a greedy non-blind implementation of the
     * [Parser.star] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function starGreedy(Parser $limit)
    {
        return $this->repeatGreedy($limit, 0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver zero or more times until it
     * reaches a [limit]. This is a lazy non-blind implementation of the
     * [Parser.star] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function starLazy(Parser $limit)
    {
        return $this->repeatLazy($limit, 0, self::UNBOUNDED);
    }

    /**
     * Returns a parser that accepts the receiver one or more times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().plus():] accepts any sequence of
     * letters and returns a list of the parsed letters.
     *
     * @return Parser
     */
    public function plus()
    {
        return $this->repeat(1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver one or more times until it
     * reaches [limit]. This is a greedy non-blind implementation of the
     * [Parser.plus] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function plusGreedy(Parser $limit)
    {
        return $this->repeatGreedy($limit, 1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that parses the receiver one or more times until it
     * reaches a [limit]. This is a lazy non-blind implementation of the
     * [Parser.plus] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     *
     * @return Parser
     */
    public function plusLazy(Parser $limit)
    {
        return $this->repeatLazy($limit, 1, self::UNBOUNDED);
    }

    /**
     * Returns a parser that accepts the receiver between [min] and [max] times.
     * The resulting parser returns a list of the parse results of the receiver.
     *
     * This is a greedy and blind implementation that tries to consume as much
     * input as possible and that does not consider what comes afterwards.
     *
     * For example, the parser [:letter().repeat(2, 4):] accepts a sequence of
     * two, three, or four letters and returns the accepted letters as a list.
     *
     * @param int $min
     * @param int $max
     *
     * @return Parser
     */
    public function repeat($min, $max)
    {
        return new PossessiveRepeatingParser($this, $min, $max);
    }

    /**
     * Returns a parser that parses the receiver at least [min] and at most [max]
     * times until it reaches a [limit]. This is a greedy non-blind implementation of
     * the [Parser.repeat] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     * @param int    $min
     * @param int    $max
     *
     * @return Parser
     */
    public function repeatGreedy(Parser $limit, $min, $max)
    {
        return new GreedyRepeatingParser($this, $limit, $min, $max);
    }

    /**
     * Returns a parser that parses the receiver at least [min] and at most [max]
     * times until it reaches a [limit]. This is a lazy non-blind implementation of
     * the [Parser.repeat] operator. The [limit] is not consumed.
     *
     * @param Parser $limit
     * @param int    $min
     * @param int    $max
     *
     * @return Parser
     */
    public function repeatLazy(Parser $limit, $min, $max)
    {
        return new LazyRepeatingParser($this, $limit, $min, $max);
    }

    /**
     * Returns a parser that accepts the receiver exactly [count] times. The
     * resulting parser returns a list of the parse results of the receiver.
     *
     * For example, the parser [:letter().times(2):] accepts two letters and
     * returns a list of the two parsed letters.
     *
     * @param int $count
     *
     * @return Parser
     */
    public function times($count)
    {
        return $this->repeat($count, $count);
    }

    /**
     * Returns a parser that accepts the receiver followed by [other]. The
     * resulting parser returns a list of the parse result of the receiver
     * followed by the parse result of [other]. Calling this method on an
     * existing sequence does not nest this sequence into a new one, but
     * instead augments the existing sequence with [other].
     *
     * For example, the parser [:letter().seq(digit()).seq(letter()):] accepts a
     * letter followed by a digit and another letter. The parse result of the
     * input string [:'a1b':] is the list [:['a', '1', 'b']:].
     *
     * @param Parser $other
     *
     * @return Parser
     */
    public function seq(Parser $other)
    {
        return new SequenceParser(array($this, $other));
    }

    /**
     * Returns a parser that accepts the receiver or [other]. The resulting
     * parser returns the parse result of the receiver, if the receiver fails
     * it returns the parse result of [other] (exclusive ordered choice).
     *
     * For example, the parser [:letter().or(digit()):] accepts a letter or a
     * digit. An example where the order matters is the following choice between
     * overlapping parsers: [:letter().or(char('a')):]. In the example the parser
     * [:char('a'):] will never be activated, because the input is always consumed
     * by [:letter():]. This can be problematic if the author intended to attach a
     * production action to [:char('a'):].
     *
     * TODO find a better name for this method
     *
     * @param Parser $other
     *
     * @return Parser
     */
    public function or_(Parser $other)
    {
        return new ChoiceParser(array($this, $other));
    }

    /**
     * Returns a parser (logical and-predicate) that succeeds whenever the
     * receiver does, but never consumes input.
     *
     * For example, the parser [:char('_').and().seq(identifier):] accepts
     * identifiers that start with an underscore character. Since the predicate
     * does not consume accepted input, the parser [:identifier:] is given the
     * ability to process the complete identifier.
     *
     * @return AndParser
     */
    public function and_()
    {
        return new AndParser($this);
    }

    /**
     * Returns a parser (logical not-predicate) that succeeds whenever the
     * receiver fails, but never consumes input.
     *
     * For example, the parser [:char('_').not().seq(identifier):] accepts
     * identifiers that do not start with an underscore character. If the parser
     * [:char('_'):] accepts the input, the negation and subsequently the
     * complete parser fails. Otherwise the parser [:identifier:] is given the
     * ability to process the complete identifier.
     *
     * @param string $message
     *
     * @return Parser
     */
    public function not_($message = null)
    {
        return new NotParser($this, $message);
    }

    /**
     * Returns a parser that consumes any input token (character), but the
     * receiver.
     *
     * For example, the parser [:letter().neg():] accepts any input but a letter.
     * The parser fails for inputs like [:'a':] or [:'Z':], but succeeds for
     * input like [:'1':], [:'_':] or [:'$':].
     *
     * @param string $message
     *
     * @return Parser
     */
    public function neg($message = null)
    {
        // Sequence of [not-predicate, any()]; element 1 is the token consumed by any().
        return $this->not_($message)->seq(any())->pick(1);
    }

    /**
     * Returns a parser that discards the result of the receiver, and returns
     * a sub-string of the consumed range in the string/list being parsed.
     *
     * For example, the parser [:letter().plus().flatten():] returns [:'abc':]
     * for the input [:'abc':]. In contrast, the parser [:letter().plus():] would
     * return [:['a', 'b', 'c']:] for the same input instead.
     *
     * @return Parser
     */
    public function flatten()
    {
        return new FlattenParser($this);
    }

    /**
     * Returns a parser that returns a [Token]. The token carries the parsed
     * values of the receiver [Token.value], as well as the consumed range from
     * [Token.start] to [Token.stop] of the string/list being parsed.
     *
     * For example, the parser [:letter().plus().token():] returns the token
     * [:Token[start: 0, stop: 3, value: abc]:] for the input [:'abc':].
     *
     * @return Parser
     */
    public function token()
    {
        return new TokenParser($this);
    }

    /**
     * Returns a parser that consumes input before and after the receiver. The
     * optional argument is a parser that consumes the excess input. By default
     * `whitespace()` is used. Two arguments can be provided to have different
     * parsers on the [left] and [right] side.
     *
     * For example, the parser `letter().plus().trim()` returns `['a', 'b']`
     * for the input `' ab\n'` and consumes the complete input string.
     *
     * @param Parser $left  parser for leading excess input (defaults to whitespace())
     * @param Parser $right parser for trailing excess input (defaults to $left)
     *
     * @return Parser
     */
    public function trim(Parser $left = null, Parser $right = null)
    {
        $left = $left ?: whitespace();
        $right = $right ?: $left;

        return new TrimmingParser($this, $left, $right);
    }

    /**
     * Returns a parser that succeeds only if the receiver consumes the complete
     * input, otherwise return a failure with the optional [message].
     *
     * For example, the parser [:letter().end():] succeeds on the input [:'a':]
     * and fails on [:'ab':]. In contrast the parser [:letter():] alone would
     * succeed on both inputs, but not consume everything for the second input.
     *
     * @param string $message
     *
     * @return Parser
     */
    public function end_($message = 'end of input expected')
    {
        return new EndOfInputParser($this, $message);
    }

    /**
     * Returns a parser that points to the receiver, but can be changed to point
     * to something else at a later point in time.
     *
     * For example, the parser [:letter().settable():] behaves exactly the same
     * as [:letter():], but it can be replaced with another parser using
     * [SettableParser.set].
     *
     * @return SettableParser
     */
    public function settable()
    {
        return new SettableParser($this);
    }

    /**
     * Returns a parser that evaluates [function] as action handler on success
     * of the receiver.
     *
     * For example, the parser [:digit().map((char) => int.parse(char)):] returns
     * the number [:1:] for the input string [:'1':].
     *
     * @param callable $function
     *
     * @return Parser
     */
    public function map($function)
    {
        return new ActionParser($this, $function);
    }

    /**
     * Returns a parser that transforms a successful parse result by returning
     * the element at [index] of a list. A negative index can be used to access
     * the elements from the back of the list.
     *
     * For example, the parser [:letter().star().pick(-1):] returns the last
     * letter parsed. For the input [:'abc':] it returns [:'c':].
     *
     * @param int $index
     *
     * @return Parser
     */
    public function pick($index)
    {
        return $this
            ->map(
                function ($list) use ($index) {
                    // Negative indexes count from the end of the list.
                    return $list[$index < 0 ? count($list) + $index : $index];
                }
            );
    }

    /**
     * Returns a parser that transforms a successful parse result by returning
     * the permuted elements at [indexes] of a list. Negative indexes can be
     * used to access the elements from the back of the list.
     *
     * For example, the parser [:letter().star().permute([0, -1]):] returns the
     * first and last letter parsed. For the input [:'abc':] it returns
     * [:['a', 'c']:].
     *
     * @param int[] $indexes
     *
     * @return Parser
     */
    public function permute($indexes)
    {
        return $this
            ->map(
                function ($list) use ($indexes) {
                    return array_map(
                        function ($index) use ($list) {
                            // Negative indexes count from the end of the list.
                            return $list[$index < 0 ? count($list) + $index : $index];
                        },
                        $indexes
                    );
                }
            );
    }

    /**
     * Returns a parser that consumes the receiver one or more times separated
     * by the [separator] parser. The resulting parser returns a flat list of
     * the parse results of the receiver interleaved with the parse result of the
     * separator parser.
     *
     * If the optional argument [includeSeparators] is set to [:false:], then the
     * separators are not included in the parse result. If the optional argument
     * [optionalSeparatorAtEnd] is set to [:true:] the parser also accepts an
     * optional separator at the end.
     *
     * For example, the parser [:digit().separatedBy(char('-')):] returns a parser
     * that consumes input like [:'1-2-3':] and returns a list of the elements and
     * separators: [:['1', '-', '2', '-', '3']:].
     *
     * @param Parser $separator
     * @param bool   $includeSeparators
     * @param bool   $optionalSeparatorAtEnd
     *
     * @return Parser
     */
    public function separatedBy(Parser $separator, $includeSeparators = true, $optionalSeparatorAtEnd = false)
    {
        // Zero or more [separator, element] pairs following the first element.
        $repeater = new SequenceParser(array($separator, $this));
        $repeater = $repeater->star();

        // The separator parser object itself is used as the "otherwise" value of
        // the optional trailing separator, so the action below can tell "no
        // trailing separator" apart from any real parse result.
        $parser = new SequenceParser($optionalSeparatorAtEnd
            ? array($this, $repeater, $separator->optional($separator))
            : array($this, $repeater));

        return $parser->map(
            function ($list) use ($includeSeparators, $optionalSeparatorAtEnd, $separator) {
                $result = array();
                $result[] = $list[0];

                foreach ($list[1] as $tuple) {
                    if ($includeSeparators) {
                        $result[] = $tuple[0];
                    }
                    $result[] = $tuple[1];
                }

                // Append the trailing separator only if one was actually parsed.
                if ($includeSeparators && $optionalSeparatorAtEnd && $list[2] !== $separator) {
                    $result[] = $list[2];
                }

                return $result;
            }
        );
    }

    /**
     * Returns a shallow copy of the receiver.
     *
     * @return Parser
     */
    abstract public function copy();

    /**
     * Recursively tests for the equality of two parsers.
     *
     * The code automatically deals with recursive parsers and parsers that
     * refer to other parsers. This code is supposed to be overridden by parsers
     * that add other state.
     *
     * @param Parser   $other
     * @param Parser[] $seen parsers already visited on the current path; a parser
     *                       found in this list is treated as equal so that cyclic
     *                       parser graphs terminate
     *
     * @return bool
     */
    public function isEqualTo(Parser $other, $seen = array())
    {
        if ($this === $other || in_array($this, $seen, true)) {
            return true;
        }

        $seen[] = $this;

        return get_class($this) === get_class($other)
            && $this->hasEqualProperties($other)
            && $this->hasEqualChildren($other, $seen);
    }

    /**
     * Compare the properties of two parsers. Normally this method should not be
     * called directly, instead use {@link isEqualTo()}.
     *
     * Override this method in all subclasses that add new state. This base
     * implementation has no state to compare and always returns [:true:].
     *
     * @param Parser $other
     *
     * @return bool
     */
    public function hasEqualProperties(
        /** @noinspection PhpUnusedParameterInspection */
        Parser $other)
    {
        return true;
    }

    /**
     * Compares the directly referenced parsers of the receiver and [other]
     * pairwise, by position, using {@link isEqualTo()}. Normally this method
     * should not be called directly, instead use {@link isEqualTo()}.
     *
     * @param Parser   $other
     * @param Parser[] $seen parsers already visited (forwarded to isEqualTo())
     *
     * @return bool true if both parsers have the same number of children and
     *              every pair of children at the same index is equal
     */
    public function hasEqualChildren(Parser $other, $seen = array())
    {
        $thisChildren = $this->getChildren();
        $otherChildren = $other->getChildren();

        // Counted once up front instead of on every loop iteration.
        $childCount = count($thisChildren);

        if ($childCount !== count($otherChildren)) {
            return false;
        }

        for ($i = 0; $i < $childCount; $i++) {
            if (! $thisChildren[$i]->isEqualTo($otherChildren[$i], $seen)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns a list of directly referenced parsers.
     *
     * For example, [:letter().children:] returns the empty collection [:[]:],
     * because the letter parser is a primitive or leaf parser that does not
     * depend or call any other parser.
     *
     * In contrast, [:letter().or(digit()).children:] returns a collection
     * containing both the [:letter():] and [:digit():] parser.
     *
     * This base implementation returns an empty list.
     *
     * @return Parser[] a list of directly referenced parsers.
     */
    public function getChildren()
    {
        return array();
    }

    /**
     * @param int $index
     *
     * @return Parser the nth directly referenced parser.
     *
     * @throws OutOfRangeException if there is no child at the given index
     */
    public function getChild($index)
    {
        $children = $this->getChildren();

        if (!isset($children[$index])) {
            $childCount = count($children);

            // The subtraction is parenthesized on purpose: before PHP 8, "." and
            // "-" had equal precedence, so the unparenthesized form subtracted 1
            // from the concatenated string instead of from the count.
            throw new OutOfRangeException(
                $childCount
                    ? "valid range is: 0 to " . ($childCount - 1)
                    : "Parser has no children"
            );
        }

        return $children[$index];
    }

    /**
     * Changes the receiver by replacing [source] with [target]. Does nothing
     * if [source] does not exist in [Parser.children].
     *
     * The following example creates a letter parser and then defines a parser
     * called [:example:] that accepts one or more letters. Eventually the parser
     * [:example:] is modified by replacing the [:letter:] parser with a new
     * parser that accepts a digit. The resulting [:example:] parser accepts one
     * or more digits.
     *
     *     $letter = $this->letter();
     *     $example = $letter->plus();
     *     $example->replace($letter, $this->digit());
     *
     * @param Parser $source
     * @param Parser $target
     *
     * @return void
     */
    public function replace(Parser $source, Parser $target)
    {
        // no children, nothing to do
    }

    /**
     * @return string the class name of the receiver
     */
    public function __toString()
    {
        return get_class($this);
    }
}
||
| 768 |
If you have a function call in the test part of a
`for` loop, that function is executed on every iteration. Often such a call can be moved to the initialization part of the loop and its result cached.