Code

< 40 %
40-60 %
> 60 %
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 documents.
 *
 * The decoder walks the source one byte at a time, tracking the current line
 * and the offset at which that line starts so that syntax errors can report a
 * line/column position. The only public entry point is the static decode()
 * method, which mirrors the signature of PHP's json_decode().
 */
final class Json5Decoder
{
    /** @var string The raw source being parsed. */
    private $json;

    /** @var int Length of the source, in bytes. */
    private $length;

    /** @var int Byte offset of the current parse position. */
    private $at = 0;

    /** @var string|null The byte at the current position, or null once the end of input is reached. */
    private $currentByte;

    /** @var int Line number of the current position, starting at 1 (reported in syntax errors). */
    private $lineNumber = 1;

    /** @var bool When true, objects are decoded into associative arrays instead of stdClass instances. */
    private $associative = false;

    /** @var int Maximum nesting depth allowed before a syntax error is raised. */
    private $maxDepth = 512;

    /** @var bool When true, integer literals that overflow into floats are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; starts at 1 and is incremented for every open array/object. */
    private $depth = 1;

    /** @var int Byte offset at which the current line starts (used to compute error columns). */
    private $currentLineStartsAt = 0;

    /**
     * Private constructor; instances are only created by decode().
     *
     * @param string $json               The source to parse.
     * @param bool   $associative        Decode objects as associative arrays.
     * @param int    $depth              Maximum nesting depth.
     * @param bool   $castBigIntToString Return overflowing integers as strings.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = \strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the source is not valid JSON5
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
        if (PHP_VERSION_ID >= 70000) {
            try {
                $result = \json_decode($source, $associative, $depth, $options);
                if (\json_last_error() === \JSON_ERROR_NONE) {
                    return $result;
                }
            } catch (\Throwable $e) {
                // ignore exception, continue parsing as JSON5
            }
        }

        // Fall back to JSON5 if that fails
        $associative = $associative === true || ($associative === null && $options & \JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool) ($options & \JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value and trailing whitespace/comments is an error.
        // The comparison must be against null: a plain truthiness test would treat
        // a trailing "0" byte as "no more input" because the string "0" is falsy.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset.
     *
     * @param int $at Byte offset into the source.
     *
     * @return string|null The single byte at that offset, or null when the offset is past the end.
     */
    private function getByte($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Returns the (possibly multi-byte) character at the current position.
     *
     * Up to four bytes are sliced off and the first character is extracted
     * with mb_substr(), so the result is suitable for error messages.
     *
     * @return string|null The current character, or null at the end of input.
     */
    private function currentChar()
    {
        if ($this->at >= $this->length) {
            return null;
        }

        return \mb_substr(\substr($this->json, $this->at, 4), 0, 1);
    }

    /**
     * Advance to the next byte.
     *
     * @return null|string The new current byte, or null when there is no more input.
     */
    private function next()
    {
        // Leaving a line terminator means a new line starts at the next offset.
        // A "\r" immediately followed by "\n" is not counted; the "\n" will be.
        if ($this->currentByte === "\n" || ($this->currentByte === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;

        return $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Advance to the next byte if the current one matches $c, or fail.
     *
     * @param string $c The byte expected at the current position.
     *
     * @return string|null The new current byte.
     *
     * @throws SyntaxError if the current byte is not $c
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(\sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        return $this->next();
    }

    /**
     * Get the byte following the current one without consuming anything.
     *
     * @return string|null The next byte, or null if there is none.
     */
    private function peek()
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * parse position is moved to just after the matched text.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the pattern did not match.
     */
    private function match($regex)
    {
        $subject = \substr($this->json, $this->at);

        // Only match on the current line. strpos() returns 0 for a newline at
        // the very start, so the result has to be compared against false.
        $pos = \strpos($subject, "\n");
        if ($pos !== false) {
            $subject = \substr($subject, 0, $pos);
        }

        if (!\preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $this->at += $matches[0][1] + \strlen($matches[0][0]);
        $this->currentByte = $this->getByte($this->at);

        return $matches[0][0];
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded.
     *
     * @throws SyntaxError if no identifier starts at the current position
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are ZWNJ and ZWJ, which are permitted inside identifiers.
        // They are written as escapes so that no invisible characters hide in this pattern.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = \preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return \json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number.
     *
     * Handles an optional leading sign, Infinity, NaN, hexadecimal literals
     * and decimal literals with optional fraction and exponent. Literals with
     * a leading zero followed by another digit are rejected.
     *
     * @return int|float|string A string is only returned for overflowing integers
     *                          when big-int-as-string casting is enabled.
     *
     * @throws SyntaxError if the literal is malformed or not finite
     */
    private function number()
    {
        $number = null;
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->currentByte === 'N') {
            // ignore sign as -NaN also is NaN
            return $this->word();
        }

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $string .= $this->currentByte;
                $this->next();
                $base = 16;
            } elseif (\is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        switch ($base) {
            case 10:
                // @codingStandardsIgnoreStart
                if ((\is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
                    $string .= $match;
                }
                if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                    $string .= $match;
                }
                // @codingStandardsIgnoreEnd
                $number = $string;
                break;
            case 16:
                if (($match = $this->match('/^[A-Fa-f0-9]+/')) !== null) {
                    // Convert only the digits; the "0x" prefix has already been consumed.
                    $number = \hexdec($match);
                    break;
                }
                $this->throwSyntaxError('Bad hex number');
        }

        if ($sign === '-') {
            $number = '-' . $number;
        }

        if (!\is_numeric($number) || !\is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // An all-digit literal that became a float has overflowed the native int range.
        $isIntLike = \preg_match('/^-?\d+$/', $number) === 1;
        if ($this->castBigIntToString && $isIntLike && \is_float($asIntOrFloat)) {
            return $number;
        }

        return $asIntOrFloat;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current byte must be the opening quote. Supports \uXXXX escapes,
     * the single-character escapes known to getEscapee() and backslash line
     * continuations. Unescaped newlines and unknown escapes are rejected.
     *
     * @return string The decoded string contents.
     *
     * @throws SyntaxError if the string is malformed or unterminated
     */
    private function string()
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // Consume a whole run of \uXXXX escapes at once so that
                // surrogate pairs are decoded together.
                if ($this->peek() === 'u' && $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
                    try {
                        $unicodeUnescaped = \json_decode('"' . $unicodeEscaped . '"', false, 1, JSON_THROW_ON_ERROR);
                        if ($unicodeUnescaped === null && ($err = json_last_error_msg())) {
                            throw new \JsonException($err);
                        }
                        $string .= $unicodeUnescaped;
                    } catch (\JsonException $e) {
                        $this->throwSyntaxError($e->getMessage());
                    }
                    // match() already moved past the escapes
                    continue;
                }

                $this->next();
                if ($this->currentByte === "\r") {
                    // Line continuation: swallow the "\r" and an optional following "\n"
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the star that follows the opening slash of this block comment.
     * To finish the block comment, we look for a closing star-slash pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the slash does not start a recognized comment
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // Non-breaking space in UTF-8 (two bytes). Bytes are compared
                // directly because peek() may return null at the end of input.
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, etc
     *
     * @return bool|float|null The value of the keyword (true, false, null, INF or NAN).
     *
     * @throws SyntaxError if the input does not spell out one of the known keywords
     */
    private function word()
    {
        switch ($this->currentByte) {
            case 't':
                $this->nextOrFail('t');
                $this->nextOrFail('r');
                $this->nextOrFail('u');
                $this->nextOrFail('e');
                return true;
            case 'f':
                $this->nextOrFail('f');
                $this->nextOrFail('a');
                $this->nextOrFail('l');
                $this->nextOrFail('s');
                $this->nextOrFail('e');
                return false;
            case 'n':
                $this->nextOrFail('n');
                $this->nextOrFail('u');
                $this->nextOrFail('l');
                $this->nextOrFail('l');
                return null;
            case 'I':
                $this->nextOrFail('I');
                $this->nextOrFail('n');
                $this->nextOrFail('f');
                $this->nextOrFail('i');
                $this->nextOrFail('n');
                $this->nextOrFail('i');
                $this->nextOrFail('t');
                $this->nextOrFail('y');
                return INF;
            case 'N':
                $this->nextOrFail('N');
                $this->nextOrFail('a');
                $this->nextOrFail('N');
                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }

    /**
     * Parse an array value.
     *
     * The current byte must be the opening bracket. A trailing comma before
     * the closing bracket is accepted; omitted elements are not.
     *
     * @return array
     *
     * @throws SyntaxError if the array is malformed or nested too deeply
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;
                return $arr; // Potentially empty array
            }
            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();
            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;
                return $arr;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parse an object value
     *
     * The current byte must be the opening brace. Keys may be quoted strings
     * or unquoted identifiers, and a trailing comma before the closing brace
     * is accepted.
     *
     * @return array|\stdClass An associative array when associative mode is on, otherwise a stdClass.
     *
     * @throws SyntaxError if the object is malformed or nested too deeply
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;
                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();
            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;
                return $object;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     *
     * @throws SyntaxError if no valid value starts at the current position
     */
    private function value()
    {
        $this->white();
        switch ($this->currentByte) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return \is_numeric($this->currentByte) ? $this->number() : $this->word();
        }
    }

    /**
     * Throw a syntax error annotated with the current line and column.
     *
     * The column is counted in characters (not bytes) from the start of the
     * current line, starting at 1.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        // Calculate the column number
        $str = \substr($this->json, $this->currentLineStartsAt, $this->at - $this->currentLineStartsAt);
        $column = \mb_strlen($str) + 1;

        throw new SyntaxError($message, $this->lineNumber, $column);
    }

    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The character wrapped in single quotes, or "EOF" for null.
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Translate the character following a backslash into the text it represents.
     *
     * @param string $ch
     *
     * @return string|null The replacement text (empty for an escaped newline),
     *                     or null when the escape is not recognized.
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return '';
            case 'b':  return \chr(8);
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }
}
645