Completed
Push — master ( be1be7...29a316 )
by Colin
01:49
created

Json5Decoder::obj()   B

Complexity

Conditions 9
Paths 40

Size

Total Lines 46

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 27
CRAP Score 9.0036

Importance

Changes 0
Metric Value
cc 9
nc 40
nop 0
dl 0
loc 46
ccs 27
cts 28
cp 0.9643
crap 9.0036
rs 7.6226
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 documents.
 *
 * The decoder walks the input one byte at a time ($currentByte / $at) and
 * builds native PHP values. Use the static {@see Json5Decoder::decode()}
 * entry point; instances are created internally.
 */
final class Json5Decoder
{
    /** @var string The raw source text being decoded. */
    private $json;

    /** @var int Length of the source text in bytes. */
    private $length;

    /** @var int Byte offset of the current position within the source. */
    private $at = 0;

    /** @var string|null The byte at the current position, or null once the end of input is reached. */
    private $currentByte;

    /** @var int Current line number (1-based), reported in syntax errors. */
    private $lineNumber = 1;

    /** @var bool Whether objects are decoded into associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Maximum nesting depth allowed for arrays/objects. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth. */
    private $depth = 1;

    /** @var int Byte offset at which the current line begins (used to compute error columns). */
    private $currentLineStartsAt = 0;

    /**
     * Private constructor.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = (bool) $castBigIntToString;

        $this->length = \strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the source is neither valid JSON nor valid JSON5
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
        if (\PHP_VERSION_ID >= 70000) {
            try {
                $result = \json_decode($source, $associative, $depth, $options);
                if (\json_last_error() === \JSON_ERROR_NONE) {
                    return $result;
                }
            } catch (\JsonException $e) {
                // JSON_THROW_ON_ERROR was requested; a strict-JSON failure must
                // not prevent the JSON5 fallback below from running.
            }
        }

        // Fall back to JSON5 if that fails
        $asArray = $associative === true
            || ($associative === null && ($options & \JSON_OBJECT_AS_ARRAY) !== 0);
        $bigIntAsString = ($options & \JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string) $source, $asArray, $depth, $bigIntAsString);

        $result = $decoder->value();
        $decoder->white();

        // Compare against null explicitly: a trailing "0" byte is falsy in PHP
        // but is still unexpected trailing content.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset.
     *
     * @param int $at
     *
     * @return string|null The byte, or null when the offset is past the end of the input
     */
    private function getByte($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Returns the full (possibly multi-byte) character at the current position.
     *
     * @return string|null The character, or null at the end of the input
     */
    private function currentChar()
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // A UTF-8 character is at most 4 bytes long
        return \mb_substr(\substr($this->json, $this->at, 4), 0, 1, 'UTF-8');
    }

    /**
     * Advance to the next byte, keeping the line bookkeeping up to date.
     *
     * @return null|string The new current byte, or null when there is no more input
     */
    private function next()
    {
        // A line ends at "\n", or at a lone "\r" (the "\r" of a "\r\n" pair is not counted)
        if ($this->currentByte === "\n" || ($this->currentByte === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;

        return $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Advance to the next byte if the current one matches $c, or fail.
     *
     * @param string $c
     *
     * @return string|null
     *
     * @throws SyntaxError if the current byte is not $c
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(\sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        return $this->next();
    }

    /**
     * Get the byte following the current one without consuming anything.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * position is advanced past the matched text.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the expression does not match
     */
    private function match($regex)
    {
        // Only match on the current line. Slice just that line rather than
        // copying the whole remainder of the input on every call.
        $lineEnd = $this->at < $this->length ? \strpos($this->json, "\n", $this->at) : false;
        if ($lineEnd === false) {
            $subject = (string) \substr($this->json, $this->at);
        } else {
            $subject = (string) \substr($this->json, $this->at, $lineEnd - $this->at);
        }

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $this->at += $matches[0][1] + \strlen($matches[0][0]);
        $this->currentByte = $this->getByte($this->at);

        return $matches[0][0];
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded
     *
     * @throws SyntaxError if no valid identifier starts at the current position
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are the zero-width non-joiner / joiner, written
        // as escapes so that no invisible characters live in this source file.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = \preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return \json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * @return int|float|string A string is returned only for decimal integers that
     *                          overflow a PHP int while JSON_BIGINT_AS_STRING is set
     *
     * @throws SyntaxError on octal literals or otherwise malformed numbers
     */
    private function number()
    {
        $sign = '';
        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->currentByte === 'N') {
            // ignore sign as -NaN also is NaN
            return $this->word();
        }

        $digits = '';
        $isHex = false;

        if ($this->currentByte === '0') {
            $digits = '0';
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $isHex = true;
                $this->next();
            } elseif (\is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $hexDigits = $this->match('/^[A-Fa-f0-9]+/');
            if ($hexDigits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Pass only the digits: hexdec() deprecates non-hex characters such as the "x" prefix
            $value = \hexdec($hexDigits);
            if ($sign === '-') {
                $value = -$value;
            }

            if (!\is_finite($value)) {
                $this->throwSyntaxError('Bad number');
            }

            return $value;
        }

        // @codingStandardsIgnoreStart
        if ((\is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $digits .= $match;
        }
        if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $digits .= $match;
        }
        // @codingStandardsIgnoreEnd

        // Validate the literal *before* doing any arithmetic with it: multiplying
        // a non-numeric string ("", ".", "1e") is a TypeError on PHP 8 and silently
        // yields a bogus number on PHP 7.
        if (!\is_numeric($digits)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $value = $digits + 0;
        if (!\is_finite($value)) {
            $this->throwSyntaxError('Bad number');
        }

        // Only integers that overflowed into a float are returned as strings
        if ($this->castBigIntToString && \is_float($value) && \preg_match('/^\d+\z/', $digits) === 1) {
            return ($sign === '-' ? '-' : '') . $digits;
        }

        return ($sign === '-') ? -$value : $value;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current byte must be the opening quote.
     *
     * @return string
     *
     * @throws SyntaxError on an unterminated string, an unknown escape or an unescaped newline
     */
    private function string()
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // A run of \uXXXX escapes is decoded in one go so surrogate pairs stay together
                if ($this->peek() === 'u' && ($unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) !== null) {
                    $string .= \json_decode('"'.$unicodeEscaped.'"');
                    continue;
                }

                $this->next();
                if ($this->currentByte === "\r") {
                    // Escaped line break: swallow the "\n" of a "\r\n" pair as well
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the opening pair of this block comment.
     * To finish the block comment, we look for the closing star-slash pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the leading / is followed by neither / nor *
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // Non-breaking space in UTF-8 (compared as bytes so that a
                // trailing 0xC2 at end of input never passes null to ord())
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, etc
     *
     * @return bool|float|null
     *
     * @throws SyntaxError if the current position does not start a known keyword
     */
    private function word()
    {
        switch ($this->currentByte) {
            case 't':
                $this->consumeLiteral('true');

                return true;
            case 'f':
                $this->consumeLiteral('false');

                return false;
            case 'n':
                $this->consumeLiteral('null');

                return null;
            case 'I':
                $this->consumeLiteral('Infinity');

                return INF;
            case 'N':
                $this->consumeLiteral('NaN');

                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }

    /**
     * Consume the exact bytes of $literal from the current position, or fail.
     *
     * @param string $literal
     *
     * @throws SyntaxError at the first byte that differs from $literal
     */
    private function consumeLiteral($literal)
    {
        $count = \strlen($literal);
        for ($i = 0; $i < $count; $i++) {
            $this->nextOrFail($literal[$i]);
        }
    }

    /**
     * Parse an array value. The current byte must be the opening bracket.
     *
     * @return array
     *
     * @throws SyntaxError on excessive nesting, an omitted element or an unterminated array
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();

            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parse an object value. The current byte must be the opening brace.
     *
     * @return array|\stdClass An associative array when decoding in associative mode
     *
     * @throws SyntaxError on excessive nesting, a bad key or an unterminated object
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();

            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        switch ($this->currentByte) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return \is_numeric($this->currentByte) ? $this->number() : $this->word();
        }
    }

    /**
     * Throw a SyntaxError for the current position.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        // Calculate the column number (in characters, not bytes)
        $str = \substr($this->json, $this->currentLineStartsAt, $this->at - $this->currentLineStartsAt);
        $column = \mb_strlen($str, 'UTF-8') + 1;

        throw new SyntaxError($message, $this->lineNumber, $column);
    }

    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or "EOF" for null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Resolve the character following a backslash inside a string.
     *
     * @param string|null $ch
     *
     * @return string|null The replacement text, or null when the escape is not recognised
     */
    private static function getEscapee($ch)
    {
        static $escapees = null;
        if ($escapees === null) {
            $escapees = [
                "'"  => "'",
                '"'  => '"',
                '\\' => '\\',
                '/'  => '/',
                "\n" => '', // escaped line break: line continuation
                'b'  => "\x08",
                'f'  => "\f",
                'n'  => "\n",
                'r'  => "\r",
                't'  => "\t",
            ];
        }

        if ($ch === null || !isset($escapees[$ch])) {
            return null;
        }

        return $escapees[$ch];
    }
}
630