Completed
Push — master ( 75ab96...ef147f )
by Colin
01:52
created

Json5Decoder::decode()   B

Complexity

Conditions 6
Paths 13

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 6.0852

Importance

Changes 0
Metric Value
cc 6
nc 13
nop 4
dl 0
loc 25
ccs 13
cts 15
cp 0.8667
crap 6.0852
rs 8.8977
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
{
    /** @var string The raw input being parsed. */
    private $json;

    /** @var int Length of the input, in bytes. */
    private $length;

    /** @var int Byte offset of the current position within the input. */
    private $at = 0;

    /** @var string|null The byte at the current position, or null once the end of input is reached. */
    private $currentByte;

    /** @var int Line number of the current position (starts at 1). */
    private $lineNumber = 1;

    /** @var bool Whether objects are returned as associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Maximum allowed nesting depth. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a native int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; incremented on entering an array/object. */
    private $depth = 1;

    /** @var int Byte offset at which the current line begins (used to compute error columns). */
    private $currentLineStartsAt = 0;

    /**
     * Private constructor; use {@see Json5Decoder::decode()}.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = \strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @throws SyntaxError if the input is not valid JSON5
     *
     * @return mixed
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
        if (\PHP_VERSION_ID >= 70000) {
            try {
                $result = \json_decode($source, $associative, $depth, $options);
                if (\json_last_error() === \JSON_ERROR_NONE) {
                    return $result;
                }
            } catch (\JsonException $e) {
                // The caller passed JSON_THROW_ON_ERROR; input that is not strict
                // JSON may still be valid JSON5, so fall through to our own parser.
            }
        }

        // Fall back to JSON5 if that fails
        $associative = $associative === true
            || ($associative === null && ($options & \JSON_OBJECT_AS_ARRAY) !== 0);
        $castBigIntToString = ($options & \JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value is an error. Compare against null
        // explicitly: a plain truthiness check would let a trailing "0" through.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset.
     *
     * @param int $at
     *
     * @return string|null The byte, or null if the offset is at or past the end of the input
     */
    private function getByte($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Returns the (possibly multi-byte) character starting at the current position.
     *
     * @return string|null The character, or null at the end of the input
     */
    private function currentChar()
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // A UTF-8 character is at most 4 bytes long
        return \mb_substr(\substr($this->json, $this->at, 4), 0, 1, 'UTF-8');
    }

    /**
     * Advance to the next byte, keeping the line bookkeeping up to date.
     *
     * @return string|null The new current byte, or null when there are no more bytes
     */
    private function next()
    {
        // A line ends at "\n", or at a "\r" which is not part of a "\r\n" pair
        // (in which case the "\n" that follows does the counting).
        if ($this->currentByte === "\n" || ($this->currentByte === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;

        return $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Advance to the next byte if the current one matches $c, or fail.
     *
     * @param string $c
     *
     * @throws SyntaxError if the current byte is not $c
     *
     * @return string|null
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(\sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        return $this->next();
    }

    /**
     * Get the byte after the current one without consuming anything.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * position is moved past the matched text.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the expression did not match
     */
    private function match($regex)
    {
        // Only match on the current line. Slice just that line out of the input
        // rather than copying the whole remainder of the document on every call.
        if ($this->at >= $this->length) {
            $subject = '';
        } else {
            $lineEnd = \strpos($this->json, "\n", $this->at);
            $subject = $lineEnd === false
                ? \substr($this->json, $this->at)
                : \substr($this->json, $this->at, $lineEnd - $this->at);
        }

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $this->at += $matches[0][1] + \strlen($matches[0][0]);
        $this->currentByte = $this->getByte($this->at);

        return $matches[0][0];
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @throws SyntaxError if no identifier starts at the current position
     *
     * @return string The identifier, with any \uXXXX escapes decoded
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are the zero-width non-joiner and zero-width joiner,
        // written as escapes so they stay visible and survive editing.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        return \preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return (string) \json_decode('"' . $m[0] . '"');
        }, $match);
    }

    /**
     * Parse a number: decimal or hexadecimal, optionally signed, or Infinity/NaN.
     *
     * @throws SyntaxError on octal-looking literals and malformed or non-finite numbers
     *
     * @return int|float|string A string is returned only for integer literals which do not
     *                          fit in a native int while big-int-as-string mode is enabled
     */
    private function number()
    {
        $sign = '';
        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -\INF : \INF;
        }

        // support for NaN; the sign is ignored as -NaN also is NaN
        if ($this->currentByte === 'N') {
            return $this->word();
        }

        $string = '';
        $isHex = false;

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $this->next();
                $isHex = true;
            } elseif (\is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // hexdec() yields an int, or a float when the value is too large for one
            $value = \hexdec($digits);
            if ($sign === '-') {
                $value = -1 * $value;
            }

            if (!\is_finite($value)) {
                $this->throwSyntaxError('Bad number');
            }

            return $value;
        }

        // @codingStandardsIgnoreStart
        if ((\is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }
        // @codingStandardsIgnoreEnd

        // Validate BEFORE doing any arithmetic: multiplying a non-numeric string
        // (e.g. '' or '.') yields 0 with a warning on PHP 7 and a TypeError on PHP 8.
        if (!\is_numeric($string)) {
            $this->throwSyntaxError('Bad number');
        }

        $number = ($sign === '-') ? '-' . $string : $string;

        // Adding 0 will automatically cast this to an int or float
        $value = $number + 0;

        if (!\is_finite($value)) {
            $this->throwSyntaxError('Bad number');
        }

        // Mirror json_decode()'s JSON_BIGINT_AS_STRING: only integer literals which
        // overflowed into a float keep their original digits as a string.
        if ($this->castBigIntToString && \is_float($value) && \preg_match('/^-?\d+$/', $number) === 1) {
            return $number;
        }

        return $value;
    }

    /**
     * Parse a single- or double-quoted string. The current byte must be the opening quote.
     *
     * @throws SyntaxError on an unterminated string, an unescaped newline or an unknown escape
     *
     * @return string
     */
    private function string()
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                if ($this->peek() === 'u') {
                    // Decode a whole run of \uXXXX escapes at once so that
                    // surrogate pairs are combined by json_decode().
                    $escapes = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                    if ($escapes !== null) {
                        $string .= (string) \json_decode('"' . $escapes . '"');
                        continue;
                    }
                }

                $this->next();
                if ($this->currentByte === "\r") {
                    // Line continuation; also swallow the "\n" of a "\r\n" pair
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the slash-star pair that begins this
     * block comment. To finish the block comment, we look for an ending star-slash pair of
     * characters, but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the leading / is not followed by / or *
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // Non-breaking space in UTF-8 (two bytes)
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, Infinity and NaN.
     *
     * The literal is selected by the current byte and then consumed one byte at a time.
     *
     * @throws SyntaxError if the input does not spell out one of the known literals
     *
     * @return bool|float|null
     */
    private function word()
    {
        static $words = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', \INF],
            'N' => ['NaN', \NAN],
        ];

        $first = $this->currentByte;
        if ($first !== null && isset($words[$first])) {
            list($literal, $value) = $words[$first];

            $literalLength = \strlen($literal);
            for ($i = 0; $i < $literalLength; $i++) {
                $this->nextOrFail($literal[$i]);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }

    /**
     * Parse an array value. The current byte must be the opening [.
     *
     * @throws SyntaxError on malformed arrays or when the maximum depth is exceeded
     *
     * @return array
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();

            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parse an object value. The current byte must be the opening {.
     *
     * @throws SyntaxError on malformed objects or when the maximum depth is exceeded
     *
     * @return array|\stdClass An array in associative mode, a stdClass otherwise
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();

            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @throws SyntaxError
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();

        $byte = $this->currentByte;

        if ($byte === '{') {
            return $this->obj();
        }

        if ($byte === '[') {
            return $this->arr();
        }

        if ($byte === '"' || $byte === "'") {
            return $this->string();
        }

        if ($byte === '-' || $byte === '+' || $byte === '.' || \is_numeric($byte)) {
            return $this->number();
        }

        return $this->word();
    }

    /**
     * Throws a SyntaxError carrying the current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        // Calculate the column number: count characters (not bytes) since the line start
        $lineSoFar = (string) \substr(
            $this->json,
            $this->currentLineStartsAt,
            $this->at - $this->currentLineStartsAt
        );
        $column = \mb_strlen($lineSoFar, 'UTF-8') + 1;

        throw new SyntaxError($message, $this->lineNumber, $column);
    }

    /**
     * Formats a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or EOF when $chr is null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Returns the text represented by the byte following a backslash in a string.
     *
     * @param string|null $ch
     *
     * @return string|null The replacement text, or null if $ch is not a supported escape
     */
    private static function getEscapee($ch)
    {
        static $escapees = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '', // line continuation: the newline is dropped
            'b'  => "\x08",
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        if ($ch !== null && isset($escapees[$ch])) {
            return $escapees[$ch];
        }

        return null;
    }
}
630