Completed
Push — master ( 1cbf3e...8c73e1 )
by Colin
01:59
created

Json5Decoder::decode()   B

Complexity

Conditions 6
Paths 13

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 7.3329

Importance

Changes 0
Metric Value
cc 6
nc 13
nop 4
dl 0
loc 25
ccs 10
cts 15
cp 0.6667
crap 7.3329
rs 8.8977
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 documents.
 *
 * The decoder walks the input one byte at a time ($currentByte), tracking the
 * line number and the offset of the current line so that syntax errors can
 * report a line/column position. Use the static decode() entry point; the
 * constructor is private.
 */
final class Json5Decoder
{
    /** @var string The document being decoded. */
    private $json;

    /** @var int Length of $json in bytes. */
    private $length;

    /** @var int Byte offset of the parser within $json. */
    private $at = 0;

    /** @var string|null Byte at offset $at, or null once the end of input is reached. */
    private $currentByte;

    /** @var int 1-based line number, used for error reporting. */
    private $lineNumber = 1;

    /** @var bool Whether objects are decoded as associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Maximum nesting depth before a syntax error is raised. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; the top-level value counts as depth 1. */
    private $depth = 1;

    /** @var int Byte offset at which the current line starts, used to compute the error column. */
    private $currentLineStartsAt = 0;

    /**
     * Private constructor.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
        if (PHP_VERSION_ID >= 70000) {
            $result = json_decode($source, $associative, $depth, $options);
            if (json_last_error() === JSON_ERROR_NONE) {
                return $result;
            }
        }

        // Fall back to JSON5 if that fails
        $associative = $associative === true
            || ($associative === null && ($options & JSON_OBJECT_AS_ARRAY) !== 0);
        $castBigIntToString = ($options & JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the top-level value is an error. Compare against
        // null explicitly: a trailing '0' byte is falsy but is still garbage.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Return the byte at the given offset.
     *
     * @param int $at
     *
     * @return string|null The byte, or null when $at is past the end of the input.
     */
    private function getByte($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Return the (possibly multi-byte) character at the current position.
     *
     * @return string|null The character, or null at the end of the input.
     */
    private function currentChar()
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // A UTF-8 character is at most 4 bytes long. The encoding is passed
        // explicitly so the result does not depend on mb_internal_encoding().
        return mb_substr(substr($this->json, $this->at, 4), 0, 1, 'UTF-8');
    }

    /**
     * Advance to the next byte.
     *
     * @return null|string The new current byte, or null at the end of the input.
     */
    private function next()
    {
        // Leaving a line terminator ("\n", or a "\r" that is not part of "\r\n")
        // starts a new line for error-reporting purposes.
        if ($this->currentByte === "\n" || ($this->currentByte === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;

        return $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Advance to the next byte if the current one matches $c, or fail.
     *
     * @param string $c
     *
     * @return string|null
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        return $this->next();
    }

    /**
     * Get the next byte without consuming it.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * parser position is moved past the end of the match.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        // substr() returns false at the end of the input on PHP < 8
        $subject = (string) substr($this->json, $this->at);

        // Only match on the current line. strpos() may legitimately return 0,
        // so compare against false rather than testing truthiness.
        $newlineAt = strpos($subject, "\n");
        if ($newlineAt !== false) {
            $subject = substr($subject, 0, $newlineAt);
        }

        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $this->at += $matches[0][1] + strlen($matches[0][0]);
        $this->currentByte = $this->getByte($this->at);

        return $matches[0][0];
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are the zero-width non-joiner and zero-width joiner,
        // written as escapes so the pattern contains no invisible characters.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * @return int|float|string A string is returned only for integer literals
     *                          that overflow a PHP int while JSON_BIGINT_AS_STRING
     *                          is in effect.
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->currentByte === 'N') {
            // ignore sign as -NaN also is NaN
            return $this->word();
        }

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are handed to hexdec(); the "0x" prefix is not part of the value.
            $value = hexdec($digits);
            if ($sign === '-') {
                $value = -$value;
            }

            if (is_float($value) && !is_finite($value)) {
                $this->throwSyntaxError('Bad number');
            }

            return $value;
        }

        // @codingStandardsIgnoreStart
        if ((is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }
        // @codingStandardsIgnoreEnd

        // Apply the sign textually so that validation sees the complete literal.
        // Negating the raw string would silently coerce malformed input such as
        // "-" or "-1e" on PHP 7 and raise a TypeError on PHP 8.
        $literal = ($sign === '-' ? '-' : '') . $string;

        if (!is_numeric($literal) || !is_finite((float) $literal)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $value = $literal + 0;

        // Mirror json_decode(): only integers that do not fit into a PHP int
        // are kept as strings when JSON_BIGINT_AS_STRING is set.
        if ($this->castBigIntToString && is_float($value) && preg_match('/^-?\d+\z/', $literal) === 1) {
            return $literal;
        }

        return $value;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current byte must be the opening quote.
     *
     * @return string
     */
    private function string()
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // A run of \uXXXX escapes is decoded in one go via json_decode()
                if ($this->peek() === 'u' && $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
                    $string .= json_decode('"'.$unicodeEscaped.'"');
                    continue;
                }

                $this->next();
                if ($this->currentByte === "\r") {
                    // Escaped "\r" or "\r\n": a line continuation that adds nothing to the string
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the slash-star pair that begins this block comment.
     * To finish the block comment, we look for an ending star-slash pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // Non-breaking space in UTF-8 (bytes are compared directly so
                // that a null peek() at the end of input is never passed to ord())
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, etc
     *
     * @return bool|float|null
     */
    private function word()
    {
        switch ($this->currentByte) {
            case 't':
                return $this->expectLiteral('true', true);
            case 'f':
                return $this->expectLiteral('false', false);
            case 'n':
                return $this->expectLiteral('null', null);
            case 'I':
                return $this->expectLiteral('Infinity', INF);
            case 'N':
                return $this->expectLiteral('NaN', NAN);
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }

    /**
     * Consume $literal byte by byte, failing on the first byte that differs.
     *
     * @param string $literal The keyword expected at the current position.
     * @param mixed  $value   The PHP value the keyword stands for.
     *
     * @return mixed $value
     */
    private function expectLiteral($literal, $value)
    {
        for ($i = 0, $count = strlen($literal); $i < $count; $i++) {
            $this->nextOrFail($literal[$i]);
        }

        return $value;
    }

    /**
     * Parse an array value.
     *
     * The current byte must be the opening bracket. A trailing comma after the
     * last element is accepted; omitted elements are not.
     *
     * @return array
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();

            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parse an object value
     *
     * The current byte must be the opening brace. Keys may be quoted strings or
     * unquoted identifiers; a trailing comma after the last pair is accepted.
     *
     * @return array|\stdClass An array in associative mode, a stdClass otherwise.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();

            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        switch ($this->currentByte) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return is_numeric($this->currentByte) ? $this->number() : $this->word();
        }
    }

    /**
     * Throw a SyntaxError carrying the current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        // Calculate the column number: count characters (not bytes) between the
        // start of the current line and the current position.
        $str = substr($this->json, $this->currentLineStartsAt, $this->at - $this->currentLineStartsAt);
        $column = mb_strlen($str, 'UTF-8') + 1;

        throw new SyntaxError($message, $this->lineNumber, $column);
    }

    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or "EOF" for null.
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Translate the character following a backslash into the text it stands for.
     *
     * @param string $ch
     *
     * @return string|null The replacement text, or null when $ch is not a supported escape.
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return ''; // escaped newline: line continuation
            case 'b':  return chr(8);
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }
}
630