Completed
Push — master ( 2bb3fb...2d1db6 )
by Colin
7s
created

Json5Decoder::nextOrFail()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 2.0054

Importance

Changes 0
Metric Value
cc 2
eloc 7
c 0
b 0
f 0
nc 2
nop 1
dl 0
loc 12
ccs 8
cts 9
cp 0.8889
crap 2.0054
rs 9.4285
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    /** @var int Zero-based character index of the current position in $chArr. */
    private $at = 0;

    /** @var int Line of the current position, reported in syntax errors (starts at 1). */
    private $lineNumber = 1;

    /** @var int Column of the current position, reported in syntax errors (starts at 1). */
    private $columnNumber = 1;

    /** @var string|null Character at the current position, or null once the input is exhausted. */
    private $ch;

    /** @var string[] The input split into individual UTF-8 characters. */
    private $chArr;

    /** @var bool Whether objects are decoded into associative arrays instead of stdClass instances. */
    private $associative = false;

    /** @var int Maximum nesting depth allowed before a syntax error is raised. */
    private $maxDepth = 512;

    /** @var bool Whether the JSON_BIGINT_AS_STRING behaviour was requested. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; incremented when entering an array or object. */
    private $depth = 1;

    /** @var int Number of characters in the input. */
    private $length;

    /** @var string Cached tail of the input, starting at character index $remainderCacheAt. */
    private $remainderCache;

    /** @var int Character index at which $remainderCache begins. */
    private $remainderCacheAt;
42
43
    /**
     * Private constructor; use the static decode() method instead.
     *
     * Splits the input into UTF-8 characters and positions the parser on the
     * first one. Input that is not valid UTF-8 is treated as empty, so parsing
     * fails with a regular syntax error rather than indexing into a failed
     * preg_split() result.
     *
     * @param string $json               The JSON5 text to parse.
     * @param bool   $associative        Decode objects into associative arrays.
     * @param int    $depth              Maximum nesting depth.
     * @param bool   $castBigIntToString Whether JSON_BIGINT_AS_STRING was requested.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = (bool) $castBigIntToString;

        // -1 means "no limit"; passing null for the limit is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() returns false when the subject is not valid UTF-8.
            $this->chArr = [];
            $this->length = 0;
        } else {
            $this->chArr = $chars;
            $this->length = mb_strlen($json, 'utf-8');
        }

        $this->ch = $this->charAt(0);

        $this->remainderCache = $json;
        $this->remainderCacheAt = 0;
    }
65
66
    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * On PHP 7+ the native json_decode() is tried first; the JSON5 parser is
     * only used when the native decoder rejects the input.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the input is not valid JSON5
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster.
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly.
        // PHP_VERSION_ID is major * 10000 + minor * 100 + release, so PHP 7.0.0 is 70000.
        // A non-positive depth is left to the JSON5 parser so json_decode() does not warn.
        if (PHP_VERSION_ID >= 70000 && $depth > 0) {
            try {
                $result = json_decode($source, $associative, $depth, $options);
                if (json_last_error() === JSON_ERROR_NONE) {
                    return $result;
                }
            } catch (\Throwable $e) {
                // json_decode() may throw (e.g. with JSON_THROW_ON_ERROR); fall back to JSON5 below.
            }
        }

        // Fall back to JSON5 if that fails
        $associative = $associative === true
            || ($associative === null && ($options & JSON_OBJECT_AS_ARRAY) !== 0);
        $castBigIntToString = ($options & JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value is an error. Compare against null explicitly:
        // a truthiness test would let a trailing '0' character slip through.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
104
105
    /**
     * Look up the character stored at a given index.
     *
     * @param int $at Zero-based character index.
     *
     * @return string|null The character, or null when the index is at or past the end of the input.
     */
    private function charAt($at)
    {
        return $at < $this->length ? $this->chArr[$at] : null;
    }
118
119
    /**
     * Advance to the next character, keeping the line and column counters in sync.
     *
     * A "\n", or a "\r" that is not followed by "\n", ends the current line.
     *
     * @return null|string The new current character, or null when the input is exhausted.
     */
    private function next()
    {
        // Decide this before moving, since peek() is relative to the current position.
        $current = $this->ch;
        $endsLine = $current === "\n" || ($current === "\r" && $this->peek() !== "\n");

        ++$this->at;
        if ($endsLine) {
            ++$this->lineNumber;
            $this->columnNumber = 1;
        } else {
            ++$this->columnNumber;
        }

        return $this->ch = $this->charAt($this->at);
    }
141
142
    /**
     * Consume the current character, which must be $c, and move to the next one.
     *
     * @param string $c The character expected at the current position.
     *
     * @return string|null The character following $c, or null at end of input.
     */
    private function nextOrFail($c)
    {
        if ($this->ch === $c) {
            return $this->next();
        }

        $message = sprintf(
            'Expected %s instead of %s',
            self::renderChar($c),
            self::renderChar($this->ch)
        );
        $this->throwSyntaxError($message);

        return $this->next();
    }
161
162
    /**
     * Look at the character after the current one without moving the
     * position or touching the ch variable.
     *
     * @return string|null The upcoming character, or null if there is none.
     */
    private function peek()
    {
        $lookahead = $this->at + 1;

        return $this->charAt($lookahead);
    }
172
173
    /**
     * Try to match a regular expression against the unparsed remainder of the input.
     *
     * On success the position and column counter are moved past the matched
     * text. The line counter is not touched here, so this must not be used to
     * match across multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        $remainder = $this->getRemainder();

        $found = [];
        $hit = preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE);
        if (!$hit) {
            return null;
        }

        // Each capture is a [text, byte offset] pair.
        list($text, $byteOffset) = $found[0];

        // PREG_OFFSET_CAPTURE reports bytes, so convert the offset into a character count.
        $charOffset = mb_strlen(mb_strcut($remainder, 0, $byteOffset, 'utf-8'), 'utf-8');
        $consumed = $charOffset + mb_strlen($text, 'utf-8');

        $this->at += $consumed;
        $this->columnNumber += $consumed;
        $this->ch = $this->charAt($this->at);

        return $text;
    }
204
205
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded.
     */
    private function identifier()
    {
        // The zero-width non-joiner (U+200C) and joiner (U+200D) are valid identifier parts.
        // They are written as \x{...} escapes so the invisible characters cannot be lost in editing.
        // @codingStandardsIgnoreStart
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by letting json_decode() interpret them as a JSON string
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }
233
234 210
    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * As with json_decode(), JSON_BIGINT_AS_STRING only affects decimal integers
     * that are too large for a native int; everything else is returned as int or float.
     *
     * @return int|float|string
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next();
        }

        // support for Infinity
        if ($this->ch === 'I') {
            $number = $this->word();
            if ($number === null) {
                $this->throwSyntaxError('Unexpected word for number');
            }

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();
            // NAN never compares equal to anything, itself included, so is_nan() is required here.
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits go to hexdec(); the "0x" prefix would trigger a deprecation on PHP 7.4+.
            $number = hexdec($digits);
            if ($sign === '-') {
                $number = -$number;
            }

            if (!is_finite($number)) {
                $this->throwSyntaxError('Bad number');
            }

            return $number;
        }

        if ((is_numeric($this->ch) || $this->ch === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($this->ch === 'E' || $this->ch === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }

        // Keep the literal as text and apply the sign textually. Negating the string
        // arithmetically would coerce malformed input such as "" or "1e" into a number
        // and would lose precision on big integers.
        $number = ($sign === '-') ? '-' . $string : $string;

        if (!is_numeric($number) || !is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // An integer literal that became a float did not fit into a native int.
        $isIntegerLiteral = preg_match('/^-?\d+\z/', $number) === 1;
        if ($this->castBigIntToString && $isIntegerLiteral && is_float($asIntOrFloat)) {
            return $number;
        }

        return $asIntOrFloat;
    }
313
314 93
    /**
     * Parse a quoted string.
     *
     * The current character is taken as the opening quote; parsing continues
     * until the same character is seen again unescaped.
     *
     * @return string The string contents with escape sequences resolved.
     */
    private function string()
    {
        $quote = $this->ch;
        $buffer = '';

        $this->next();
        while ($this->ch !== null) {
            if ($this->ch === $quote) {
                $this->next();

                return $buffer;
            }

            if ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            }

            if ($this->ch !== '\\') {
                // Ordinary character: copy it and move on.
                $buffer .= $this->ch;
                $this->next();
                continue;
            }

            // A run of \uXXXX escapes is consumed in one go; match() already
            // leaves the position on the character after the run.
            if ($this->peek() === 'u') {
                $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                if ($unicodeEscaped) {
                    $buffer .= json_decode('"'.$unicodeEscaped.'"');
                    continue;
                }
            }

            // Step onto the character that follows the backslash.
            $this->next();
            if ($this->ch === "\r") {
                // An escaped "\r" (optionally followed by "\n") contributes nothing to the result.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $escapee = self::getEscapee($this->ch);
                if ($escapee === null) {
                    break;
                }
                $buffer .= $escapee;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
357
358
    /**
     * Skip over an inline (//) comment.
     *
     * Expects the current character to be the second slash of the // pair.
     * The comment runs until a newline character, which is consumed as well,
     * or until the end of the text.
     */
    private function inlineComment()
    {
        while (true) {
            $char = $this->next();

            if ($char === null) {
                return;
            }

            if ($char === "\n" || $char === "\r") {
                $this->next();

                return;
            }
        }
    }
375
376
    /**
     * Skip over a block comment.
     *
     * Expects the current character to be the asterisk of the opening
     * slash-asterisk pair. The comment ends at the first asterisk that is
     * directly followed by a slash; reaching the end of the text before
     * that is a syntax error.
     */
    private function blockComment()
    {
        // Step past the opening asterisk.
        $this->next();

        while ($this->ch !== null) {
            if ($this->ch !== '*') {
                $this->next();
                continue;
            }

            // Consume the asterisk and check whether it closes the comment.
            $this->next();
            if ($this->ch === '/') {
                $this->next();

                return;
            }
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
399
400
    /**
     * Skip one comment of either kind (inline or block).
     *
     * Expects the current character to be the slash that starts the comment.
     */
    private function comment()
    {
        // Consume the leading slash; the next character decides the comment type.
        $this->nextOrFail('/');

        if ($this->ch === '/') {
            $this->inlineComment();

            return;
        }

        if ($this->ch === '*') {
            $this->blockComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
416
417
    /**
     * Skip any run of whitespace and comments.
     *
     * A single / is taken as the start of a comment. That is safe because
     * no JSON5 value begins with a slash, but it would need revisiting if
     * such a value were ever introduced.
     */
    private function white()
    {
        for (;;) {
            $char = $this->ch;

            if ($char === null) {
                return;
            }

            if ($char === '/') {
                $this->comment();
                continue;
            }

            if (preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $char) !== 1) {
                return;
            }

            $this->next();
        }
    }
436
437
    /**
     * Match one of the literal words: true, false, null, Infinity or NaN.
     *
     * The word is selected by the current character; every following
     * character must then match exactly.
     *
     * @return bool|float|null The PHP value the literal stands for.
     */
    private function word()
    {
        // First character => [literal text, resulting value]
        $literals = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if ($first !== null && isset($literals[$first])) {
            list($text, $value) = $literals[$first];
            foreach (str_split($text) as $expected) {
                $this->nextOrFail($expected);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }
481
482 42
    /**
     * Parse an array value.
     *
     * Trailing commas are accepted; omitted elements (e.g. [,] or [,null]) are not.
     *
     * @return array
     */
    private function arr()
    {
        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        $this->depth++;
        if ($this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $items = [];

        $this->nextOrFail('[');
        $this->white();
        while ($this->ch !== null) {
            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            if ($this->ch !== ']') {
                $items[] = $this->value();
                $this->white();

                if ($this->ch === ',') {
                    $this->nextOrFail(',');
                    $this->white();
                    continue;
                }
            }

            // Either the array was closed right away (possibly empty), or there was
            // no comma after the last value, in which case it has to end here.
            $this->nextOrFail(']');
            $this->depth--;

            return $items;
        }

        $this->throwSyntaxError('Bad array');
    }
522
523
    /**
     * Parse an object value.
     *
     * Keys may be quoted strings or unquoted identifiers, and a trailing comma
     * is accepted.
     *
     * @return array|\stdClass An associative array when $associative is set, a stdClass otherwise.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->nextOrFail('{');
            $this->white();

            // Compare against null explicitly (as arr() does): a truthiness test
            // would mistake the character '0' for the end of the input.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->nextOrFail('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->nextOrFail(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->nextOrFail('}');
                    $this->depth--;
                    return $object;
                }
                $this->nextOrFail(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
574
575
    /**
     * Parse any JSON5 value.
     *
     * Leading whitespace and comments are skipped, then the first character
     * decides whether an object, array, string, number or word follows.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        $char = $this->ch;

        if ($char === '{') {
            return $this->obj();
        }

        if ($char === '[') {
            return $this->arr();
        }

        if ($char === '"' || $char === "'") {
            return $this->string();
        }

        $startsNumber = $char === '-' || $char === '+' || $char === '.' || is_numeric($char);

        return $startsNumber ? $this->number() : $this->word();
    }
600
601 114
    /**
     * Raise a syntax error located at the current line and column.
     *
     * @param string $message
     */
    private function throwSyntaxError($message)
    {
        $error = new SyntaxError($message, $this->lineNumber, $this->columnNumber);

        throw $error;
    }
605
606 27
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The character in single quotes, or 'EOF' for null.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'" . $chr . "'";
    }
610
611
    /**
     * Resolve the character following a backslash to the text it represents.
     *
     * @param string $ch The character that came after the backslash.
     *
     * @return string|null The replacement text, or null when $ch is not a known escape.
     */
    private static function getEscapee($ch)
    {
        $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '', // an escaped newline contributes nothing to the string
            'b'  => chr(8),
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        if (is_string($ch) && isset($escapes[$ch])) {
            return $escapes[$ch];
        }

        return null;
    }
634
635
    /**
     * Returns everything from $this->at onwards.
     *
     * Utilizes a cache so we don't have to continuously parse through UTF-8
     * data that was earlier in the string which we don't even care about.
     *
     * @return string
     */
    private function getRemainder()
    {
        if ($this->remainderCacheAt === $this->at) {
            return $this->remainderCache;
        }

        // $at counts UTF-8 characters, so the encoding is passed explicitly, like every
        // other mb_* call in this class, instead of relying on mb_internal_encoding().
        // A null length means "up to the end of the string".
        $subject = mb_substr($this->remainderCache, $this->at - $this->remainderCacheAt, null, 'utf-8');
        $this->remainderCache = $subject;
        $this->remainderCacheAt = $this->at;

        return $subject;
    }
655
}
656