Completed
Pull Request — master (#2)
by Colin
01:25
created

Json5Decoder::comment()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 3
eloc 8
c 3
b 0
f 0
nc 3
nop 0
dl 0
loc 13
ccs 9
cts 9
cp 1
crap 3
rs 9.4285
1
<?php
2
3
/*
 * This file is part of the colinodell/json5 package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 documents.
 *
 * The only public entry point is the static decode() method, which mirrors
 * the signature of PHP's json_decode(). Every parse error is reported by
 * throwing a SyntaxError carrying the line and column of the offending
 * character.
 */
final class Json5Decoder
{
    /** @var int Zero-based index (in characters, not bytes) of the current character. */
    private $at = 0;

    /** @var int One-based line number of the current character; used for error reporting. */
    private $lineNumber = 1;

    /** @var int One-based column number of the current character; used for error reporting. */
    private $columnNumber = 1;

    /** @var string|null The current character, or null once the end of the input is reached. */
    private $ch;

    /** @var string[] The input split into individual UTF-8 characters. */
    private $chArr;

    /** @var bool When true, objects are decoded as associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Maximum nesting depth before a SyntaxError is raised. */
    private $maxDepth = 512;

    /** @var bool When true, integers too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; incremented for every array/object being parsed. */
    private $depth = 1;

    /** @var int Number of characters in the input. */
    private $length;

    /** @var string The part of the input that starts at character index $remainderCacheAt. */
    private $remainderCache;

    /** @var int Character index at which $remainderCache begins. */
    private $remainderCacheAt;

    /**
     * Private constructor.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        // A limit of -1 means "no limit"; passing null here is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the input is not valid UTF-8. Treat that
        // as an empty input so the parser reports a SyntaxError instead of indexing into false.
        $this->chArr = $chars === false ? [] : $chars;
        $this->length = count($this->chArr);
        $this->ch = $this->charAt(0);

        $this->remainderCache = $json;
        $this->remainderCacheAt = 0;
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @throws SyntaxError if the input is neither valid JSON nor valid JSON5
     *
     * @return mixed
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        try {
            $result = json_decode($source, $associative, $depth, $options);

            // With JSON_THROW_ON_ERROR the global error state is left untouched, so a
            // call that returns without throwing has succeeded regardless of json_last_error().
            $throwsOnError = defined('JSON_THROW_ON_ERROR') && ($options & JSON_THROW_ON_ERROR) !== 0;
            if ($throwsOnError || json_last_error() === JSON_ERROR_NONE) {
                return $result;
            }
        } catch (\JsonException $e) {
            // Not valid JSON, but it may still be valid JSON5 - fall through.
        }

        // Fall back to JSON5 if that fails
        $associative = $associative === true || ($associative === null && ($options & JSON_OBJECT_AS_ARRAY));
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value is an error. Compare against null explicitly:
        // a trailing '0' character is falsy and would otherwise slip through.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the character at the given character index.
     *
     * @param int $at
     *
     * @return string|null The character, or null when $at is past the end of the input
     */
    private function charAt($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->chArr[$at];
    }

    /**
     * Advance to the next character, keeping the line/column counters in sync.
     *
     * @return null|string The new current character, or null at the end of the input
     */
    private function next()
    {
        // A "\n", or a "\r" that is not followed by "\n", ends the current line.
        // The "\r" of a "\r\n" pair only advances the column so that the pair
        // is counted as a single line break.
        if ($this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        $this->at++;
        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }

    /**
     * Parse the next character if it matches $c or fail.
     *
     * @param string $c The character the current character must be equal to
     *
     * @throws SyntaxError if the current character is not $c
     *
     * @return string|null
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->ch) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            ));
        }

        return $this->next();
    }

    /**
     * Consume every character of $chars, in order, or fail on the first mismatch.
     *
     * @param string $chars ASCII-only sequence that must appear at the current position
     *
     * @throws SyntaxError if the input does not contain $chars at the current position
     */
    private function nextSequenceOrFail($chars)
    {
        $count = strlen($chars);
        for ($i = 0; $i < $count; $i++) {
            $this->nextOrFail($chars[$i]);
        }
    }

    /**
     * Get the next character without consuming it or
     * assigning it to the ch variable.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->charAt($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position.
     *
     * On success the matched text is consumed. Only the column counter is
     * advanced, never the line counter, so patterns passed to this method
     * must not consume line breaks.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the pattern did not match
     */
    private function match($regex)
    {
        $subject = $this->getRemainder();

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE the first entry holds a pair: the matched text
        // followed by its BYTE offset, which has to be converted to a character offset.
        $matchedText = $matches[0][0];
        $byteOffset = $matches[0][1];
        $offset = mb_strlen(mb_strcut($subject, 0, $byteOffset, 'utf-8'), 'utf-8');

        $advanceBy = $offset + mb_strlen($matchedText, 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matchedText;
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @throws SyntaxError if no valid identifier starts at the current position
     *
     * @return string The identifier with any \uXXXX escapes decoded
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are the zero-width non-joiner / joiner, written as escapes so they stay visible.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * @throws SyntaxError on octal literals, malformed numbers or non-finite results
     *
     * @return int|float|string A string is only returned for integer literals that do
     *                          not fit a PHP int while big-int-as-string is enabled
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next();
        }

        // support for Infinity
        if ($this->ch === 'I') {
            // word() either consumes the whole keyword or throws
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();

            // NAN is never equal (or identical) to itself, so is_nan() is the only reliable test
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $string .= $this->ch;
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            $number = hexdec($digits);
            if ($sign === '-') {
                $number = -$number;
            }

            if (!is_finite($number)) {
                $this->throwSyntaxError('Bad number');
            }

            return $number;
        }

        if ((is_numeric($this->ch) || $this->ch === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($this->ch === 'E' || $this->ch === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }

        // Validate BEFORE applying the sign: negating an empty or malformed string
        // would silently yield 0 (PHP 5/7) or raise a TypeError (PHP 8).
        if (!is_numeric($string) || !is_finite((float) $string)) {
            $this->throwSyntaxError('Bad number');
        }

        $number = ($sign === '-') ? '-' . $string : $string;

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // Like json_decode(), only integer literals that overflowed into a float
        // are handed back as strings; everything else keeps its numeric type.
        if ($this->castBigIntToString && is_float($asIntOrFloat) && preg_match('/^-?\d+$/', $number) === 1) {
            return $number;
        }

        return $asIntOrFloat;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current character must be the opening quote.
     *
     * @throws SyntaxError on unknown escapes, unescaped newlines or a missing closing quote
     *
     * @return string
     */
    private function string()
    {
        $string = '';

        $delim = $this->ch;
        $this->next();
        while ($this->ch !== null) {
            if ($this->ch === $delim) {
                $this->next();

                return $string;
            }

            if ($this->ch === '\\') {
                if ($this->peek() === 'u') {
                    // Consume a whole run of \uXXXX escapes at once so surrogate pairs stay together
                    $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                    if ($unicodeEscaped !== null) {
                        $string .= json_decode('"'.$unicodeEscaped.'"');
                        continue;
                    }
                }

                $this->next();
                if ($this->ch === "\r") {
                    // Escaped line break: swallow the "\n" of a "\r\n" pair as well
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->ch)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->ch;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        } while ($this->ch !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the opening pair of this block comment.
     * To finish the block comment, we look for the closing star-slash pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->ch === '*') {
                $this->nextOrFail('*');
                if ($this->ch === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the leading / is not followed by / or *
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->ch === '/') {
            $this->inlineComment();
        } elseif ($this->ch === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
            } elseif (preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $this->ch) === 1) {
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, etc
     *
     * @throws SyntaxError if the input at the current position is not one of the known words
     *
     * @return bool|float|null
     */
    private function word()
    {
        switch ($this->ch) {
            case 't':
                $this->nextSequenceOrFail('true');

                return true;
            case 'f':
                $this->nextSequenceOrFail('false');

                return false;
            case 'n':
                $this->nextSequenceOrFail('null');

                return null;
            case 'I':
                $this->nextSequenceOrFail('Infinity');

                return INF;
            case 'N':
                $this->nextSequenceOrFail('NaN');

                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }

    /**
     * Parse an array value.
     *
     * A trailing comma after the last element is accepted; omitted elements are not.
     *
     * @throws SyntaxError on malformed arrays or when the maximum depth is exceeded
     *
     * @return array
     */
    private function arr()
    {
        $arr = [];

        if ($this->ch === '[') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->nextOrFail('[');
            $this->white();
            while ($this->ch !== null) {
                if ($this->ch === ']') {
                    $this->nextOrFail(']');
                    $this->depth--;

                    return $arr; // Potentially empty array
                }

                // ES5 allows omitting elements in arrays, e.g. [,] and
                // [,null]. We don't allow this in JSON5.
                if ($this->ch === ',') {
                    $this->throwSyntaxError('Missing array element');
                }

                $arr[] = $this->value();

                $this->white();
                // If there's no comma after this value, this needs to
                // be the end of the array.
                if ($this->ch !== ',') {
                    $this->nextOrFail(']');
                    $this->depth--;

                    return $arr;
                }
                $this->nextOrFail(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad array');
    }

    /**
     * Parse an object value
     *
     * Keys may be quoted strings or unquoted identifiers, and a trailing comma
     * after the last pair is accepted.
     *
     * @throws SyntaxError on malformed objects or when the maximum depth is exceeded
     *
     * @return array|\stdClass An array in associative mode, a stdClass otherwise
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->nextOrFail('{');
            $this->white();

            // Compare against null explicitly: the character '0' is falsy in PHP
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->nextOrFail('}');
                    $this->depth--;

                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->nextOrFail(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();
                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->nextOrFail('}');
                    $this->depth--;

                    return $object;
                }
                $this->nextOrFail(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @throws SyntaxError if no valid value starts at the current position
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        switch ($this->ch) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return is_numeric($this->ch) ? $this->number() : $this->word();
        }
    }

    /**
     * Abort parsing by throwing a SyntaxError located at the current line and column.
     *
     * This method never returns.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        throw new SyntaxError($message, $this->lineNumber, $this->columnNumber);
    }

    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The character wrapped in single quotes, or "EOF" for null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Translate the character following a backslash into the text it stands for.
     *
     * @param string $ch
     *
     * @return string|null The replacement text (empty for an escaped newline),
     *                     or null if the escape is not recognised
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return '';
            case 'b':  return chr(8);
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }

    /**
     * Returns everything from $this->at onwards.
     *
     * Utilizes a cache so we don't have to continuously parse through UTF-8
     * data that was earlier in the string which we don't even care about.
     *
     * @return string
     */
    private function getRemainder()
    {
        if ($this->remainderCacheAt === $this->at) {
            return $this->remainderCache;
        }

        // Pass the encoding explicitly, like every other mb_* call in this class,
        // instead of depending on the configured internal encoding.
        $subject = mb_substr($this->remainderCache, $this->at - $this->remainderCacheAt, null, 'utf-8');
        $this->remainderCache = $subject;
        $this->remainderCacheAt = $this->at;

        return $subject;
    }
}
653