Completed
Push — master ( 4bd364...82b0e0 )
by Colin
01:11
created

Json5Decoder::inlineComment()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 11
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 7
nc 2
nop 0
dl 0
loc 11
ccs 7
cts 7
cp 1
crap 4
rs 9.2
c 1
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    /**
     * PCRE pattern used by white() to test whether a single character is whitespace.
     */
    const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u';

    /** @var string The text being decoded. */
    private $json;

    /** @var int Zero-based character (not byte) offset of the current character. */
    private $at = 0;

    /** @var int One-based line number of the current character; reported in syntax errors. */
    private $lineNumber = 1;

    /** @var int One-based column number of the current character; reported in syntax errors. */
    private $columnNumber = 1;

    /** @var string|null The current character, or null once the end of the text is reached. */
    private $ch;

    /** @var bool Whether objects are decoded into associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Deepest array/object nesting level accepted. */
    private $maxDepth = 512;

    /** @var bool Value of the JSON_BIGINT_AS_STRING option as passed to decode(). */
    private $castBigIntToString = false;

    /** @var int Current nesting level; incremented on entering an array or object. */
    private $depth = 1;

    /** @var int Length of the text in characters. */
    private $length;

    /** @var string[]|null Lines of the text, split lazily by getLineRemainder(). */
    private $lineCache;
42
43
    /**
     * Sets up a decoder positioned on the first character of the given text.
     *
     * The constructor is private; within this class instances are created by decode().
     *
     * @param string $json               Text to parse.
     * @param bool   $associative        Decode objects as associative arrays when true.
     * @param int    $depth              Maximum nesting depth allowed.
     * @param bool   $castBigIntToString Whether JSON_BIGINT_AS_STRING was requested.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        // Decoding options.
        $this->castBigIntToString = $castBigIntToString;
        $this->maxDepth = $depth;
        $this->associative = $associative;

        // Input text and its length in characters.
        $this->json = $json;
        $this->length = mb_strlen($this->json, 'utf-8');

        // Prime the cursor with the first character (null for empty input).
        $this->ch = $this->charAt(0);
    }
62
63
    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters mirror PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded (cast to string before parsing).
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options; JSON_OBJECT_AS_ARRAY and
     *                            JSON_BIGINT_AS_STRING are the flags read here.
     *
     * @return mixed The decoded value.
     *
     * @throws SyntaxError When the text is malformed or has content after the first value.
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Turn the bitmask tests into real booleans, as the constructor documents.
        $associative = $associative || (bool)($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool)($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string)$source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Only whitespace and comments may follow the value. The comparison must be
        // against null: a leftover "0" is a falsy string and would slip through a
        // plain truthiness check.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
91
92
    /**
     * Reads the single character stored at a character offset of the text.
     *
     * @param int $at Zero-based character offset.
     *
     * @return string|null The character, or null when the offset lies outside the text.
     */
    private function charAt($at)
    {
        $insideText = $at >= 0 && $at < $this->length;

        return $insideText ? mb_substr($this->json, $at, 1, 'utf-8') : null;
    }
105
106
    /**
     * Advance to the next character.
     *
     * When $c is supplied, the current character must equal it, otherwise a
     * syntax error naming both characters is raised.
     *
     * @param string|null $c Character the cursor is expected to be on, if any.
     *
     * @return null|string The new current character, or null at the end of the text.
     */
    private function next($c = null)
    {
        if ($c !== null && $this->ch !== $c) {
            $wanted = self::renderChar($c);
            $found = self::renderChar($this->ch);
            $this->throwSyntaxError(sprintf('Expected %s instead of %s', $wanted, $found));
        }

        // A line ends on "\n", or on a "\r" that is not immediately followed by "\n".
        // This has to be decided before the offset moves, because peek() is relative to it.
        $leavingLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($leavingLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }
142
143
    /**
     * Look at the character after the current one without moving the cursor
     * or touching the ch property.
     *
     * @return string|null The following character, or null when there is none.
     */
    private function peek()
    {
        $lookahead = $this->at + 1;

        return $this->charAt($lookahead);
    }
153
154
    /**
     * Returns the part of the current line that starts at the current column.
     *
     * The text is split into lines once, on first use, and the result is cached.
     *
     * @return string
     */
    private function getLineRemainder()
    {
        // Lines are separated by "\n", "\r\n", or a lone "\r" - the same rule next()
        // uses when it increments the line number.
        if ($this->lineCache === null) {
            $this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
        }

        $line = $this->lineCache[$this->lineNumber - 1];

        // Columns are counted in UTF-8 characters everywhere else in this class, so the
        // encoding is passed explicitly instead of relying on mb_internal_encoding().
        // An explicit length is passed because the encoding argument comes after it.
        return mb_substr($line, $this->columnNumber - 1, mb_strlen($line, 'utf-8'), 'utf-8');
    }
168
169
    /**
     * Try a regular expression against the rest of the current line and, on
     * success, move the cursor past the end of the matched text.
     *
     * Only the remainder of the current line is searched, so a match never
     * spans multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern did not match.
     */
    private function match($regex)
    {
        $remainder = $this->getLineRemainder();

        if (preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE) !== 1) {
            return null;
        }

        // Each capture is a [text, byte offset] pair; index 0 is the whole match.
        list($text, $byteOffset) = $found[0];

        // preg_match() reports offsets in bytes, while the cursor counts characters,
        // so measure the text that precedes the match in characters.
        $leading = mb_strcut($remainder, 0, $byteOffset, 'utf-8');
        $consumed = mb_strlen($leading, 'utf-8') + mb_strlen($text, 'utf-8');

        $this->at += $consumed;
        $this->columnNumber += $consumed;
        $this->ch = $this->charAt($this->at);

        return $text;
    }
200
201
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes replaced by the characters they denote.
     *
     * @throws SyntaxError When the text at the cursor is not a valid identifier.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // First character: $, _, a letter, a letter-number, or a \uXXXX escape.
        // Following characters may additionally be combining marks, digits, connector
        // punctuation, or the zero-width (non-)joiners. The joiners are written as
        // explicit \x{200C}\x{200D} escapes so that no invisible characters have to
        // survive in the source file.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) {
            return self::fromCharCode($m[1]);
        }, $match);

        return $unescaped;
    }
229
230 210
    /**
     * Parse a number: decimal or hexadecimal, optionally signed, or a signed
     * Infinity / NaN.
     *
     * @return int|float|string An int or float; the digit string itself is returned only
     *                          when the big-int-as-string option is set and a decimal
     *                          integer literal is too large to be held as an int.
     *
     * @throws SyntaxError On octal-style literals, empty hex literals, or text that is
     *                     not a finite number.
     */
    private function number()
    {
        $number = null;
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity
        if ($this->ch === 'I') {
            $number = $this->word();
            if ($number === null) {
                $this->throwSyntaxError('Unexpected word for number');
            }

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();

            // NAN is never equal (or identical) to anything, itself included, so it
            // can only be recognised with is_nan().
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $string .= $this->ch;
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are converted: the "0x" prefix is not a hex digit and
            // hexdec() complains about such characters on newer PHP versions.
            $number = hexdec($digits);
        } else {
            if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }
            $number = $string;
        }

        // Validate before doing any arithmetic, so that malformed input such as a
        // lone "-" is reported as a syntax error rather than failing inside PHP.
        if (!is_numeric($number) || !is_finite((float)$number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $value = $number + 0;

        // As with json_decode(), the string form is kept only for plain decimal
        // integers that overflowed into a float.
        $overflowedInteger = $base === 10 && is_float($value) && preg_match('/^\d+$/', $string) === 1;
        if ($this->castBigIntToString && $overflowedInteger) {
            return ($sign === '-' ? '-' : '') . $string;
        }

        return ($sign === '-') ? -$value : $value;
    }
309
310 75
    /**
     * Parse a string delimited by single or double quotes.
     *
     * The cursor must be on the opening quote; on success it is left just past
     * the closing quote.
     *
     * @return string The string contents with escape sequences resolved.
     *
     * @throws SyntaxError When the cursor is not on a quote, the string is not closed,
     *                     an escape is not recognised, or a raw "\n" appears inside it.
     */
    private function string()
    {
        $quote = $this->ch;
        if ($quote !== '"' && $quote !== "'") {
            $this->throwSyntaxError('Bad string');
        }

        $buffer = '';
        $this->next();

        while ($this->ch !== null) {
            // Closing quote: step past it and hand back what was collected.
            if ($this->ch === $quote) {
                $this->next();

                return $buffer;
            }

            // unescaped newlines are invalid; see:
            // https://github.com/json5/json5/issues/24
            if ($this->ch === "\n") {
                break;
            }

            // Ordinary character: copy it through.
            if ($this->ch !== '\\') {
                $buffer .= $this->ch;
                $this->next();
                continue;
            }

            // Escape sequence: look at the character after the backslash.
            $this->next();

            if ($this->ch === 'u') {
                $this->next();
                $hex = $this->match('/^[A-Fa-f0-9]{4}/');
                if ($hex === null) {
                    break;
                }
                // match() already moved the cursor past the four digits.
                $buffer .= self::fromCharCode($hex);
                continue;
            }

            if ($this->ch === "\r") {
                // Backslash before "\r" or "\r\n" is a line continuation: nothing is appended.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $escapee = self::getEscapee($this->ch);
                if ($escapee === null) {
                    break;
                }
                $buffer .= $escapee;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
358
359
    /**
     * Skip an inline (//) comment.
     *
     * On entry the cursor is expected to be on the second slash of the // pair.
     * Characters are consumed until a "\n" or "\r" has been stepped over, or
     * until the text runs out.
     */
    private function inlineComment()
    {
        for (;;) {
            $this->next();

            if ($this->ch === "\n" || $this->ch === "\r") {
                // Step over the line break that ends the comment.
                $this->next();

                return;
            }

            if ($this->ch === null) {
                // End of text also ends the comment.
                return;
            }
        }
    }
376
377
    /**
     * Skip a block comment.
     *
     * On entry the cursor is expected to be on the * of the opening slash-star pair.
     * The comment ends at the first star that is directly followed by a slash.
     *
     * @throws SyntaxError When the text ends before the comment is closed.
     */
    private function blockComment()
    {
        // Step over the opening '*'; it cannot double as the star of the closing pair.
        $this->next();

        $afterStar = false;
        while ($this->ch !== null) {
            if ($afterStar && $this->ch === '/') {
                $this->next();

                return;
            }

            // Remember whether the character being left behind was a star.
            $afterStar = $this->ch === '*';
            $this->next();
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
400
401
    /**
     * Skip one comment of either kind (inline or block).
     *
     * @throws SyntaxError When the cursor is not on a slash, or the slash is not
     *                     followed by a second slash or a star.
     */
    private function comment()
    {
        // Every comment opens with a slash.
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not a comment');
        }

        $this->next('/');

        // The second character decides which kind of comment this is.
        if ($this->ch === '*') {
            $this->blockComment();

            return;
        }

        if ($this->ch === '/') {
            $this->inlineComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
421
422
    /**
     * Move the cursor past any run of whitespace and comments.
     *
     * A lone slash is taken as the start of a comment. That is sufficient because
     * no JSON5 value starts with a slash; it would stop working if one ever did.
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
                continue;
            }

            // Anything that is not whitespace ends the run.
            if (preg_match(self::REGEX_WHITESPACE, $this->ch) !== 1) {
                return;
            }

            $this->next();
        }
    }
441
442
    /**
     * Parse one of the bare keywords: true, false, null, Infinity or NaN.
     *
     * The keyword is chosen by the current character and then checked letter by letter.
     *
     * @return bool|float|null The PHP value the keyword stands for.
     *
     * @throws SyntaxError When the current character starts none of the keywords,
     *                     or the following characters do not spell the keyword out.
     */
    private function word()
    {
        // First letter => [full spelling, resulting value].
        $keywords = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $initial = $this->ch;
        if (!is_string($initial) || !isset($keywords[$initial])) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
        }

        list($spelling, $result) = $keywords[$initial];

        // next() raises the syntax error as soon as a letter does not match.
        foreach (str_split($spelling) as $letter) {
            $this->next($letter);
        }

        return $result;
    }
486
487 42
    /**
     * Parse an array value.
     *
     * The cursor must be on the opening bracket; on success it is left just past
     * the closing bracket. A trailing comma before the closing bracket is accepted.
     *
     * @return array The decoded elements, in order.
     *
     * @throws SyntaxError When the nesting limit is exceeded, an element is missing,
     *                     or the array is malformed or never closed.
     */
    private function arr()
    {
        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $items = [];

        $this->next('[');
        $this->white();

        while ($this->ch !== null) {
            // Reached directly after '[' (empty array) or after a trailing comma.
            if ($this->ch === ']') {
                $this->next(']');
                $this->depth--;

                return $items;
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $items[] = $this->value();
            $this->white();

            // Without a comma after the element, only the closing bracket may follow.
            if ($this->ch !== ',') {
                $this->next(']');
                $this->depth--;

                return $items;
            }

            $this->next(',');
            $this->white();
        }

        // The text ended while the array was still open.
        $this->throwSyntaxError('Bad array');
    }
526
527
    /**
     * Parse an object value.
     *
     * The cursor must be on the opening brace; on success it is left just past the
     * closing brace. Keys may be quoted strings or unquoted identifiers, and a
     * trailing comma before the closing brace is accepted.
     *
     * @return array|\stdClass An associative array when the decoder is in associative
     *                         mode, otherwise a stdClass instance.
     *
     * @throws SyntaxError When the nesting limit is exceeded or the object is malformed
     *                     or never closed.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();

            // Loop until the end of the text. The check is against null (as in arr())
            // because the character "0" is a falsy string: with a truthiness test a key
            // starting with 0 would leave the loop and be reported as a generic
            // 'Bad object' instead of reaching the key parser below.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;
                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
578
579
    /**
     * Parse a single value of any kind.
     *
     * Leading whitespace and comments are skipped, then the first character
     * selects the parser: object, array, string, number, or keyword.
     *
     * @return mixed The decoded value.
     */
    private function value()
    {
        $this->white();

        $lead = $this->ch;

        if ($lead === '{') {
            return $this->obj();
        }

        if ($lead === '[') {
            return $this->arr();
        }

        if ($lead === '"' || $lead === "'") {
            return $this->string();
        }

        // A sign, a leading dot or a digit starts a number.
        if ($lead === '-' || $lead === '+' || $lead === '.' || is_numeric($lead)) {
            return $this->number();
        }

        // Everything else has to be a keyword; word() rejects what it does not know.
        return $this->word();
    }
604
605 108
    /**
     * Raise a syntax error that carries the current line and column.
     *
     * @param string $message Description of the problem.
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        $line = $this->lineNumber;
        $column = $this->columnNumber;

        throw new SyntaxError($message, $line, $column);
    }
609
610 24
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr The character, or null for the end of the text.
     *
     * @return string 'EOF' for null, otherwise the character wrapped in single quotes.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return sprintf("'%s'", $chr);
    }
614
615
    /**
     * Convert a hexadecimal code point into the UTF-8 encoded character.
     *
     * The bytes are assembled directly, so the result does not depend on mbstring's
     * 'HTML-ENTITIES' pseudo-encoding.
     *
     * @param string $hex Hex code (the callers in this class pass exactly four hex digits)
     *
     * @return string Unicode character, UTF-8 encoded. Surrogate halves (D800-DFFF) and
     *                values above 10FFFF cannot be expressed in UTF-8 and yield U+FFFD.
     */
    private static function fromCharCode($hex)
    {
        $codePoint = (int)hexdec($hex);

        if ($codePoint < 0x80) {
            return chr($codePoint);
        }

        if ($codePoint < 0x800) {
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        // Not encodable: answer with the replacement character U+FFFD.
        if (($codePoint >= 0xD800 && $codePoint <= 0xDFFF) || $codePoint > 0x10FFFF) {
            return "\xEF\xBF\xBD";
        }

        if ($codePoint < 0x10000) {
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }
624
625
    /**
     * Look up the text a single-character escape stands for.
     *
     * @param string $ch The character that followed the backslash.
     *
     * @return string|null The replacement text (an empty string for an escaped "\n"),
     *                     or null when the character is not a known escape.
     */
    private static function getEscapee($ch)
    {
        $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => chr(8),
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        if (is_string($ch) && isset($escapes[$ch])) {
            return $escapes[$ch];
        }

        return null;
    }
648
}
649