Completed
Pull Request — master (#4)
by Colin
01:12
created

Json5Decoder::getRemainder()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 7
c 1
b 0
f 0
nc 2
nop 0
dl 0
loc 12
ccs 7
cts 7
cp 1
crap 2
rs 9.4285
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 documents.
 *
 * The decoder walks the input one byte at a time, keeping track of the
 * current line and column so that syntax errors can report a position.
 * Use the static decode() method; the class cannot be instantiated directly.
 */
final class Json5Decoder
{
    /** @var string The raw input being decoded. */
    private $json;

    /** @var int Length of the input in bytes. */
    private $length;

    /** @var int Byte offset of the cursor. */
    private $at = 0;

    /** @var string|null The byte under the cursor, or null at end of input. */
    private $currentByte;

    /** @var int 1-based line number of the cursor (used for error reporting). */
    private $lineNumber = 1;

    /** @var int 1-based column of the cursor (used for error reporting). */
    private $column = 1;

    /** @var bool Whether objects are decoded into associative arrays. */
    private $associative = false;

    /** @var int Maximum nesting depth allowed. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth (the top-level value counts as 1). */
    private $depth = 1;

    /**
     * Private constructor.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the input is not valid JSON5
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
        if (PHP_VERSION_ID >= 70000) {
            $result = json_decode($source, $associative, $depth, $options);
            if (json_last_error() === JSON_ERROR_NONE) {
                return $result;
            }
        }

        // Fall back to JSON5 if that fails
        $objectAsArray = ($options & JSON_OBJECT_AS_ARRAY) !== 0;
        $associative = $associative === true || ($associative === null && $objectAsArray);
        // The constructor expects a real boolean, not the raw bitmask result.
        $castBigIntToString = ($options & JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string)$source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();
        // Compare against null explicitly: a trailing "0" byte is falsy in PHP
        // and would otherwise slip through as if the input had ended.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset.
     *
     * @param int $at
     *
     * @return string|null The byte, or null when the offset is past the end of the input
     */
    private function getByte($at)
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Returns the (possibly multi-byte) character under the cursor.
     *
     * @return string|null
     */
    private function currentChar()
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // Slice at most 4 bytes and let mbstring pick the first character from them.
        return mb_substr(substr($this->json, $this->at, 4), 0, 1);
    }

    /**
     * Parse the next character.
     *
     * @return null|string The new current byte, or null at end of input
     */
    private function next()
    {
        // A lone "\r" or any "\n" ends the line; the "\r" of a "\r\n" pair does not,
        // so that the pair is only counted once.
        $endsLine = $this->currentByte === "\n"
            || ($this->currentByte === "\r" && $this->peek() !== "\n");

        if ($endsLine) {
            $this->lineNumber++;
            $this->column = 1;
        } else {
            $this->column++;
        }

        $this->at++;

        return $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Parse the next character if it matches $c or fail.
     *
     * @param string $c
     *
     * @return string|null
     *
     * @throws SyntaxError if the current byte is not $c
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        return $this->next();
    }

    /**
     * Get the next character without consuming it or
     * assigning it to the current byte.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text (the cursor is moved past it), or null if nothing matched
     */
    private function match($regex)
    {
        $subject = (string)substr($this->json, $this->at);

        // Only match on the current line. strpos() returns 0 (not false) when the
        // newline is the very first byte, so the result must be compared strictly.
        $pos = strpos($subject, "\n");
        if ($pos !== false) {
            $subject = (string)substr($subject, 0, $pos);
        }

        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $matchedText = $matches[0][0];
        $matchOffset = $matches[0][1];

        $this->at += $matchOffset + strlen($matchedText);
        // Columns are counted in characters, whereas the cursor is counted in bytes.
        $this->column += mb_strlen(substr($subject, 0, $matchOffset) . $matchedText);
        $this->currentByte = $this->getByte($this->at);

        return $matchedText;
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded
     *
     * @throws SyntaxError if no identifier starts at the cursor
     */
    private function identifier()
    {
        $escape = '\\\\u[0-9A-Fa-f]{4}';
        $startClass = '[\$_\p{L}\p{Nl}]';
        // U+200C (ZWNJ) and U+200D (ZWJ) are written as escapes rather than as literal
        // invisible characters, so that editors and copy/paste cannot silently drop them.
        $partClass = '[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]';

        $match = $this->match(
            '/^(?:' . $startClass . '|' . $escape . ')(?:' . $partClass . '|' . $escape . ')*/u'
        );

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', static function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * @return int|float|string A string is only returned for integers that overflow
     *                          a PHP int while big-int-as-string mode is enabled
     *
     * @throws SyntaxError on octal literals, malformed or non-finite numbers
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $isHex = false;

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->currentByte === 'N') {
            // ignore sign as -NaN also is NaN
            return $this->word();
        }

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $this->next();
                $isHex = true;
            } elseif (is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are handed to hexdec(); passing the "0x" prefix
            // triggers a deprecation notice on PHP 7.4+.
            $number = hexdec($digits);
            if (!is_finite($number)) {
                $this->throwSyntaxError('Bad number');
            }

            return ($sign === '-') ? -$number : $number;
        }

        if (is_numeric($this->currentByte) || $this->currentByte === '.') {
            $mantissa = $this->match('/^\d*\.?\d*/');
            if ($mantissa !== null) {
                $string .= $mantissa;
            }
        }

        if ($this->currentByte === 'E' || $this->currentByte === 'e') {
            $exponent = $this->match('/^[Ee][-+]?\d*/');
            if ($exponent !== null) {
                $string .= $exponent;
            }
        }

        // Validate before doing any arithmetic: a lone sign leaves $string empty,
        // and negating an empty string is an error (or silently 0) depending on the PHP version.
        if (!is_numeric($string) || !is_finite((float)$string)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $number = $string + 0;
        if ($sign === '-') {
            $number = -$number;
        }

        // Mirror json_decode(): only integer literals that overflowed into a float
        // are handed back as strings; everything else stays numeric.
        if ($this->castBigIntToString && is_float($number) && preg_match('/^\d+$/', $string) === 1) {
            return ($sign === '-') ? '-' . $string : $string;
        }

        return $number;
    }

    /**
     * Parse a single- or double-quoted string starting at the opening quote.
     *
     * @return string
     *
     * @throws SyntaxError on an unterminated string, an unknown escape or a raw newline
     */
    private function string()
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // A run of \uXXXX escapes is decoded in one go so that surrogate pairs stay together.
                if ($this->peek() === 'u' && $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
                    $string .= json_decode('"'.$unicodeEscaped.'"');
                    // match() already moved the cursor past the escapes.
                    continue;
                }

                $this->next();
                if ($this->currentByte === "\r") {
                    // Escaped line break: swallow the "\n" of a "\r\n" pair as well.
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character that opens this block comment.
     * To finish the block comment, we look for a closing star-slash pair,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the slash is not followed by another slash or a star
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
                continue;
            }

            $width = $this->whitespaceWidth();
            if ($width === 0) {
                return;
            }

            for ($i = 0; $i < $width; $i++) {
                $this->next();
            }
            // next() advanced the column once per byte; a multi-byte character is one column.
            $this->column -= $width - 1;
        }
    }

    /**
     * Returns the byte width of the whitespace character under the cursor.
     *
     * The cursor is a single byte, so the multi-byte whitespace characters
     * (no-break space and the byte order mark) are recognised by their UTF-8 byte sequences.
     *
     * @return int 0 when the cursor is not on whitespace
     */
    private function whitespaceWidth()
    {
        if (strpos(" \t\r\n\x0B\f", $this->currentByte) !== false) {
            return 1;
        }

        // U+00A0 NO-BREAK SPACE
        if ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
            return 2;
        }

        // U+FEFF BYTE ORDER MARK
        if ($this->currentByte === "\xEF" && substr($this->json, $this->at, 3) === "\xEF\xBB\xBF") {
            return 3;
        }

        return 0;
    }

    /**
     * Matches true, false, null, etc
     *
     * @return bool|float|null
     *
     * @throws SyntaxError if the input does not spell out one of the known words
     */
    private function word()
    {
        switch ($this->currentByte) {
            case 't':
                $this->consumeLiteral('true');

                return true;
            case 'f':
                $this->consumeLiteral('false');

                return false;
            case 'n':
                $this->consumeLiteral('null');

                return null;
            case 'I':
                $this->consumeLiteral('Infinity');

                return INF;
            case 'N':
                $this->consumeLiteral('NaN');

                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }

    /**
     * Consumes the given literal byte by byte, failing on the first mismatch.
     *
     * @param string $literal
     */
    private function consumeLiteral($literal)
    {
        $length = strlen($literal);
        for ($i = 0; $i < $length; $i++) {
            $this->nextOrFail($literal[$i]);
        }
    }

    /**
     * Parse an array value.
     *
     * @return array
     *
     * @throws SyntaxError on excessive nesting, a missing element or an unterminated array
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();
            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        // The input ended before the closing bracket was seen.
        $this->throwSyntaxError('Bad array');
    }

    /**
     * Parse an object value
     *
     * @return array|\stdClass An array in associative mode, otherwise a stdClass instance
     *
     * @throws SyntaxError on excessive nesting, a bad key or an unterminated object
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();
            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        // The input ended before the closing brace was seen.
        $this->throwSyntaxError('Bad object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        switch ($this->currentByte) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return is_numeric($this->currentByte) ? $this->number() : $this->word();
        }
    }

    /**
     * Throws a syntax error carrying the current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        throw new SyntaxError($message, $this->lineNumber, $this->column);
    }

    /**
     * Renders a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or "EOF" for null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Returns the text an escape sequence stands for.
     *
     * @param string $ch The character following the backslash
     *
     * @return string|null null when the character is not a supported escape
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            case "'":
                return "'";
            case '"':
                return '"';
            case '\\':
                return '\\';
            case '/':
                return '/';
            case "\n":
                // An escaped newline is a line continuation and contributes nothing.
                return '';
            case 'b':
                return chr(8);
            case 'f':
                return "\f";
            case 'n':
                return "\n";
            case 'r':
                return "\r";
            case 't':
                return "\t";
            default:
                return null;
        }
    }
}
619