Completed
Push — master ( d426c3...66aac6 )
by Colin
8s
created

Json5Decoder::white()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 16
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 6

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 6
eloc 11
c 2
b 0
f 0
nc 5
nop 0
dl 0
loc 16
ccs 13
cts 13
cp 1
crap 6
rs 8.8571
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    // Source text being decoded; decode() casts it to a string before handing it to the constructor.
    private $json;
20
21
    // Byte length of $json, computed with strlen() in the constructor.
    private $length;
22
23
    // Zero-based byte offset of the cursor into $json.
    private $at = 0;
24
25
    // Byte found at offset $at, or null once the cursor has moved past the end of the input.
    private $currentByte;
26
27
    // Line number of the cursor (starts at 1); next() increments it when stepping over a line break.
    private $lineNumber = 1;
28
29
    // When true, obj() builds associative arrays instead of stdClass instances.
    private $associative = false;
30
31
    // Nesting limit: arr() and obj() raise a syntax error once $depth exceeds this value.
    private $maxDepth = 512;
32
33
    // Set by decode() from the JSON_BIGINT_AS_STRING bit of the options bitmask; read by number().
    private $castBigIntToString = false;
34
35
    // Current nesting depth; starts at 1 and is incremented on entering an array or object.
    private $depth = 1;
36
37
    // Byte offset at which the current line begins; throwSyntaxError() uses it to compute the column.
    private $currentLineStartsAt = 0;
38
39
    /**
     * Stores the input and decoding options, then primes the cursor on the first byte.
     *
     * The constructor is private; within this class it is only invoked by decode().
     *
     * @param string $json               Source text to decode.
     * @param bool   $associative        Decode objects as associative arrays rather than stdClass.
     * @param int    $depth              Maximum nesting depth.
     * @param bool   $castBigIntToString Whether the "big int as string" option is enabled.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        // Input and its byte length; the length must be known before getByte() is used below.
        $this->json = $json;
        $this->length = strlen($json);

        // Decoding options.
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        // Load the first byte (null for an empty input).
        $this->currentByte = $this->getByte(0);
    }
57
58
    /**
     * Takes a JSON5 (or plain JSON) encoded string and converts it into a PHP variable.
     *
     * The parameters mirror PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string    $source      The JSON string being decoded.
     * @param bool|null $associative When TRUE, returned objects will be converted into associative arrays.
     *                               When NULL, the JSON_OBJECT_AS_ARRAY bit of $options decides.
     * @param int       $depth       User specified recursion depth.
     * @param int       $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError When the input is neither valid JSON nor valid JSON5.
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Try parsing with json_decode first, since that's much faster.
        // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly.
        if (PHP_VERSION_ID >= 70000) {
            $result = json_decode($source, $associative, $depth, $options);
            if (json_last_error() === JSON_ERROR_NONE) {
                return $result;
            }
        }

        // Fall back to JSON5 if that fails. Both flags are normalised to real booleans,
        // which is what the constructor documents.
        $associative = $associative === true
            || ($associative === null && ($options & JSON_OBJECT_AS_ARRAY) !== 0);
        $castBigIntToString = ($options & JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value (and trailing whitespace/comments) is an error.
        // Compare against null explicitly: a truthiness test would let a trailing "0"
        // byte through, because the string "0" is falsy in PHP.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
96
97
    /**
     * Reads a single byte of the input.
     *
     * @param int $at Zero-based byte offset.
     *
     * @return string|null The byte at that offset, or null when the offset is at or past the end of the input.
     */
    private function getByte($at)
    {
        return $at < $this->length ? $this->json[$at] : null;
    }
110
111
    /**
     * Returns the (possibly multi-byte) character starting at the cursor.
     *
     * Unlike $currentByte this yields a whole character; it is used when rendering error messages.
     *
     * @return string|null The character at the cursor, or null when the cursor is at or past the end of the input.
     */
    private function currentChar()
    {
        if ($this->at < $this->length) {
            // Take a window of up to four bytes and let mbstring pick its first character.
            $window = substr($this->json, $this->at, 4);

            return mb_substr($window, 0, 1);
        }

        return null;
    }
122
123
    /**
     * Advances the cursor by one byte.
     *
     * Line bookkeeping is updated when the byte being left behind ends a line:
     * a "\n", or a "\r" that is not immediately followed by "\n".
     *
     * @return null|string The new current byte, or null when there are no more bytes.
     */
    private function next()
    {
        $leaving = $this->currentByte;
        $endsLine = $leaving === "\n" || ($leaving === "\r" && $this->peek() !== "\n");

        if ($endsLine) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;
        $this->currentByte = $this->getByte($this->at);

        return $this->currentByte;
    }
141
142
    /**
     * Consumes the current byte, provided it is exactly $c; otherwise raises a syntax error.
     *
     * @param string $c The byte expected at the cursor.
     *
     * @return string|null The byte following the consumed one (null at end of input).
     */
    private function nextOrFail($c)
    {
        $matches = $this->currentByte === $c;

        if (!$matches) {
            $expected = self::renderChar($c);
            $found = self::renderChar($this->currentChar());

            $this->throwSyntaxError(sprintf('Expected %s instead of %s', $expected, $found));
        }

        return $this->next();
    }
161
162
    /**
     * Looks at the byte right after the cursor without consuming anything
     * and without touching $currentByte.
     *
     * @return string|null The following byte, or null when there is none.
     */
    private function peek()
    {
        $ahead = $this->at + 1;

        return $this->getByte($ahead);
    }
172
173
    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines: the subject handed to the
     * regex is cut at the first "\n" at or after the cursor. On success the cursor is
     * moved to just past the matched text.
     *
     * @param string $regex PCRE pattern (callers anchor it with ^).
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        $subject = substr($this->json, $this->at);

        // Only match on the current line. strpos() returns 0 when the subject itself
        // starts with the newline, so the result must be compared against false;
        // a truthiness test would skip the truncation in that case.
        $pos = strpos($subject, "\n");
        if ($pos !== false) {
            $subject = substr($subject, 0, $pos);
        }

        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0] is [matched text, byte offset within $subject].
        $this->at += $matches[0][1] + strlen($matches[0][0]);
        $this->currentByte = $this->getByte($this->at);

        return $matches[0][0];
    }
199
200
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // The zero-width non-joiner / joiner allowed inside identifiers are written as
        // explicit \x{200C}\x{200D} escapes rather than as invisible literal characters,
        // so that editors and copy/paste cannot silently strip them from the pattern.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars: each run of \uXXXX sequences is decoded by
        // wrapping it in quotes and letting json_decode() interpret it.
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', static function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }
228
229 200
    /**
     * Parse a number.
     *
     * Accepts an optional leading sign, decimal literals (with optional fraction and
     * exponent), hexadecimal literals (0x...), Infinity and NaN. A leading zero
     * followed by another digit is rejected as an octal literal.
     *
     * @return int|float|string An int or float; when the big-int-as-string option is
     *                          enabled, a decimal integer too large for a PHP int is
     *                          returned as its original digit string instead.
     */
    private function number()
    {
        $number = null;
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN; the sign is ignored as -NaN also is NaN
        if ($this->currentByte === 'N') {
            return $this->word();
        }

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $string .= $this->currentByte;
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $match = $this->match('/^[A-Fa-f0-9]+/');
            if ($match === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            $string .= $match;
            $number = hexdec($string);
        } else {
            // Integer and fraction part.
            $startsMantissa = is_numeric($this->currentByte) || $this->currentByte === '.';
            if ($startsMantissa && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }

            // Exponent part.
            $startsExponent = $this->currentByte === 'E' || $this->currentByte === 'e';
            if ($startsExponent && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }

            // Kept as a string for now so that validation and the big-int option
            // can still see the original digits.
            $number = $string;
        }

        if ($sign === '-') {
            // Decimal literals are still strings here: prepend the sign textually.
            // Arithmetic negation would turn an empty string into 0 (or raise a
            // TypeError on PHP 8) and would convert large integers to floats.
            $number = is_string($number) ? '-' . $number : -$number;
        }

        if (!is_numeric($number) || !is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // Like json_decode(), only integers that overflowed to float are kept as strings.
        if ($this->castBigIntToString
            && is_string($number)
            && is_float($asIntOrFloat)
            && preg_match('/^-?\d+$/', $number) === 1
        ) {
            return $number;
        }

        return $asIntOrFloat;
    }
304
305 83
    /**
     * Parse a quoted string.
     *
     * The byte at the cursor on entry is taken as the delimiter. Escapes known to
     * getEscapee() and runs of \uXXXX sequences are decoded, and a backslash followed
     * by a line break continues the string on the next line. An unescaped "\n", an
     * unknown escape, or end of input before the closing delimiter is a syntax error.
     *
     * @return string The decoded contents, without the delimiters.
     */
    private function string()
    {
        $quote = $this->currentByte;
        $buffer = '';

        $this->next();
        while ($this->currentByte !== null) {
            // Closing delimiter: consume it and finish.
            if ($this->currentByte === $quote) {
                $this->next();

                return $buffer;
            }

            // unescaped newlines are invalid; see:
            // https://github.com/json5/json5/issues/24
            // @todo this feels special-cased; are there other invalid unescaped chars?
            if ($this->currentByte === "\n") {
                break;
            }

            // Ordinary byte: copy it through.
            if ($this->currentByte !== '\\') {
                $buffer .= $this->currentByte;
                $this->next();
                continue;
            }

            // From here on the cursor sits on a backslash.
            if ($this->peek() === 'u') {
                $unicodeRun = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                if ($unicodeRun) {
                    // match() already moved the cursor past the whole run.
                    $buffer .= json_decode('"'.$unicodeRun.'"');
                    continue;
                }
            }

            $this->next();
            if ($this->currentByte === "\r") {
                // Escaped CR or CRLF line continuation: contributes nothing to the result.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $replacement = self::getEscapee($this->currentByte);
                if ($replacement === null) {
                    break;
                }
                $buffer .= $replacement;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
348
349
    /**
     * Skip an inline comment, assuming this is one.
     *
     * On entry the cursor should be on the second / of the // pair that opens the comment.
     * The comment runs until a "\n" or "\r" (which is consumed as well) or until the end of the text.
     */
    private function inlineComment()
    {
        while (true) {
            $this->next();

            if ($this->currentByte === null) {
                // End of text also terminates the comment.
                return;
            }

            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        }
    }
366
367
    /**
     * Skip a block comment, assuming this is one.
     *
     * On entry the cursor should be on the * of the opening slash-star pair.
     * The comment ends at the first star-slash pair; reaching the end of the
     * text before that is a syntax error.
     */
    private function blockComment()
    {
        while (true) {
            $this->next();

            // A run of stars may be followed by the closing slash.
            while ($this->currentByte === '*') {
                $this->next();
                if ($this->currentByte === '/') {
                    $this->next();

                    return;
                }
            }

            if ($this->currentByte === null) {
                break;
            }
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
390
391
    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * The byte after the leading / selects the comment type; anything other
     * than / or * is a syntax error.
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();

            return;
        }

        if ($this->currentByte === '*') {
            $this->blockComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
407
408
    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     *
     * On return the cursor is either at the end of the input or on the first byte
     * that is neither whitespace nor the start of a comment.
     */
    private function white()
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
                continue;
            }

            if (preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
                continue;
            }

            // Non-breaking space in UTF-8 (bytes C2 A0). The bytes are compared directly:
            // peek() returns null at the end of the input, and passing null to ord() is
            // deprecated as of PHP 8.1.
            if ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                $this->next();
                $this->next();
                continue;
            }

            return;
        }
    }
431
432
    /**
     * Matches one of the literal words true, false, null, Infinity or NaN.
     *
     * The byte at the cursor selects the word; every byte of its spelling must then
     * follow, otherwise nextOrFail() raises a syntax error. A byte that starts none
     * of the words is reported as unexpected.
     *
     * @return bool|null|float The PHP value of the word that was matched.
     */
    private function word()
    {
        // First byte => [full spelling, resulting value]
        $words = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->currentByte;
        if ($first !== null && isset($words[$first])) {
            list($spelling, $value) = $words[$first];

            $length = strlen($spelling);
            for ($i = 0; $i < $length; $i++) {
                $this->nextOrFail($spelling[$i]);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
    }
476
477 37
    /**
     * Parse an array value.
     *
     * On entry the cursor is on the opening [. Elements are separated by commas and
     * a trailing comma before the closing ] is accepted; omitted elements are not.
     *
     * @return array The decoded elements, in order.
     */
    private function arr()
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();

            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        // The input ended right after the opening bracket or after a comma. Without
        // this, the method would fall off the end and silently yield null.
        $this->throwSyntaxError('Invalid array');
    }
513
514
    /**
     * Parse an object value.
     *
     * On entry the cursor is on the opening {. Keys may be quoted strings or unquoted
     * identifiers; pairs are separated by commas and a trailing comma is accepted.
     *
     * @return array|\stdClass An associative array when the decoder is in associative
     *                         mode, otherwise a stdClass instance.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }
            $this->white();

            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }
            $this->nextOrFail(',');
            $this->white();
        }

        // The input ended right after the opening brace or after a comma. Without
        // this, the method would fall off the end and silently yield null.
        $this->throwSyntaxError('Invalid object');
    }
561
562
    /**
     * Parse a JSON value.
     *
     * Leading whitespace and comments are skipped, then the first byte decides which
     * parser takes over: object, array, string, number, or word.
     *
     * @return mixed The decoded value.
     */
    private function value()
    {
        $this->white();

        $byte = $this->currentByte;

        if ($byte === '{') {
            return $this->obj();
        }

        if ($byte === '[') {
            return $this->arr();
        }

        if ($byte === '"' || $byte === "'") {
            return $this->string();
        }

        // A sign, a leading decimal point or a digit starts a number.
        if ($byte === '-' || $byte === '+' || $byte === '.' || is_numeric($byte)) {
            return $this->number();
        }

        return $this->word();
    }
587
588 120
    /**
     * Raises a SyntaxError carrying the current line and column.
     *
     * The column is the number of characters (counted with mb_strlen) between the
     * start of the current line and the cursor, plus one.
     *
     * @param string $message Description of the problem.
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        // Text of the current line up to, but not including, the cursor.
        $bytesIntoLine = $this->at - $this->currentLineStartsAt;
        $linePrefix = substr($this->json, $this->currentLineStartsAt, $bytesIntoLine);

        throw new SyntaxError($message, $this->lineNumber, mb_strlen($linePrefix) + 1);
    }
596
597 33
    /**
     * Formats a character for use in an error message.
     *
     * @param string|null $chr The character, or null to denote the end of the input.
     *
     * @return string 'EOF' for null, otherwise the character wrapped in single quotes.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'{$chr}'";
    }
601
602
    /**
     * Translates the byte following a backslash into the text it stands for.
     *
     * @param string $ch The byte that follows the backslash.
     *
     * @return string|null The replacement text (the empty string for an escaped "\n"),
     *                     or null when the byte is not a recognised escape.
     */
    private static function getEscapee($ch)
    {
        $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => chr(8),
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        if ($ch !== null && isset($escapes[$ch])) {
            return $escapes[$ch];
        }

        return null;
    }
625
}
626