Completed
Pull Request — master (#2)
by Colin
01:24
created

Json5Decoder::blockComment()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 4.0119

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 4
eloc 10
c 2
b 0
f 0
nc 3
nop 0
dl 0
loc 16
ccs 10
cts 11
cp 0.9091
crap 4.0119
rs 9.2
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    // --- Cursor state -----------------------------------------------------

    /** @var int Zero-based index, counted in characters, of the current character. */
    private $at = 0;

    /** @var string|null The character at $at, or null once the end of the input has been reached. */
    private $ch;

    /** @var int Line of the current character; starts at 1 and is reported in syntax errors. */
    private $lineNumber = 1;

    /** @var int Column of the current character; starts at 1 and is reported in syntax errors. */
    private $columnNumber = 1;

    /** @var int Current nesting level; starts at 1 and is raised on entering each array or object. */
    private $depth = 1;

    // --- Input ------------------------------------------------------------

    /** @var string[] The input split into individual UTF-8 characters. */
    private $chArr;

    /** @var int Number of characters in the input. */
    private $length;

    /** @var string Substring of the input that begins at character index $remainderCacheAt. */
    private $remainderCache;

    /** @var int Character index at which $remainderCache begins. */
    private $remainderCacheAt;

    // --- Options ----------------------------------------------------------

    /** @var bool When true, objects are decoded into associative arrays instead of stdClass instances. */
    private $associative = false;

    /** @var int Highest nesting level allowed before a syntax error is raised. */
    private $maxDepth = 512;

    /** @var bool Set from the JSON_BIGINT_AS_STRING option; consulted by number() when choosing its return type. */
    private $castBigIntToString = false;
42
43
    /**
     * Private constructor; instances are only created by decode().
     *
     * Stores the decoding options, splits the input into UTF-8 characters and
     * positions the parser on the first character.
     *
     * @param string $json               The JSON5 text to parse.
     * @param bool   $associative        Decode objects into associative arrays.
     * @param int    $depth              Maximum nesting depth.
     * @param bool   $castBigIntToString Whether the JSON_BIGINT_AS_STRING option was given.
     *
     * @throws SyntaxError If the input cannot be split into UTF-8 characters.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->associative = (bool) $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = (bool) $castBigIntToString;

        // -1 means "no limit". Passing null for $limit is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // With the /u modifier preg_split() fails when the subject is not valid UTF-8.
            // Report that explicitly instead of indexing into `false` later on.
            $this->throwSyntaxError('Malformed UTF-8 characters, possibly incorrectly encoded');
        }

        $this->chArr = $chars;
        // Take the length from the split result so charAt() can never index past the array.
        $this->length = count($chars);
        $this->ch = $this->charAt(0);

        $this->remainderCache = $json;
        $this->remainderCacheAt = 0;
    }
65
66
    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options
     *                            (JSON_OBJECT_AS_ARRAY and JSON_BIGINT_AS_STRING are honoured).
     *
     * @return mixed
     *
     * @throws SyntaxError If the input is not valid JSON5 or anything other than
     *                     whitespace/comments follows the top-level value.
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        $associative = $associative || (bool) ($options & JSON_OBJECT_AS_ARRAY);
        // The bitwise AND yields an int; the constructor documents a bool.
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Compare against null explicitly: the string "0" is falsy in PHP, so a
        // truthiness test would silently accept trailing input such as `[] 0`.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
94
95
    /**
     * Look up the character stored at a given character index.
     *
     * @param int $at Zero-based character index.
     *
     * @return string|null The character, or null when the index lies outside the input.
     */
    private function charAt($at)
    {
        $inBounds = $at >= 0 && $at < $this->length;

        return $inBounds ? $this->chArr[$at] : null;
    }
108
109
    /**
     * Advance to the next character.
     *
     * When $c is given, the current character must equal $c; otherwise a
     * syntax error is raised before anything is consumed.
     *
     * @param string|null $c Character the parser expects to be standing on.
     *
     * @return null|string The new current character, or null at the end of the input.
     */
    private function next($c = null)
    {
        if ($c !== null && $c !== $this->ch) {
            $message = sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            );
            $this->throwSyntaxError($message);
        }

        // Decide this before moving: peek() looks relative to the current position.
        // A "\r" directly followed by "\n" does not end the line; the "\n" does.
        $endsLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        return $this->ch = $this->charAt($this->at);
    }
145
146
    /**
     * Look at the character after the current one without consuming
     * anything or touching the ch variable.
     *
     * @return string|null The following character, or null if there is none.
     */
    private function peek()
    {
        $following = $this->at + 1;

        return $this->charAt($following);
    }
156
157
    /**
     * Try to match a regular expression against the unparsed remainder of the input.
     *
     * On success the parser is moved past the end of the match. Only the column
     * counter is advanced (the line counter is left alone), so patterns passed
     * here are expected not to consume line breaks.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        $remainder = $this->getRemainder();

        $found = [];
        if (preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE) !== 1) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE each entry is a [matched text, byte offset] pair.
        list($text, $byteOffset) = $found[0];

        // The offset is in bytes; turn it into a character count before moving the cursor.
        $skipped = mb_strcut($remainder, 0, $byteOffset, 'utf-8');
        $consumed = mb_strlen($skipped, 'utf-8') + mb_strlen($text, 'utf-8');

        $this->at += $consumed;
        $this->columnNumber += $consumed;
        $this->ch = $this->charAt($this->at);

        return $text;
    }
188
189
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes replaced by the characters they denote.
     *
     * @throws SyntaxError If the text at the current position is not a valid identifier.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // The two trailing members of the second character class are U+200C (zero-width
        // non-joiner) and U+200D (zero-width joiner). They are spelled as \x{...} escapes
        // so that an editor or copy/paste cannot silently strip the invisible characters.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by letting json_decode() interpret each
        // run of \uXXXX sequences as the body of a JSON string.
        return preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"' . $m[0] . '"');
        }, $match);
    }
217
218 210
    /**
     * Parse a number.
     *
     * Accepts an optional leading sign, Infinity, NaN, hexadecimal literals
     * (0x / 0X prefix) and decimal literals with optional fraction and exponent.
     * A leading zero followed by another digit is rejected as an octal literal.
     *
     * @return int|float|string An int or float; or the literal's original text when
     *                          JSON_BIGINT_AS_STRING was requested and a decimal
     *                          integer literal is too large to be held as an int.
     *
     * @throws SyntaxError If the text at the current position is not a valid number.
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity; word() throws unless the text spells it exactly
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN; word() throws unless the text spells it exactly.
        // NAN never compares equal to anything (itself included), so the result
        // is returned as-is rather than compared. The sign is ignored: -NaN is NaN.
        if ($this->ch === 'N') {
            return $this->word();
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are handed to hexdec(); the prefix was consumed above.
            $value = hexdec($digits);
            if ($sign === '-') {
                $value = -$value;
            }

            if (!is_finite($value)) {
                $this->throwSyntaxError('Bad number');
            }

            return $value;
        }

        if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }

        // Apply the sign textually. Negating the string arithmetically would coerce
        // malformed text such as "" or "1e" into a number before it can be validated.
        $literal = ($sign === '-') ? '-' . $string : $string;

        if (!is_numeric($literal) || !is_finite((float) $literal)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $value = $literal + 0;

        // Mirror json_decode(): only integer literals that overflowed into a float
        // are handed back as strings; every other number keeps its numeric type.
        if ($this->castBigIntToString && is_float($value) && preg_match('/^-?\d+$/', $literal) === 1) {
            return $literal;
        }

        return $value;
    }
297
298 93
    /**
     * Parse a quoted string.
     *
     * The current character is taken as the delimiter; parsing continues until
     * the same character is seen again outside of an escape sequence.
     *
     * @return string The decoded contents, without the surrounding delimiters.
     *
     * @throws SyntaxError On an unescaped "\n", an unsupported escape sequence,
     *                     or when the input ends before the closing delimiter.
     */
    private function string()
    {
        $quote = $this->ch;
        $buffer = '';

        $this->next();
        while ($this->ch !== null) {
            if ($this->ch === $quote) {
                $this->next();

                return $buffer;
            }

            if ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            }

            if ($this->ch !== '\\') {
                // Ordinary character: copy it through.
                $buffer .= $this->ch;
                $this->next();
                continue;
            }

            // A run of \uXXXX escapes is consumed as a whole by match(), which also
            // moves the cursor, so the loop restarts without a further next().
            if ($unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
                $buffer .= json_decode('"'.$unicodeEscaped.'"');
                continue;
            }

            // Step over the backslash and look at the escaped character.
            $this->next();
            if ($this->ch === "\r") {
                // Escaped line break: contributes nothing; swallow a following "\n" too.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $replacement = self::getEscapee($this->ch);
                if ($replacement === null) {
                    break;
                }
                $buffer .= $replacement;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
341
342
    /**
     * Skip an inline comment, assuming this is one.
     *
     * On entry the current character should be the second / of the // pair that
     * opens the comment. The comment runs until a "\n" or "\r" (which is also
     * consumed) or until the input ends.
     */
    private function inlineComment()
    {
        while (true) {
            $this->next();

            if ($this->ch === null) {
                // End of input also terminates the comment.
                return;
            }

            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        }
    }
359
360
    /**
     * Skip a block comment, assuming this is one.
     *
     * On entry the current character should be the * of the slash-star pair that
     * opens the comment. The comment ends at the first star that is immediately
     * followed by a slash; both are consumed.
     *
     * @throws SyntaxError If the input ends before the comment is closed.
     */
    private function blockComment()
    {
        // Step over the opening star so it cannot double as the closing one.
        $this->next();

        while ($this->ch !== null) {
            if ($this->ch !== '*') {
                $this->next();
                continue;
            }

            // On a star: the comment ends only if a slash follows directly.
            // Otherwise re-examine the new character, which may be another star.
            $this->next('*');
            if ($this->ch === '/') {
                $this->next('/');

                return;
            }
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
383
384
    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError If the leading / is not followed by a second / or by a *.
     */
    private function comment()
    {
        // Every comment opens with a / character.
        $this->next('/');

        if ($this->ch === '/') {
            $this->inlineComment();

            return;
        }

        if ($this->ch === '*') {
            $this->blockComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
400
401
    /**
     * Skip whitespace and comments.
     *
     * A comment is detected from a single / character alone. That is safe because
     * no JSON5 value starts with a /, but it would break if such a value were
     * ever introduced.
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
                continue;
            }

            if (preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $this->ch) !== 1) {
                // First character that is neither whitespace nor a comment: stop here.
                return;
            }

            $this->next();
        }
    }
420
421
    /**
     * Matches true, false, null, Infinity and NaN.
     *
     * The literal is chosen by the current character and then consumed one
     * character at a time, so a misspelling is reported at the exact position
     * where it deviates.
     *
     * @return bool|null|float
     *
     * @throws SyntaxError If no literal starts with the current character or the
     *                     text does not spell the literal exactly.
     */
    private function word()
    {
        // first character => [full spelling, resulting PHP value]
        $literals = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if ($first === null || !isset($literals[$first])) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
        }

        list($spelling, $result) = $literals[$first];
        foreach (str_split($spelling) as $expected) {
            $this->next($expected);
        }

        return $result;
    }
465
466 42
    /**
     * Parse an array value.
     *
     * The current character must be the opening [. A trailing comma before the
     * closing ] is accepted; an omitted element (two commas in a row, or a
     * leading comma) is not.
     *
     * @return array
     *
     * @throws SyntaxError On malformed input or when the maximum depth is exceeded.
     */
    private function arr()
    {
        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $items = [];

        $this->next('[');
        $this->white();

        while ($this->ch !== null) {
            if ($this->ch === ']') {
                // Reached for an empty array and after a trailing comma.
                $this->next(']');
                $this->depth--;

                return $items;
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $items[] = $this->value();
            $this->white();

            if ($this->ch === ',') {
                $this->next(',');
                $this->white();
                continue;
            }

            // No comma after the element, so the array has to end here.
            $this->next(']');
            $this->depth--;

            return $items;
        }

        $this->throwSyntaxError('Bad array');
    }
506
507
    /**
     * Parse an object value.
     *
     * The current character must be the opening {. Keys may be quoted strings or
     * unquoted identifiers, and a trailing comma before the closing } is accepted.
     *
     * @return array|\stdClass An associative array when the decoder is in
     *                         associative mode, otherwise a stdClass instance.
     *
     * @throws SyntaxError On malformed input or when the maximum depth is exceeded.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();

            // Test for end of input with an explicit null comparison, as arr() does.
            // A truthiness test would also stop on the character "0" and report a
            // generic "Bad object" instead of the more precise identifier error.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;

                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;

                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
558
559
    /**
     * Parse a JSON value.
     *
     * Leading whitespace and comments are skipped, then the first character
     * decides which parser handles the value: object, array, string, number,
     * or word.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();

        $first = $this->ch;

        if ($first === '{') {
            return $this->obj();
        }

        if ($first === '[') {
            return $this->arr();
        }

        if ($first === '"' || $first === "'") {
            return $this->string();
        }

        // Numbers may open with a sign, a decimal point or a digit.
        if ($first === '-' || $first === '+' || $first === '.' || is_numeric($first)) {
            return $this->number();
        }

        return $this->word();
    }
584
585 114
    /**
     * Raise a syntax error tagged with the parser's current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        $error = new SyntaxError($message, $this->lineNumber, $this->columnNumber);

        throw $error;
    }
589
590 27
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The character wrapped in single quotes, or EOF for null.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'" . $chr . "'";
    }
594
595
    /**
     * Translate the character following a backslash into the text it stands for.
     *
     * @param string $ch The character that follows the backslash.
     *
     * @return string|null The replacement text (empty for an escaped "\n"),
     *                     or null when the escape is not supported.
     */
    private static function getEscapee($ch)
    {
        // @codingStandardsIgnoreStart
        $replacements = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => chr(8),
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];
        // @codingStandardsIgnoreEnd

        if ($ch === null || !array_key_exists($ch, $replacements)) {
            return null;
        }

        return $replacements[$ch];
    }
618
619
    /**
     * Returns everything from $this->at onwards.
     *
     * Utilizes a cache so we don't have to continuously parse through UTF-8
     * data that was earlier in the string which we don't even care about:
     * each call only trims the characters consumed since the previous call.
     *
     * @return string
     */
    private function getRemainder()
    {
        if ($this->remainderCacheAt === $this->at) {
            return $this->remainderCache;
        }

        // Pass the encoding explicitly, like every other mb_* call in this class.
        // Otherwise the character offset is interpreted in whatever internal
        // encoding is configured. A null length means "up to the end of the string".
        $subject = mb_substr($this->remainderCache, $this->at - $this->remainderCacheAt, null, 'utf-8');
        $this->remainderCache = $subject;
        $this->remainderCacheAt = $this->at;

        return $subject;
    }
639
}
640