Completed
Pull Request — master (#2)
by Colin
02:25
created

Json5Decoder::blockComment()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 4.0119

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 4
eloc 10
c 2
b 0
f 0
nc 3
nop 0
dl 0
loc 16
ccs 10
cts 11
cp 0.9091
crap 4.0119
rs 9.2
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    private $json;
20
21
    private $at = 0;
22
23
    private $lineNumber = 1;
24
25
    private $columnNumber = 1;
26
27
    private $ch;
28
29
    private $chArr;
30
31
    private $associative = false;
32
33
    private $maxDepth = 512;
34
35
    private $castBigIntToString = false;
36
37
    private $depth = 1;
38
39
    private $length;
40
41
    private $lineCache;
0 ignored issues
show
Unused Code introduced by
The property $lineCache is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
42
43
    /**
     * Private constructor; the class instantiates itself from decode().
     *
     * The source is split once into an array of UTF-8 characters so that
     * charAt() can look characters up by position.
     *
     * @param string $json               The JSON5 source text.
     * @param bool   $associative        Whether objects are built as associative arrays.
     * @param int    $depth              Maximum nesting depth allowed.
     * @param bool   $castBigIntToString Whether oversized integers are returned as strings.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        // -1 means "no limit"; passing null for this int parameter is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the subject is not valid UTF-8. Fall back to an
        // empty list so charAt() never indexes into a boolean.
        $this->chArr = ($chars === false) ? [] : $chars;

        // Take the length from the split result so it always agrees with the bounds of chArr.
        $this->length = count($this->chArr);

        $this->ch = $this->charAt(0);
    }
63
64
    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // The bitmask tests yield integers; cast them so the constructor receives real booleans.
        $associative = $associative || (bool)($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool)($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string)$source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value and trailing whitespace/comments is an error.
        // Compare against null explicitly: the character "0" is falsy in PHP, so a
        // truthiness test would silently accept input such as `[]0`.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
92
93
    /**
     * Look up the character stored at a given position.
     *
     * @param int $at Zero-based character position.
     *
     * @return string|null The character, or null when the position is out of range.
     */
    private function charAt($at)
    {
        $withinBounds = $at >= 0 && $at < $this->length;

        return $withinBounds ? $this->chArr[$at] : null;
    }
106
107
    /**
     * Advance to the following character and make it the current one.
     *
     * When $c is supplied, the current character must equal $c; otherwise a
     * syntax error is raised before anything is consumed.
     *
     * @param string|null $c Character the current position is required to hold, if any.
     *
     * @return null|string The new current character, or null once the input is exhausted.
     */
    private function next($c = null)
    {
        $mismatch = $c !== null && $this->ch !== $c;
        if ($mismatch) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            ));
        }

        // A "\n", or a "\r" that is not the first half of "\r\n", ends the line.
        // Decide this before moving, because peek() is relative to the current position.
        $endsLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }
143
144
    /**
     * Look one character ahead of the current position.
     *
     * Neither the position nor the current character is changed.
     *
     * @return mixed The following character, or null if there is none.
     */
    private function peek()
    {
        $lookahead = $this->at + 1;

        return $this->charAt($lookahead);
    }
154
155
    /**
     * Attempt to match a regular expression against the text starting at the current position.
     *
     * On success the position and column number are advanced past the end of the
     * match and the current character is refreshed. The line number is never
     * touched here, so patterns passed in must not consume line breaks.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        // Pass the encoding explicitly, as every other mb_* call in this class does, so the
        // character offset does not depend on the runtime's internal-encoding setting.
        $subject = mb_substr($this->json, $this->at, null, 'utf-8');

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0] is a pair: the matched text and the offset where it starts.
        // PREG_OFFSET_CAPTURE reports that offset in bytes, so convert it to characters.
        $matchedText = $matches[0][0];
        $byteOffset = $matches[0][1];
        $offset = mb_strlen(mb_strcut($subject, 0, $byteOffset, 'utf-8'), 'utf-8');

        $advanceBy = $offset + mb_strlen($matchedText, 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matchedText;
    }
186
187
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes replaced by the characters they denote.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // The zero-width non-joiner (U+200C) and zero-width joiner (U+200D) are valid inside an
        // identifier. They are written as \x{...} escapes, not literal invisible characters,
        // so an editor or copy/paste cannot silently drop them.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by decoding each run of \uXXXX sequences as a JSON string.
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }
215
216 210
    /**
     * Parse a number.
     *
     * Accepts an optional leading sign followed by Infinity, NaN, a hexadecimal
     * literal (0x...), or a decimal literal with optional fraction and exponent.
     *
     * @return int|float|string A string is returned only when big-int-to-string casting is enabled
     *                          and a decimal integer literal is too large for a native integer.
     */
    private function number()
    {
        $sign = '';
        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }
        $negative = ($sign === '-');

        // support for Infinity
        if ($this->ch === 'I') {
            // word() either consumes the whole word or throws.
            $this->word();

            return $negative ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            // word() either consumes the whole word and returns NAN, or throws.
            // Do not compare the result with NAN: NAN is never equal to itself, so such
            // a check would reject every valid signed NaN. The sign is ignored because
            // -NaN is also NaN.
            return $this->word();
        }

        $string = '';
        $isHex = false;
        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $isHex = true;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Convert the digits only: hexdec() raises a deprecation notice on PHP 7.4+
            // when handed characters such as the "x" of the prefix.
            $number = hexdec($digits);
            if ($negative) {
                $number = -$number;
            }
        } else {
            if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }

            // Validate before doing arithmetic: PHP 8 throws a TypeError for arithmetic
            // on a non-numeric string such as "" or ".".
            if (!is_numeric($string)) {
                $this->throwSyntaxError('Bad number');
            }

            // Adding 0 will automatically cast this to an int or float
            $number = ($negative ? '-' . $string : $string) + 0;
        }

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Mirror json_decode()'s JSON_BIGINT_AS_STRING: only an integer literal that overflowed
        // into a float is handed back as its original text.
        $isBigInt = !$isHex && is_float($number) && preg_match('/^\d+$/', $string) === 1;
        if ($this->castBigIntToString && $isBigInt) {
            return $negative ? '-' . $string : $string;
        }

        return $number;
    }
295
296 93
    /**
     * Parse a quoted string.
     *
     * The current character is taken as the opening quote, and the same
     * character closes the string. A syntax error is raised for an unknown
     * escape, an unescaped line feed, or end of input before the closing quote.
     *
     * @return string The decoded contents, without the surrounding quotes.
     */
    private function string()
    {
        $quote = $this->ch;
        $buffer = '';

        $this->next();
        while ($this->ch !== null) {
            // Closing quote: consume it and hand back what was collected.
            if ($this->ch === $quote) {
                $this->next();

                return $buffer;
            }

            // unescaped newlines are invalid; see:
            // https://github.com/json5/json5/issues/24
            // @todo this feels special-cased; are there other invalid unescaped chars?
            if ($this->ch === "\n") {
                break;
            }

            // Ordinary character: copy it and move on.
            if ($this->ch !== '\\') {
                $buffer .= $this->ch;
                $this->next();
                continue;
            }

            // A run of \uXXXX escapes is decoded in one go; match() has already
            // advanced past it, so no extra next() is needed.
            $unicodeRun = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
            if ($unicodeRun !== null) {
                $buffer .= json_decode('"'.$unicodeRun.'"');
                continue;
            }

            // Step over the backslash and look at the escaped character.
            $this->next();
            if ($this->ch === "\r") {
                // Escaped line break; also swallow the "\n" of a "\r\n" pair.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $replacement = self::getEscapee($this->ch);
                if ($replacement === null) {
                    break;
                }
                $buffer .= $replacement;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
339
340
    /**
     * Skip over a single-line (//) comment.
     *
     * On entry the current character is expected to be the second slash of the
     * opening pair. Characters are consumed up to and including the first
     * "\n" or "\r", or until the input runs out.
     */
    private function inlineComment()
    {
        while (true) {
            $current = $this->next();

            if ($current === null) {
                // End of text also ends the comment.
                return;
            }

            if ($current === "\n" || $current === "\r") {
                $this->next();

                return;
            }
        }
    }
357
358
    /**
     * Skip over a block comment.
     *
     * On entry the current character is expected to be the asterisk of the
     * opening slash-asterisk pair. Characters are consumed until the closing
     * asterisk-slash pair has been passed; reaching the end of the text first
     * is a syntax error.
     */
    private function blockComment()
    {
        // Step past the asterisk that opened the comment.
        $this->next();

        while ($this->ch !== null) {
            if ($this->ch !== '*') {
                $this->next();
                continue;
            }

            // An asterisk only closes the comment when a slash follows directly;
            // otherwise the following character is examined on the next pass.
            $this->next('*');
            if ($this->ch === '/') {
                $this->next('/');

                return;
            }
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
381
382
    /**
     * Skip one comment of either kind, dispatching on the character after the first slash.
     */
    private function comment()
    {
        // Every comment opens with a slash; next() verifies and consumes it.
        $this->next('/');

        if ($this->ch === '*') {
            $this->blockComment();

            return;
        }

        if ($this->ch === '/') {
            $this->inlineComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
398
399
    /**
     * Skip any run of whitespace and comments.
     *
     * A lone slash is treated as the start of a comment. That is safe because
     * no JSON5 value begins with a slash, and would need revisiting if one ever did.
     */
    private function white()
    {
        for (;;) {
            if ($this->ch === null) {
                return;
            }

            if ($this->ch === '/') {
                $this->comment();
                continue;
            }

            $isWhitespace = preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $this->ch) === 1;
            if (!$isWhitespace) {
                return;
            }

            $this->next();
        }
    }
418
419
    /**
     * Parse one of the literal words: true, false, null, Infinity or NaN.
     *
     * The word is chosen by the current character, and every following
     * character must match or next() raises a syntax error.
     *
     * @return bool|float|null The PHP value the word stands for.
     */
    private function word()
    {
        // First letter => [full spelling, resulting value]
        $words = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if ($first !== null && isset($words[$first])) {
            list($spelling, $value) = $words[$first];

            foreach (str_split($spelling) as $letter) {
                $this->next($letter);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }
463
464 42
    /**
     * Parse an array value.
     *
     * The current character is expected to be the opening bracket. A trailing
     * comma before the closing bracket is accepted; an omitted element is not.
     *
     * @return array The parsed elements, in source order.
     */
    private function arr()
    {
        $items = [];

        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        $this->depth++;
        if ($this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->next('[');
        $this->white();

        while ($this->ch !== null) {
            // Closing bracket: either an empty array or the end after a trailing comma.
            if ($this->ch === ']') {
                $this->next(']');
                $this->depth--;

                return $items;
            }

            // ES5 permits holes such as [,] and [,null]; JSON5 does not.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $items[] = $this->value();
            $this->white();

            // A comma means more elements (or a trailing comma) may follow.
            if ($this->ch === ',') {
                $this->next(',');
                $this->white();
                continue;
            }

            // Without a comma, the only legal continuation is the closing bracket.
            $this->next(']');
            $this->depth--;

            return $items;
        }

        $this->throwSyntaxError('Bad array');
    }
504
505
    /**
     * Parse an object value
     *
     * The current character is expected to be the opening brace. Keys may be
     * quoted strings or unquoted identifiers, and a trailing comma before the
     * closing brace is accepted.
     *
     * @return array|\stdClass An associative array when associative mode is on, otherwise a stdClass.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();

            // Test against null, as arr() does. A truthiness test would end the loop on
            // the character "0" and report 'Bad object' instead of the identifier error.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;
                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
556
557
    /**
     * Parse a single JSON5 value of any kind.
     *
     * Leading whitespace and comments are skipped, then the first significant
     * character selects the parser: object, array, string, number or word.
     *
     * @return mixed The parsed value.
     */
    private function value()
    {
        $this->white();

        $lead = $this->ch;

        if ($lead === '{') {
            return $this->obj();
        }

        if ($lead === '[') {
            return $this->arr();
        }

        if ($lead === '"' || $lead === "'") {
            return $this->string();
        }

        // Numbers may open with a sign, a bare decimal point, or a digit.
        if ($lead === '-' || $lead === '+' || $lead === '.' || is_numeric($lead)) {
            return $this->number();
        }

        return $this->word();
    }
582
583 114
    /**
     * Raise a syntax error tagged with the current line and column.
     *
     * @param string $message Description of the problem.
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        $line = $this->lineNumber;
        $column = $this->columnNumber;

        throw new SyntaxError($message, $line, $column);
    }
587
588 27
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr The character, or null for end of input.
     *
     * @return string The character wrapped in single quotes, or the text EOF for null.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'" . $chr . "'";
    }
592
593
    /**
     * Translate the character following a backslash into the text it stands for.
     *
     * @param string $ch The character that came after the backslash.
     *
     * @return string|null The replacement text (empty for an escaped line feed),
     *                     or null when the character is not a recognised escape.
     */
    private static function getEscapee($ch)
    {
        if ($ch === null) {
            return null;
        }

        // Escape character => replacement text
        $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => chr(8),
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        return isset($escapes[$ch]) ? $escapes[$ch] : null;
    }
616
}
617