Completed
Push — master ( caaa12...341b80 )
by Colin
01:09
created

Json5Decoder::blockComment()   B

Complexity

Conditions 5
Paths 6

Size

Total Lines 20
Code Lines 12

Duplication

Lines 20
Ratio 100 %

Code Coverage

Tests 11
CRAP Score 5.246

Importance

Changes 0
Metric Value
cc 5
eloc 12
nc 6
nop 0
dl 20
loc 20
ccs 11
cts 14
cp 0.7856
crap 5.246
rs 8.8571
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
{
    const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u';

    /** @var string The JSON5 document being decoded. */
    private $json;

    /** @var int Zero-based character (not byte) offset of the current character. */
    private $at = 0;

    /** @var int One-based line number of the current character. */
    private $lineNumber = 1;

    /** @var int One-based column number of the current character. */
    private $columnNumber = 1;

    /** @var string|null The current character, or null once the end of input is reached. */
    private $ch;

    /** @var bool Whether objects are decoded as associative arrays. */
    private $associative = false;

    /** @var int Maximum permitted nesting depth. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a native int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth. */
    private $depth = 1;

    /** @var int Length of the document in characters. */
    private $length;

    /** @var string[]|null Lazily populated list of the document's lines. */
    private $lineCache;

    /**
     * Private constructor; use Json5Decoder::decode() instead.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = mb_strlen($json, 'utf-8');

        $this->ch = $this->charAt(0);
    }

    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the document is malformed or has trailing content
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Normalise both flags to real booleans, as documented on the constructor.
        $associative = $associative || (bool) ($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Compare against null explicitly: a trailing "0" character is falsy in PHP
        // and would otherwise be mistaken for the end of the input.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the character at the given character offset.
     *
     * @param int $at
     *
     * @return string|null The character, or null when the offset is out of range
     */
    private function charAt($at)
    {
        if ($at < 0 || $at >= $this->length) {
            return null;
        }

        return mb_substr($this->json, $at, 1, 'utf-8');
    }

    /**
     * Parse the next character.
     *
     * If $c is given, the next char will only be parsed if the current
     * one matches $c.
     *
     * @param string|null $c
     *
     * @return null|string The new current character, or null at the end of input
     *
     * @throws SyntaxError if $c is given and differs from the current character
     */
    private function next($c = null)
    {
        // If a c parameter is provided, verify that it matches the current character.
        if ($c !== null && $c !== $this->ch) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            ));
        }

        // A line ends at "\n", or at a "\r" that is not followed by "\n"
        // (so that "\r\n" only counts as a single line break).
        $endsLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }

    /**
     * Get the next character without consuming it or
     * assigning it to the ch variable.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->charAt($this->at + 1);
    }

    /**
     * Returns the text from the current position up to the end of the current line.
     *
     * @return string
     */
    private function getLineRemainder()
    {
        // Lines are separated by "\n", "\r\n", or a lone "\r"
        if ($this->lineCache === null) {
            $this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
        }

        $line = $this->lineCache[$this->lineNumber - 1];

        // Pass the encoding explicitly so the result does not depend on mbstring's
        // configured internal encoding (every other mb_* call here does the same).
        return mb_substr($line, $this->columnNumber - 1, mb_strlen($line, 'utf-8'), 'utf-8');
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * On success the position is advanced past the end of the match.
     * This function will not match across multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the expression does not match
     */
    private function match($regex)
    {
        $subject = $this->getLineRemainder();

        $matches = array();
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0] holds the matched text and its offset within $subject.
        $matchedText = $matches[0][0];
        $byteOffset = $matches[0][1];

        // PREG_OFFSET_CAPTURE always reports byte offsets, so convert to a character offset.
        $charOffset = mb_strlen(mb_strcut($subject, 0, $byteOffset, 'utf-8'), 'utf-8');
        $advanceBy = $charOffset + mb_strlen($matchedText, 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matchedText;
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded
     *
     * @throws SyntaxError if no valid identifier starts at the current position
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // U+200C (ZWNJ) and U+200D (ZWJ) are allowed in identifier parts; they are written
        // as \x{...} escapes so that the invisible characters cannot be lost when editing.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        return preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) {
            return Json5Decoder::fromCharCode($m[1]);
        }, $match);
    }

    /**
     * Parse a number: decimal, hexadecimal, or a signed Infinity / NaN.
     *
     * @return int|float|string A string is only returned when big-int casting is enabled
     *                          and a decimal integer literal does not fit into a native int
     *
     * @throws SyntaxError if the number is malformed
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity; word() throws unless the full literal is present
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();
            // NAN never compares equal to itself, so it has to be tested with is_nan()
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $string .= $this->ch;
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only hand the digits to hexdec(); the "0x" prefix is not valid input for it.
            $number = hexdec($digits);
        } else {
            if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }

            // Validate the literal before doing any arithmetic on it, so that malformed
            // input such as "-1e" or "-." is rejected instead of being silently coerced.
            if (!is_numeric($string)) {
                $this->throwSyntaxError('Bad number');
            }

            // Adding 0 will automatically cast this to an int or float
            $number = $string + 0;
        }

        if ($sign === '-') {
            $number = -$number;
        }

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Like json_decode(), only integer literals which overflowed into a float are
        // returned as strings; every other number keeps its native type.
        if ($this->castBigIntToString && $base === 10 && is_float($number) && preg_match('/^\d+$/D', $string) === 1) {
            return ($sign === '-' ? '-' : '') . $string;
        }

        return $number;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * @return string The decoded string contents
     *
     * @throws SyntaxError if the string is malformed or unterminated
     */
    private function string()
    {
        if ($this->ch !== '"' && $this->ch !== "'") {
            $this->throwSyntaxError('Bad string');
        }

        $string = '';
        $delim = $this->ch;

        while ($this->next() !== null) {
            if ($this->ch === $delim) {
                $this->next();

                return $string;
            }

            if ($this->ch === '\\') {
                $this->next();
                if ($this->ch === 'u') {
                    // Read exactly four hex digits one at a time, leaving the last digit as
                    // the current character so the loop's next() lands on the character
                    // that follows the escape sequence.
                    $hex = '';
                    for ($i = 0; $i < 4; $i++) {
                        $digit = $this->next();
                        if ($digit === null || preg_match('/^[A-Fa-f0-9]$/D', $digit) !== 1) {
                            break 2;
                        }
                        $hex .= $digit;
                    }
                    $string .= self::fromCharCode($hex);
                } elseif ($this->ch === "\r") {
                    // Escaped line break: treat "\r\n" as a single line continuation
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->ch)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->ch;
            }
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     *
     * @throws SyntaxError if the current character is not a /
     */
    private function inlineComment()
    {
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not an inline comment');
        }

        do {
            $this->next();
            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        } while ($this->ch !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the /* pair that begins this block comment.
     * To finish the block comment, we look for an ending *​/ pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the comment is malformed or never terminated
     */
    private function blockComment()
    {
        if ($this->ch !== '*') {
            $this->throwSyntaxError('Not a block comment');
        }

        do {
            $this->next();
            while ($this->ch === '*') {
                $this->next('*');
                if ($this->ch === '/') {
                    $this->next('/');

                    return;
                }
            }
            // Explicit null check: a "0" inside the comment must not end the scan.
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the current position does not start a comment
     */
    private function comment()
    {
        // Comments always begin with a / character.
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not a comment');
        }

        $this->next('/');

        if ($this->ch === '/') {
            $this->inlineComment();
        } elseif ($this->ch === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
            } elseif (preg_match(self::REGEX_WHITESPACE, $this->ch) === 1) {
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, Infinity and NaN.
     *
     * @return bool|float|null The PHP value of the matched word
     *
     * @throws SyntaxError if no known word starts at the current position
     */
    private function word()
    {
        switch ($this->ch) {
            case 't':
                $this->expectLiteral('true');

                return true;
            case 'f':
                $this->expectLiteral('false');

                return false;
            case 'n':
                $this->expectLiteral('null');

                return null;
            case 'I':
                $this->expectLiteral('Infinity');

                return INF;
            case 'N':
                $this->expectLiteral('NaN');

                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }

    /**
     * Consumes the given ASCII literal character by character.
     *
     * @param string $literal
     *
     * @throws SyntaxError as soon as the input deviates from the literal
     */
    private function expectLiteral($literal)
    {
        $length = strlen($literal);
        for ($i = 0; $i < $length; $i++) {
            $this->next($literal[$i]);
        }
    }

    /**
     * Parse an array value.
     *
     * @return array
     *
     * @throws SyntaxError if the array is malformed or nested too deeply
     */
    private function arr()
    {
        $arr = array();

        if ($this->ch === '[') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('[');
            $this->white();
            while ($this->ch !== null) {
                if ($this->ch === ']') {
                    $this->next(']');
                    $this->depth--;

                    return $arr; // Potentially empty array
                }

                // ES5 allows omitting elements in arrays, e.g. [,] and
                // [,null]. We don't allow this in JSON5.
                if ($this->ch === ',') {
                    $this->throwSyntaxError('Missing array element');
                }

                $arr[] = $this->value();
                $this->white();

                // If there's no comma after this value, this needs to
                // be the end of the array.
                if ($this->ch !== ',') {
                    $this->next(']');
                    $this->depth--;

                    return $arr;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad array');
    }

    /**
     * Parse an object value.
     *
     * @return array|\stdClass An array in associative mode, otherwise a stdClass instance
     *
     * @throws SyntaxError if the object is malformed or nested too deeply
     */
    private function obj()
    {
        $object = $this->associative ? array() : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;

                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;

                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     *
     * @throws SyntaxError if no valid value starts at the current position
     */
    private function value()
    {
        $this->white();
        switch ($this->ch) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return is_numeric($this->ch) ? $this->number() : $this->word();
        }
    }

    /**
     * Throws a syntax error annotated with the current position.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        throw new SyntaxError($message, $this->at, $this->lineNumber, $this->columnNumber);
    }

    /**
     * Renders a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or "EOF" for null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Converts a four-digit hex code unit into the corresponding character.
     *
     * @param string $hex Hex code
     *
     * @return string Unicode character, encoded as UTF-8
     */
    private static function fromCharCode($hex)
    {
        // Convert from a big-endian UTF-16 code unit instead of going through the
        // 'HTML-ENTITIES' pseudo-encoding, which mbstring has deprecated.
        return mb_convert_encoding(pack('n', hexdec($hex)), 'UTF-8', 'UTF-16BE');
    }

    /**
     * Returns the character represented by a single-character escape sequence.
     *
     * @param string|null $ch The character following the backslash
     *
     * @return string|null The replacement text, or null if the escape is not recognised
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return '';     // escaped newline: line continuation
            case 'b':  return "\x08"; // backspace (PHP has no "\b" escape)
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }
}
654