Completed
Push — master ( 1751fd...4bd364 )
by Colin
01:13
created

Json5Decoder::string()   C

Complexity

Conditions 12
Paths 10

Size

Total Lines 48
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 31
CRAP Score 12.215

Importance

Changes 0
Metric Value
cc 12
eloc 32
nc 10
nop 0
dl 0
loc 48
ccs 31
cts 35
cp 0.8857
crap 12.215
rs 5.1266
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder for JSON5 text.
 *
 * The only public entry point is the static decode() method, whose signature
 * mirrors PHP's json_decode(). All parsing state (current offset, current
 * character, line/column counters used for error reporting) lives on a
 * short-lived private instance created by decode().
 */
final class Json5Decoder
{
    /**
     * Single-character pattern used by white() to recognise whitespace:
     * space, tab, CR, LF, PCRE "\v", form feed, NBSP (U+00A0) and BOM (U+FEFF).
     */
    const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u';

    /** @var string The JSON5 source text being decoded. */
    private $json;

    /** @var int Zero-based character (not byte) offset of the current character. */
    private $at = 0;

    /** @var int One-based line number of the current character (for error messages). */
    private $lineNumber = 1;

    /** @var int One-based column number of the current character (for error messages). */
    private $columnNumber = 1;

    /** @var string|null The current character, or null once the end of input is reached. */
    private $ch;

    /** @var bool When true, objects are decoded to associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Nesting limit compared against $depth when entering an array or object. */
    private $maxDepth = 512;

    /** @var bool When true, integer literals too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth; starts at 1 and is incremented per open array/object. */
    private $depth = 1;

    /** @var int Length of $json in UTF-8 characters. */
    private $length;

    /** @var string[]|null Lazily-built list of the lines of $json (see getLineRemainder()). */
    private $lineCache;

    /**
     * Private constructor.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = mb_strlen($json, 'utf-8');

        $this->ch = $this->charAt(0);
    }

    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options
     *                            (JSON_OBJECT_AS_ARRAY and JSON_BIGINT_AS_STRING are honoured).
     *
     * @return mixed
     *
     * @throws SyntaxError When the input is not valid JSON5 or has trailing content.
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Cast the masked bits to real booleans so the constructor receives the documented types.
        $associative = $associative || (bool) ($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value is an error. Compare against null rather
        // than relying on truthiness: the string "0" is falsy in PHP, so a
        // trailing "0" would otherwise be silently accepted.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Return the character at the given character offset.
     *
     * @param int $at
     *
     * @return string|null The character, or null when $at is out of range.
     */
    private function charAt($at)
    {
        if ($at < 0 || $at >= $this->length) {
            return null;
        }

        return mb_substr($this->json, $at, 1, 'utf-8');
    }

    /**
     * Parse the next character.
     *
     * If $c is given, the next char will only be parsed if the current
     * one matches $c; otherwise a syntax error is thrown.
     *
     * @param string|null $c
     *
     * @return null|string The new current character, or null at end of input.
     */
    private function next($c = null)
    {
        // If a c parameter is provided, verify that it matches the current character.
        if ($c !== null && $c !== $this->ch) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            ));
        }

        // A line ends on "\n", or on a "\r" that is not immediately followed by
        // "\n" (so that "\r\n" is counted as a single line break, on the "\n").
        if ($this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n")) {
            $this->at++;
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->at++;
            $this->columnNumber++;
        }

        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }

    /**
     * Get the next character without consuming it or
     * assigning it to the ch variable.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->charAt($this->at + 1);
    }

    /**
     * Return the rest of the current line, starting at the current column.
     *
     * @return string
     */
    private function getLineRemainder()
    {
        // Lines are separated by "\n", "\r\n", or a lone "\r" - the same rule
        // next() uses when it advances the line counter.
        if ($this->lineCache === null) {
            $this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
        }

        $line = $this->lineCache[$this->lineNumber - 1];

        // Pass the encoding explicitly, like every other mb_* call in this
        // class, instead of depending on the configured internal encoding.
        return mb_substr($line, $this->columnNumber - 1, null, 'utf-8');
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * On success the matched text is consumed (offset, column and current
     * character are advanced past it). This function will not match across
     * multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match($regex)
    {
        $subject = $this->getLineRemainder();

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // PREG_OFFSET_CAPTURE reports a byte offset; convert it to a character offset.
        $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8');

        // $matches[0][0] is the matched text, $matches[0][1] its byte offset.
        $advanceBy = $offset + mb_strlen($matches[0][0], 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matches[0][0];
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // U+200C (ZWNJ) and U+200D (ZWJ) are written as \x{...} escapes so the
        // pattern contains no invisible characters that an editor could drop.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) {
            return self::fromCharCode($m[1]);
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: optional sign, then Infinity, NaN, a hexadecimal literal
     * (0x...), or a decimal literal with optional fraction and exponent.
     *
     * @return int|float|string A string is returned only when big-int-as-string
     *                          is enabled and a decimal integer literal does not
     *                          fit into a PHP int.
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity; word() throws by itself on a misspelling
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();
            // NAN is never identical to itself, so it must be tested with is_nan().
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                // A leading zero followed by another digit would be an octal literal.
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are handed to hexdec(); the "0x" prefix is not a hex digit.
            $number = hexdec($digits);
            if ($sign === '-') {
                $number = -$number;
            }

            if (!is_finite($number)) {
                $this->throwSyntaxError('Bad number');
            }

            return $number;
        }

        // Decimal: integer/fraction part followed by an optional exponent.
        if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }

        // Validate before doing any arithmetic: inputs such as "-" or "-." leave
        // a non-numeric string here, and negating that is an error in PHP 8.
        if (!is_numeric($string)) {
            $this->throwSyntaxError('Bad number');
        }

        $literal = ($sign === '-') ? '-' . $string : $string;

        // Adding 0 will automatically cast this to an int or float
        $number = $literal + 0;

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Like json_decode(), only integer literals that overflowed to float
        // are handed back as strings; everything else stays numeric.
        $isIntegerLiteral = preg_match('/^\d+\z/', $string) === 1;
        if ($this->castBigIntToString && $isIntegerLiteral && is_float($number)) {
            return $literal;
        }

        return $number;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current character must be the opening quote. Escape sequences are
     * decoded; an unescaped line feed, an unknown escape, a malformed \u escape
     * or a missing closing quote all result in a "Bad string" syntax error.
     *
     * @return string
     */
    private function string()
    {
        if (!($this->ch === '"' || $this->ch === "'")) {
            $this->throwSyntaxError('Bad string');
        }

        $string = '';

        $delim = $this->ch;
        $this->next();
        while ($this->ch !== null) {
            if ($this->ch === $delim) {
                $this->next();

                return $string;
            } elseif ($this->ch === '\\') {
                $this->next();
                if ($this->ch === 'u') {
                    $this->next();
                    $hex = $this->match('/^[A-Fa-f0-9]{4}/');
                    if ($hex === null) {
                        break;
                    }
                    $string .= self::fromCharCode($hex);
                    // match() already consumed the four digits, so skip the next() below.
                    continue;
                } elseif ($this->ch === "\r") {
                    // Escaped line break: contributes nothing to the string.
                    // For "\r\n" step onto the "\n" so both characters are consumed.
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->ch)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->ch;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not an inline comment');
        }

        do {
            $this->next();
            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        } while ($this->ch !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the /* pair that begins this block comment.
     * To finish the block comment, we look for an ending * / pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     */
    private function blockComment()
    {
        if ($this->ch !== '*') {
            $this->throwSyntaxError('Not a block comment');
        }

        do {
            $this->next();
            // A run of asterisks may or may not be followed by the closing slash.
            while ($this->ch === '*') {
                $this->next('*');
                if ($this->ch === '/') {
                    $this->next('/');

                    return;
                }
            }
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     */
    private function comment()
    {
        // Comments always begin with a / character.
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not a comment');
        }

        $this->next('/');

        if ($this->ch === '/') {
            $this->inlineComment();
        } elseif ($this->ch === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
            } elseif (preg_match(self::REGEX_WHITESPACE, $this->ch) === 1) {
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches the literal words true, false, null, Infinity and NaN.
     *
     * @return bool|float|null The PHP value the word stands for.
     */
    private function word()
    {
        // First character => [expected spelling, decoded PHP value].
        $words = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        if ($this->ch === null || !isset($words[$this->ch])) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
        }

        $entry = $words[$this->ch];

        // Consume the word one character at a time; next() throws on the first mismatch.
        foreach (str_split($entry[0]) as $expected) {
            $this->next($expected);
        }

        return $entry[1];
    }

    /**
     * Parse an array value.
     *
     * A trailing comma before the closing bracket is accepted; an omitted
     * element (e.g. "[,]") is not.
     *
     * @return array
     */
    private function arr()
    {
        $arr = [];

        if ($this->ch === '[') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('[');
            $this->white();
            while ($this->ch !== null) {
                if ($this->ch === ']') {
                    $this->next(']');
                    $this->depth--;

                    return $arr; // Potentially empty array
                }

                // ES5 allows omitting elements in arrays, e.g. [,] and
                // [,null]. We don't allow this in JSON5.
                if ($this->ch === ',') {
                    $this->throwSyntaxError('Missing array element');
                }

                $arr[] = $this->value();
                $this->white();

                // If there's no comma after this value, this needs to
                // be the end of the array.
                if ($this->ch !== ',') {
                    $this->next(']');
                    $this->depth--;

                    return $arr;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad array');
    }

    /**
     * Parse an object value.
     *
     * Keys may be quoted strings or unquoted identifiers. A trailing comma
     * before the closing brace is accepted.
     *
     * @return array|\stdClass An array when associative decoding is enabled.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();
            // Explicit null check (as in arr()): the character "0" is falsy and
            // must still reach identifier() so it is reported as a bad key.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;

                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;

                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word. Leading whitespace and comments are skipped first.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();
        switch ($this->ch) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                // A digit starts a number; anything else must be a literal word.
                return is_numeric($this->ch) ? $this->number() : $this->word();
        }
    }

    /**
     * Throw a SyntaxError carrying the current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        throw new SyntaxError($message, $this->lineNumber, $this->columnNumber);
    }

    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The character wrapped in single quotes, or "EOF" for null.
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Convert the four hex digits of a \uXXXX escape to a UTF-8 character.
     *
     * @param string $hex Hex code
     *
     * @return string Unicode character
     */
    private static function fromCharCode($hex)
    {
        // Treat the value as one big-endian UTF-16 code unit and convert it to
        // UTF-8. This avoids the "HTML-ENTITIES" pseudo-encoding, which is
        // deprecated as of PHP 8.2.
        return mb_convert_encoding(pack('n', hexdec($hex)), 'UTF-8', 'UTF-16BE');
    }

    /**
     * Map the character following a backslash to the text it stands for.
     *
     * @param string $ch
     *
     * @return string|null The replacement text, or null for an unsupported escape.
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return ''; // escaped line feed: line continuation
            // The control characters need double quotes; single-quoted '\n'
            // would be a literal backslash followed by "n". PHP has no "\b"
            // escape, so backspace is spelled as a hex escape.
            case 'b':  return "\x08";
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }
}
657