Completed
Push — master ( 341b80...1751fd )
by Colin
02:14
created

Json5Decoder::string()   C

Complexity

Conditions 12
Paths 12

Size

Total Lines 44
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 14.25

Importance

Changes 0
Metric Value
cc 12
eloc 29
nc 12
nop 0
dl 0
loc 44
ccs 24
cts 32
cp 0.75
crap 14.25
rs 5.1612
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u';
20
21
    private $json;
22
23
    private $at = 0;
24
25
    private $lineNumber = 1;
26
27
    private $columnNumber = 1;
28
29
    private $ch;
30
31
    private $associative = false;
32
33
    private $maxDepth = 512;
34
35
    private $castBigIntToString = false;
36
37
    private $depth = 1;
38
39
    private $length;
40
41
    private $lineCache;
42
43
    /**
     * Builds a decoder positioned on the first character of the input.
     *
     * The constructor is private; within this class it is invoked by decode().
     *
     * @param string $json               JSON5 text to parse
     * @param bool   $associative        Stored and consulted by obj() to pick arrays over stdClass
     * @param int    $depth              Stored as the maximum nesting depth
     * @param bool   $castBigIntToString Stored and consulted by number()
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->castBigIntToString = $castBigIntToString;
        $this->maxDepth = $depth;
        $this->associative = $associative;
        $this->json = $json;

        // The length is counted in UTF-8 characters, the same unit charAt() indexes by.
        $this->length = mb_strlen($this->json, 'utf-8');

        // Prime the current character; charAt() yields null for empty input.
        $this->ch = $this->charAt(0);
    }
62
63
    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters mirror PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded (cast to string before parsing).
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options; JSON_OBJECT_AS_ARRAY and
     *                            JSON_BIGINT_AS_STRING are the flags inspected here.
     *
     * @return mixed
     *
     * @throws SyntaxError when the input is not valid or has trailing content after the value
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        $associative = $associative || ($options & JSON_OBJECT_AS_ARRAY);

        // The constructor documents a bool, so collapse the masked bit explicitly.
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value and trailing whitespace/comments is an error.
        // Compare against null explicitly: the character '0' is falsy in PHP, so a
        // truthiness test would silently accept input such as "[] 0".
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
91
92
    /**
     * Fetch the single UTF-8 character at a character index of the input.
     *
     * @param int $at Zero-based character index
     *
     * @return string|null The character, or null when $at lies outside the input
     */
    private function charAt($at)
    {
        $inBounds = $at >= 0 && $at < $this->length;

        return $inBounds ? mb_substr($this->json, $at, 1, 'utf-8') : null;
    }
105
106
    /**
     * Advance to the next character and make it the current one.
     *
     * When $c is given, the current character must equal it; otherwise a
     * syntax error is raised before anything is consumed.
     *
     * @param string|null $c Character the current position is required to hold
     *
     * @return null|string The new current character, or null once the input is exhausted
     */
    private function next($c = null)
    {
        $mismatch = $c !== null && $c !== $this->ch;
        if ($mismatch) {
            $expected = self::renderChar($c);
            $actual = self::renderChar($this->ch);
            $this->throwSyntaxError(sprintf('Expected %s instead of %s', $expected, $actual));
        }

        // A line ends on "\n", or on a "\r" that is not the first half of "\r\n".
        // This must be decided before the position moves, because peek() is relative to it.
        $endsLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        return $this->ch = $this->charAt($this->at);
    }
142
143
    /**
     * Look at the character after the current one without moving the
     * position or touching the ch property.
     *
     * @return string|null The following character, or null past the end of input
     */
    private function peek()
    {
        $following = $this->at + 1;

        return $this->charAt($following);
    }
153
154
    /**
     * Return the rest of the current line, starting at the current column.
     *
     * The input is split into lines once and cached in lineCache.
     *
     * @return string
     */
    private function getLineRemainder()
    {
        // Lines are separated by "\n", "\r\n", or a lone "\r"
        if ($this->lineCache === null) {
            $this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
        }

        $line = $this->lineCache[$this->lineNumber - 1];

        // columnNumber counts UTF-8 characters, so name the encoding explicitly
        // (as every other mb_* call here does) instead of relying on the
        // runtime's internal encoding setting.
        return mb_substr($line, $this->columnNumber - 1, null, 'utf-8');
    }
168
169
    /**
     * Try a regular expression against the remainder of the current line and,
     * on success, consume everything up to the end of the matched text.
     *
     * Only the current line is searched, so a match never spans lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match
     */
    private function match($regex)
    {
        $remainder = $this->getLineRemainder();

        $found = [];
        if (preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE) !== 1) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE each entry is a [text, byte offset] pair.
        list($text, $byteOffset) = $found[0];

        // The offset is reported in bytes; convert it to a character count.
        $charOffset = mb_strlen(mb_strcut($remainder, 0, $byteOffset, 'utf-8'), 'utf-8');
        $consumed = $charOffset + mb_strlen($text, 'utf-8');

        $this->at += $consumed;
        $this->columnNumber += $consumed;
        $this->ch = $this->charAt($this->at);

        return $text;
    }
200
201
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes replaced by the characters they name
     *
     * @throws SyntaxError when no identifier starts at the current position
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // U+200C (ZWNJ) and U+200D (ZWJ) are permitted in identifier parts. They are
        // spelled as \x{...} escapes so the invisible characters cannot be lost when
        // this line is edited or copied.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars
        $unescaped = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) {
            return self::fromCharCode($m[1]);
        }, $match);

        return $unescaped;
    }
229
230 210
    /**
     * Parse a number starting at the current character.
     *
     * Handles an optional leading sign, Infinity, NaN, hexadecimal literals
     * (0x / 0X prefix) and decimal literals with optional fraction and exponent.
     * A leading zero followed by another digit is rejected as an octal literal.
     *
     * @return int|float|string An int or float; a string only when castBigIntToString
     *                          is set and an integer literal does not fit in an int
     *
     * @throws SyntaxError on a malformed or non-finite number
     */
    private function number()
    {
        $sign = '';
        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity; word() throws unless the full word is present
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();

            // NAN never compares equal (or identical) to itself, so it has to be
            // detected with is_nan() rather than a !== comparison.
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        $string = '';
        $base = 10;

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Only the digits are handed to hexdec(); the "0x" prefix is not a hex digit.
            $number = hexdec($digits);
        } else {
            if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }
            $number = $string;
        }

        // Apply the sign without arithmetic on the still-unvalidated decimal text,
        // so that inputs such as "-" or "-1e" reach the check below intact instead
        // of being coerced to a number first.
        if ($sign === '-') {
            $number = is_string($number) ? '-' . $number : -$number;
        }

        if (!is_numeric($number) || !is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // Mirror json_decode(): keep the literal text only for integer literals
        // that overflowed into a float.
        if ($this->castBigIntToString
            && is_float($asIntOrFloat)
            && is_string($number)
            && preg_match('/^-?\d+\z/', $number) === 1
        ) {
            return $number;
        }

        return $asIntOrFloat;
    }
309
310 66
    /**
     * Parse a single- or double-quoted string starting at the current character.
     *
     * @return string The decoded contents, without the surrounding quotes
     *
     * @throws SyntaxError on a missing quote, an unescaped "\n", an invalid
     *                     escape sequence, or end of input before the closing quote
     */
    private function string()
    {
        if ($this->ch !== '"' && $this->ch !== "'") {
            $this->throwSyntaxError('Bad string');
        }

        $string = '';
        $delim = $this->ch;

        while ($this->next() !== null) {
            if ($this->ch === $delim) {
                // Step past the closing quote before returning.
                $this->next();

                return $string;
            }

            if ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            }

            if ($this->ch !== '\\') {
                $string .= $this->ch;
                continue;
            }

            $this->next();
            $decoded = $this->escapeSequence();
            if ($decoded === null) {
                break;
            }
            $string .= $decoded;
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Decode the escape sequence whose first character (the one after the
     * backslash) is the current character.
     *
     * On success the current character is the LAST character of the sequence,
     * so the caller's next() lands on the first character after it.
     *
     * @return string|null The decoded text ('' for a line continuation), or null when invalid
     */
    private function escapeSequence()
    {
        if ($this->ch === 'u') {
            // Read exactly four hex digits one at a time. This keeps the cursor on
            // the final digit instead of running one character past the escape.
            $hex = '';
            for ($i = 0; $i < 4; $i++) {
                $digit = $this->next();
                if ($digit === null || preg_match('/^[A-Fa-f0-9]$/', $digit) !== 1) {
                    return null;
                }
                $hex .= $digit;
            }

            return self::fromCharCode($hex);
        }

        if ($this->ch === "\r") {
            // Backslash followed by "\r" or "\r\n" is a line continuation.
            if ($this->peek() === "\n") {
                $this->next();
            }

            return '';
        }

        return self::getEscapee($this->ch);
    }
354
355
    /**
     * Skip a double-slash comment.
     *
     * On entry the current character must be the second slash of the pair.
     * Characters are consumed up to and including the first "\n" or "\r",
     * or until the input runs out.
     */
    private function inlineComment()
    {
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not an inline comment');
        }

        while ($this->next() !== null) {
            $atLineBreak = $this->ch === "\n" || $this->ch === "\r";
            if ($atLineBreak) {
                $this->next();

                return;
            }
        }
    }
376
377
    /**
     * Skip a slash-star block comment.
     *
     * On entry the current character must be the star of the opening pair.
     * Characters are consumed through the closing star-slash pair; reaching
     * the end of input first is a syntax error.
     */
    private function blockComment()
    {
        if ($this->ch !== '*') {
            $this->throwSyntaxError('Not a block comment');
        }

        while (true) {
            $this->next();

            // A run of stars may or may not be followed by the closing slash.
            while ($this->ch === '*') {
                $this->next('*');
                if ($this->ch === '/') {
                    $this->next('/');

                    return;
                }
            }

            if ($this->ch === null) {
                break;
            }
        }

        $this->throwSyntaxError('Unterminated block comment');
    }
404
405
    /**
     * Skip one comment of either kind, starting at its leading slash.
     */
    private function comment()
    {
        // Every comment opens with a slash.
        if ($this->ch !== '/') {
            $this->throwSyntaxError('Not a comment');
        }

        $this->next('/');

        // The second character decides which kind of comment this is.
        switch ($this->ch) {
            case '/':
                $this->inlineComment();
                break;
            case '*':
                $this->blockComment();
                break;
            default:
                $this->throwSyntaxError('Unrecognized comment');
        }
    }
425
426
    /**
     * Skip over any run of whitespace and comments.
     *
     * A comment is recognised by a single leading slash. That is sufficient
     * because no JSON5 value starts with a slash; it would stop being safe
     * if such a value were ever introduced.
     */
    private function white()
    {
        for (;;) {
            if ($this->ch === null) {
                return;
            }

            if ($this->ch === '/') {
                $this->comment();
                continue;
            }

            if (preg_match(self::REGEX_WHITESPACE, $this->ch) !== 1) {
                return;
            }

            $this->next();
        }
    }
445
446
    /**
     * Parse one of the literal words: true, false, null, Infinity or NaN.
     *
     * The word is chosen by the current character and then consumed one
     * character at a time, so a misspelling raises the error produced by next().
     *
     * @return bool|float|null The PHP value of the word
     */
    private function word()
    {
        // first character => [full spelling, decoded value]
        $words = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if ($first === null || !isset($words[$first])) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
        }

        list($spelling, $decoded) = $words[$first];
        foreach (str_split($spelling) as $expected) {
            $this->next($expected);
        }

        return $decoded;
    }
490
491 42
    /**
     * Parse an array value starting at its opening bracket.
     *
     * A trailing comma before the closing bracket is accepted; an omitted
     * element (two commas in a row, or a leading comma) is not.
     *
     * @return array The parsed elements in order
     */
    private function arr()
    {
        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        $this->depth++;
        if ($this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $items = [];

        $this->next('[');
        $this->white();

        while ($this->ch !== null) {
            if ($this->ch === ']') {
                $this->next(']');
                $this->depth--;

                return $items; // Potentially empty array
            }

            // ES5 tolerates holes such as [,] and [,null]; JSON5 does not.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $items[] = $this->value();
            $this->white();

            // Without a comma after the element, the only legal
            // continuation is the closing bracket.
            if ($this->ch !== ',') {
                $this->next(']');
                $this->depth--;

                return $items;
            }

            $this->next(',');
            $this->white();
        }

        $this->throwSyntaxError('Bad array');
    }
530
531
    /**
     * Parse an object value starting at its opening brace.
     *
     * Keys may be quoted strings or unquoted identifiers. A trailing comma
     * before the closing brace is accepted.
     *
     * @return array|\stdClass An associative array when the decoder is in
     *                         associative mode, otherwise a stdClass instance
     *
     * @throws SyntaxError on malformed input or when the maximum depth is exceeded
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();

            // Loop until end of input. The test is against null (as in arr())
            // because the character '0' is falsy and would otherwise be
            // mistaken for the end of the text.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;
                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
582
583
    /**
     * Parse whichever JSON5 value starts at the current position.
     *
     * Leading whitespace and comments are skipped first; the first remaining
     * character selects the object, array, string, number or word parser.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();

        $lead = $this->ch;

        if ($lead === '{') {
            return $this->obj();
        }

        if ($lead === '[') {
            return $this->arr();
        }

        if ($lead === '"' || $lead === "'") {
            return $this->string();
        }

        // Numbers may open with a sign, a decimal point, or a digit.
        if (in_array($lead, ['-', '+', '.'], true) || is_numeric($lead)) {
            return $this->number();
        }

        return $this->word();
    }
608
609 96
    /**
     * Raise a SyntaxError carrying the current offset, line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        $error = new SyntaxError($message, $this->at, $this->lineNumber, $this->columnNumber);

        throw $error;
    }
613
614 18
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string 'EOF' for null, otherwise the character wrapped in single quotes
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return sprintf("'%s'", $chr);
    }
618
619
    /**
     * Turn a hexadecimal code point into the character it names.
     *
     * The code point is written as a numeric HTML entity and converted
     * to UTF-8 by mbstring.
     *
     * @param string $hex Hex code
     *
     * @return string Unicode character
     */
    private static function fromCharCode($hex)
    {
        $codePoint = hexdec($hex);
        $entity = '&#' . $codePoint . ';';

        return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
    }
628
629
    /**
     * Map the character following a backslash to the text it stands for.
     *
     * @param string $ch Character that followed the backslash
     *
     * @return string|null The replacement text ('' for an escaped newline,
     *                     i.e. a line continuation), or null when $ch is not
     *                     a recognised escape
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            case "'":
                return "'";
            case '"':
                return '"';
            case '\\':
                return '\\';
            case '/':
                return '/';
            case "\n":
                return '';
            // The control characters must come from double-quoted literals:
            // single-quoted '\n' is a backslash followed by the letter n.
            case 'b':
                // PHP has no "\b" escape, so backspace is spelled by its code.
                return "\x08";
            case 'f':
                return "\f";
            case 'n':
                return "\n";
            case 'r':
                return "\r";
            case 't':
                return "\t";
            default:
                return null;
        }
    }
652
}
653