Completed
Push — master ( a91b19...9e8ff9 )
by Colin
01:47
created

Json5Decoder::string()   D

Complexity

Conditions 9
Paths 4

Size

Total Lines 43
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 27
CRAP Score 9.0036

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 9
eloc 26
c 4
b 0
f 0
nc 4
nop 0
dl 0
loc 43
ccs 27
cts 28
cp 0.9643
crap 9.0036
rs 4.909
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
/**
 * Recursive-descent decoder that converts a JSON5 document into PHP values.
 *
 * The decoder walks the input one UTF-8 character at a time, keeping track of
 * the current line and column so syntax errors can report where they occurred.
 * The only public entry point is the static decode() method.
 */
final class Json5Decoder
{
    const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u';

    /** @var string The complete document being decoded. */
    private $json;

    /** @var int Zero-based character (not byte) offset of the current character. */
    private $at = 0;

    /** @var int One-based line number of the current character. */
    private $lineNumber = 1;

    /** @var int One-based column number of the current character. */
    private $columnNumber = 1;

    /** @var string|null The current character, or null once the input is exhausted. */
    private $ch;

    /** @var bool Whether objects are decoded as associative arrays instead of stdClass. */
    private $associative = false;

    /** @var int Maximum nesting depth allowed for arrays and objects. */
    private $maxDepth = 512;

    /** @var bool Whether integers too large for a PHP int are returned as strings. */
    private $castBigIntToString = false;

    /** @var int Current nesting depth (the top-level value counts as depth 1). */
    private $depth = 1;

    /** @var int Length of the document in characters. */
    private $length;

    /** @var string[]|null Lazily-built list of the document's lines, used by match(). */
    private $lineCache;

    /**
     * Private constructor; use decode() instead.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = mb_strlen($json, 'utf-8');

        $this->ch = $this->charAt(0);
    }

    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON5 string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the source is not a valid JSON5 document
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        $associative = $associative || ($options & JSON_OBJECT_AS_ARRAY);
        // Normalise the masked bit to a real boolean, as the constructor documents.
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value is an error. Compare against null explicitly:
        // a truthiness test would treat a trailing "0" as end-of-input.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the character at the given character offset.
     *
     * @param int $at
     *
     * @return string|null The character, or null when the offset is out of range
     */
    private function charAt($at)
    {
        if ($at < 0 || $at >= $this->length) {
            return null;
        }

        return mb_substr($this->json, $at, 1, 'utf-8');
    }

    /**
     * Parse the next character.
     *
     * If $c is given, the next char will only be parsed if the current
     * one matches $c.
     *
     * @param string|null $c
     *
     * @return null|string The new current character, or null at the end of the input
     *
     * @throws SyntaxError if $c is given and does not match the current character
     */
    private function next($c = null)
    {
        // If a c parameter is provided, verify that it matches the current character.
        if ($c !== null && $c !== $this->ch) {
            $this->throwSyntaxError(sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->ch)
            ));
        }

        // Advance the position. A line ends at "\n", or at a "\r" that is not
        // immediately followed by "\n" (so "\r\n" counts as a single line break).
        $this->at++;
        if ($this->ch === "\n" || ($this->ch === "\r" && $this->charAt($this->at) !== "\n")) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        // When there are no more characters, charAt() yields null.
        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }

    /**
     * Get the next character without consuming it or
     * assigning it to the ch variable.
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->charAt($this->at + 1);
    }

    /**
     * Returns the part of the current line starting at the current column.
     *
     * @return string
     */
    private function getLineRemainder()
    {
        // Lines are separated by "\n", "\r\n", or a lone "\r"
        if ($this->lineCache === null) {
            $this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
        }

        $line = $this->lineCache[$this->lineNumber - 1];

        // Pass the encoding explicitly so this does not depend on mb_internal_encoding().
        return mb_substr($line, $this->columnNumber - 1, null, 'utf-8');
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * position is advanced past the matched text.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the expression did not match
     */
    private function match($regex)
    {
        $subject = $this->getLineRemainder();

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair.
        $matchedText = $matches[0][0];
        $byteOffset = $matches[0][1];

        // The offset is in bytes, so convert it to a character count.
        $charOffset = mb_strlen(mb_strcut($subject, 0, $byteOffset, 'utf-8'), 'utf-8');

        $advanceBy = $charOffset + mb_strlen($matchedText, 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matchedText;
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded
     *
     * @throws SyntaxError if the current position does not start a valid identifier
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are the zero-width non-joiner and joiner, which may appear after the first character.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by letting json_decode() interpret them
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number: decimal, hexadecimal, Infinity or NaN, with an optional sign.
     *
     * @return int|float|string A string is returned only when big-int casting is
     *                          enabled and an integer literal does not fit in a PHP int
     *
     * @throws SyntaxError if the text is not a valid JSON5 number
     */
    private function number()
    {
        $sign = '';
        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // Support for Infinity; word() throws unless the whole keyword is present.
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // Support for NaN. The sign is ignored because -NaN is also NaN. The result
        // must not be compared against NAN: NaN is never identical to itself.
        if ($this->ch === 'N') {
            return $this->word();
        }

        $literal = '';
        if ($this->ch === '0') {
            $literal = '0';
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();

                return $this->hexNumber($sign);
            }

            if (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if (($mantissa = $this->match('/^\d*\.?\d*/')) !== null) {
            $literal .= $mantissa;
        }
        if (($exponent = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $literal .= $exponent;
        }

        // Validate the unsigned literal before applying the sign. Negating first would
        // let PHP coerce malformed text such as "1e", or fail on an empty string.
        if (!is_numeric($literal) || !is_finite((float) $literal)) {
            $this->throwSyntaxError('Bad number');
        }

        $signed = ($sign === '-') ? '-' . $literal : $literal;

        // Adding 0 will automatically cast this to an int or float
        $value = $signed + 0;

        // Like json_decode(), only integers that overflowed into a float are kept as strings.
        if ($this->castBigIntToString && is_float($value) && preg_match('/^-?\d+$/', $signed) === 1) {
            return $signed;
        }

        return $value;
    }

    /**
     * Parse the digits of a hexadecimal literal whose "0x" prefix was already consumed.
     *
     * @param string $sign Either '-', '+' or an empty string
     *
     * @return int|float
     *
     * @throws SyntaxError if no hex digits follow or the value is not finite
     */
    private function hexNumber($sign)
    {
        $digits = $this->match('/^[A-Fa-f0-9]+/');
        if ($digits === null) {
            $this->throwSyntaxError('Bad hex number');
        }

        $number = hexdec($digits);
        if ($sign === '-') {
            $number = -$number;
        }

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        return $number;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current character must be the opening quote.
     *
     * @return string
     *
     * @throws SyntaxError if the string is unterminated or contains an invalid escape
     */
    private function string()
    {
        $string = '';

        $delim = $this->ch;
        $this->next();
        while ($this->ch !== null) {
            if ($this->ch === $delim) {
                $this->next();

                return $string;
            }

            if ($this->ch === '\\') {
                // Consume a run of \uXXXX escapes at once so surrogate pairs decode together.
                $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                if ($unicodeEscaped !== null) {
                    $string .= json_decode('"'.$unicodeEscaped.'"');
                    continue;
                }

                $this->next();
                if ($this->ch === "\r") {
                    // An escaped line break adds nothing; treat "\r\n" as one break.
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->ch)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->ch === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->ch;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment()
    {
        do {
            $this->next();
            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        } while ($this->ch !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the * character in the slash-star pair that begins this block comment.
     * To finish the block comment, we look for an ending star-slash pair of characters,
     * but we also watch for the end of text before the comment is terminated.
     *
     * @throws SyntaxError if the input ends before the comment is closed
     */
    private function blockComment()
    {
        do {
            $this->next();
            while ($this->ch === '*') {
                $this->next('*');
                if ($this->ch === '/') {
                    $this->next('/');

                    return;
                }
            }
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the slash does not begin a recognised comment
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->next('/');

        if ($this->ch === '/') {
            $this->inlineComment();
        } elseif ($this->ch === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
            } elseif (preg_match(self::REGEX_WHITESPACE, $this->ch) === 1) {
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches the keywords true, false, null, Infinity and NaN.
     *
     * @return bool|float|null
     *
     * @throws SyntaxError if the current position does not hold one of those keywords
     */
    private function word()
    {
        switch ($this->ch) {
            case 't':
                $this->expectLiteral('true');

                return true;
            case 'f':
                $this->expectLiteral('false');

                return false;
            case 'n':
                $this->expectLiteral('null');

                return null;
            case 'I':
                $this->expectLiteral('Infinity');

                return INF;
            case 'N':
                $this->expectLiteral('NaN');

                return NAN;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }

    /**
     * Consume the given ASCII keyword one character at a time.
     *
     * @param string $literal
     *
     * @throws SyntaxError at the first character that does not match
     */
    private function expectLiteral($literal)
    {
        foreach (str_split($literal) as $expected) {
            $this->next($expected);
        }
    }

    /**
     * Parse an array value. A trailing comma after the last element is allowed.
     *
     * @return array
     *
     * @throws SyntaxError if the array is malformed or nested too deeply
     */
    private function arr()
    {
        $arr = [];

        if ($this->ch === '[') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('[');
            $this->white();
            while ($this->ch !== null) {
                if ($this->ch === ']') {
                    $this->next(']');
                    $this->depth--;

                    return $arr; // Potentially empty array
                }

                // ES5 allows omitting elements in arrays, e.g. [,] and
                // [,null]. We don't allow this in JSON5.
                if ($this->ch === ',') {
                    $this->throwSyntaxError('Missing array element');
                }

                $arr[] = $this->value();

                $this->white();
                // If there's no comma after this value, this needs to
                // be the end of the array.
                if ($this->ch !== ',') {
                    $this->next(']');
                    $this->depth--;

                    return $arr;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad array');
    }

    /**
     * Parse an object value. A trailing comma after the last pair is allowed.
     *
     * @return array|\stdClass An associative array when that mode is enabled, otherwise a stdClass
     *
     * @throws SyntaxError if the object is malformed or nested too deeply
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();
            // Compare against null explicitly so a "0" character is not mistaken for end-of-input.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;

                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();
                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;

                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word.
     *
     * @return mixed
     *
     * @throws SyntaxError if no valid value starts at the current position
     */
    private function value()
    {
        $this->white();
        switch ($this->ch) {
            case '{':
                return $this->obj();
            case '[':
                return $this->arr();
            case '"':
            case "'":
                return $this->string();
            case '-':
            case '+':
            case '.':
                return $this->number();
            default:
                return is_numeric($this->ch) ? $this->number() : $this->word();
        }
    }

    /**
     * Throws a syntax error located at the current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        throw new SyntaxError($message, $this->lineNumber, $this->columnNumber);
    }

    /**
     * Formats a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string The quoted character, or 'EOF' for null
     */
    private static function renderChar($chr)
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Translates the character following a backslash into the text it stands for.
     *
     * @param string $ch
     *
     * @return string|null The replacement text, or null if the escape is not recognised
     */
    private static function getEscapee($ch)
    {
        switch ($ch) {
            // @codingStandardsIgnoreStart
            case "'":  return "'";
            case '"':  return '"';
            case '\\': return '\\';
            case '/':  return '/';
            case "\n": return '';
            case 'b':  return chr(8);
            case 'f':  return "\f";
            case 'n':  return "\n";
            case 'r':  return "\r";
            case 't':  return "\t";
            default:   return null;
            // @codingStandardsIgnoreEnd
        }
    }
}
631