Completed
Pull Request — master (#2)
by Colin
01:20
created

Json5Decoder::match()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 22
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 2

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 2
eloc 11
c 4
b 0
f 0
nc 2
nop 1
dl 0
loc 22
ccs 11
cts 11
cp 1
crap 2
rs 9.2
1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    private $at = 0;
20
21
    private $lineNumber = 1;
22
23
    private $columnNumber = 1;
24
25
    private $ch;
26
27
    private $chArr;
28
29
    private $associative = false;
30
31
    private $maxDepth = 512;
32
33
    private $castBigIntToString = false;
34
35
    private $depth = 1;
36
37
    private $length;
38
39
    private $remainderCache;
40
41
    private $remainderCacheAt;
42
43
    /**
     * Private constructor; instances are only created by decode().
     *
     * Stores the decoding options, splits the input into an array of UTF-8
     * characters, loads the first character into $ch and primes the
     * remainder cache with the full input.
     *
     * @param string $json               The JSON5 text to parse.
     * @param bool   $associative        Whether objects are built as associative arrays.
     * @param int    $depth              Maximum nesting depth.
     * @param bool   $castBigIntToString Whether the big-int-as-string option is enabled.
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->associative = $associative;
        $this->maxDepth = $depth;
        // decode() derives this flag from a bitmask, so normalise it to a real boolean.
        $this->castBigIntToString = (bool) $castBigIntToString;

        $this->length = mb_strlen($json, 'utf-8');

        // -1 means "no limit". Passing null for the integer $limit parameter
        // is deprecated as of PHP 8.1.
        $this->chArr = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);
        $this->ch = $this->charAt(0);

        $this->remainderCache = $json;
        $this->remainderCacheAt = 0;
    }
65
66
    /**
     * Takes a JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters mirror PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * After the first value has been parsed, only whitespace and comments may
     * remain; anything else is reported as a syntax error.
     *
     * @param string $source      The JSON5 string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options. JSON_OBJECT_AS_ARRAY and
     *                            JSON_BIGINT_AS_STRING are the flags inspected here.
     *
     * @return mixed
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Turn the bitmask tests into real booleans before handing them to the constructor.
        $associative = $associative || (bool) ($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Compare against null rather than relying on truthiness: the
        // character '0' is falsy in PHP, so trailing input such as "1 0"
        // would otherwise be accepted silently.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
94
95
    /**
     * Look up the character stored at the given character index.
     *
     * @param int $at Zero-based character index into the input.
     *
     * @return string|null The character, or null when $at is at or past the input length.
     */
    private function charAt($at)
    {
        return $at < $this->length ? $this->chArr[$at] : null;
    }
108
109
    /**
     * Advance to the next character and keep the line/column counters in sync.
     *
     * Leaving a "\n", or a "\r" that is not immediately followed by "\n",
     * starts a new line; leaving any other character moves one column right.
     *
     * @return null|string The new current character, or null at the end of the input.
     */
    private function next()
    {
        $leaving = $this->ch;

        // This has to be decided before $at moves, because peek() is relative to $at.
        $endsLine = $leaving === "\n" || ($leaving === "\r" && $this->peek() !== "\n");

        $this->at++;

        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        return $this->ch = $this->charAt($this->at);
    }
131
132
    /**
     * Consume the current character, which must be exactly $c.
     *
     * A mismatch is reported through throwSyntaxError() with both the
     * expected and the actual character rendered by renderChar().
     *
     * @param string $c The character expected at the current position.
     *
     * @return string|null The character following the consumed one.
     */
    private function nextOrFail($c)
    {
        if ($c !== $this->ch) {
            $expected = self::renderChar($c);
            $actual = self::renderChar($this->ch);

            $this->throwSyntaxError(sprintf('Expected %s instead of %s', $expected, $actual));
        }

        return $this->next();
    }
151
152
    /**
     * Look at the character after the current one without consuming
     * anything and without touching $ch.
     *
     * @return string|null The following character, or null past the end of the input.
     */
    private function peek()
    {
        $following = $this->at + 1;

        return $this->charAt($following);
    }
162
163
    /**
     * Try to match a regular expression against the unparsed remainder of the input.
     *
     * On success the parser position and column counter are moved past the
     * end of the match and $ch is refreshed. The line counter is never
     * touched here, so this is not meant for patterns that span several lines.
     *
     * @param string $regex A complete PCRE pattern, delimiters included.
     *
     * @return string|null The matched text, or null when nothing matched.
     */
    private function match($regex)
    {
        $remainder = $this->getRemainder();

        $found = [];
        $matched = preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE);
        if (!$matched) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE each entry is a [matched text, byte offset] pair.
        list($text, $byteOffset) = $found[0];

        // The reported offset counts bytes, so measure the prefix in characters instead.
        $prefix = mb_strcut($remainder, 0, $byteOffset, 'utf-8');
        $consumed = mb_strlen($prefix, 'utf-8') + mb_strlen($text, 'utf-8');

        $this->at += $consumed;
        $this->columnNumber += $consumed;
        $this->ch = $this->charAt($this->at);

        return $text;
    }
194
195
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escape sequences decoded.
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // ZERO WIDTH NON-JOINER (U+200C) and ZERO WIDTH JOINER (U+200D) are allowed
        // after the first character. They are written as explicit \x{...} escapes
        // so that the pattern cannot be broken by an editor or tool that strips or
        // normalises invisible characters.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars. Each run of \uXXXX sequences is wrapped
        // in quotes and handed to json_decode(), which performs the decoding.
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', static function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }
223
224 210
    /**
     * Parse a numeric literal starting at the current character.
     *
     * Handles an optional leading sign, Infinity, NaN, hexadecimal literals
     * (0x / 0X prefix) and decimal literals with an optional fraction and
     * exponent. A leading zero followed by another digit is rejected as an
     * octal literal.
     *
     * When the big-int-as-string option is enabled, an integer literal that
     * is too large for a PHP int is returned as its original digit string
     * (with a leading minus sign if negative); every other number is
     * returned as an int or float.
     *
     * @return int|float|string
     */
    private function number()
    {
        $sign = '';
        $string = '';
        $base = 10;

        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next();
        }

        // support for Infinity
        if ($this->ch === 'I') {
            // word() either consumes the complete literal or raises a syntax error.
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            // word() either consumes the complete literal and returns NAN or
            // raises a syntax error. NAN never compares equal to anything,
            // itself included, so the result must not be checked with !==.
            // The sign is ignored as -NaN also is NaN.
            return $this->word();
        }

        if ($this->ch === '0') {
            $string .= $this->ch;
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();
                $base = 16;
            } elseif (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($base === 16) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // hexdec() yields an int, or a float once the value no longer fits an int.
            $number = hexdec($digits);
            if ($sign === '-') {
                $number = -$number;
            }

            if (!is_finite($number)) {
                $this->throwSyntaxError('Bad number');
            }

            return $number;
        }

        // @codingStandardsIgnoreStart
        if ((is_numeric($this->ch) || $this->ch === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $string .= $match;
        }
        if (($this->ch === 'E' || $this->ch === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $string .= $match;
        }
        // @codingStandardsIgnoreEnd

        // Keep the literal as a string while applying the sign. Arithmetic
        // negation would turn a big integer into a float too early, and would
        // operate on a non-numeric string for input such as "-" or "-.".
        $number = ($sign === '-') ? '-' . $string : $string;

        if (!is_numeric($number) || !is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // Only integer literals that overflowed into a float are kept as strings.
        $isIntLike = preg_match('/^-?\d+$/', $number) === 1;
        if ($this->castBigIntToString && $isIntLike && is_float($asIntOrFloat)) {
            return $number;
        }

        return $asIntOrFloat;
    }
303
304 93
    /**
     * Parse a quoted string.
     *
     * The current character is taken as the delimiter and the string runs
     * until the next unescaped occurrence of it. Runs of \uXXXX escapes are
     * decoded through json_decode(), single-character escapes through
     * getEscapee(), and a backslash directly before a line break continues
     * the string on the next line. An unescaped "\n", an unknown escape or
     * the end of the input is reported as 'Bad string'.
     *
     * @return string The decoded string contents.
     */
    private function string()
    {
        $buffer = '';
        $quote = $this->ch;

        $this->next();
        while (null !== $this->ch) {
            $current = $this->ch;

            if ($current === $quote) {
                $this->next();

                return $buffer;
            }

            if ($current === "\n") {
                // unescaped newlines are invalid; see:
                // https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            }

            if ($current !== '\\') {
                $buffer .= $current;
                $this->next();
                continue;
            }

            // From here on the current character is a backslash.
            if ($this->peek() === 'u') {
                $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
                if ($unicodeEscaped) {
                    // match() has already moved past the escape run, so do not advance again.
                    $buffer .= json_decode('"'.$unicodeEscaped.'"');
                    continue;
                }
            }

            $this->next();
            if ($this->ch === "\r") {
                // Escaped line break: also swallow the "\n" of a "\r\n" pair.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $escapee = self::getEscapee($this->ch);
                if ($escapee === null) {
                    break;
                }
                $buffer .= $escapee;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
347
348
    /**
     * Skip a single-line comment.
     *
     * On entry the current character is expected to be the second slash of
     * the opening pair. Characters are consumed up to and including the
     * first "\n" or "\r", or until the input runs out.
     */
    private function inlineComment()
    {
        do {
            $current = $this->next();

            if ($current === "\n" || $current === "\r") {
                // Step over the line break itself before handing control back.
                $this->next();

                return;
            }
        } while ($current !== null);
    }
365
366
    /**
     * Skip a block comment.
     *
     * On entry the current character is expected to be the star of the
     * opening slash-star pair. Characters are consumed until a star that is
     * directly followed by a slash has been passed. Reaching the end of the
     * input first is reported as 'Unterminated block comment'.
     */
    private function blockComment()
    {
        do {
            $this->next();

            // Step over each star; a slash right after one closes the comment.
            while ($this->ch === '*') {
                if ($this->next() === '/') {
                    $this->next();

                    return;
                }
            }
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }
389
390
    /**
     * Skip one comment, dispatching on the character after the leading slash.
     *
     * A second slash starts an inline comment, a star starts a block
     * comment, and anything else is reported as 'Unrecognized comment'.
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        switch ($this->ch) {
            case '/':
                $this->inlineComment();
                break;
            case '*':
                $this->blockComment();
                break;
            default:
                $this->throwSyntaxError('Unrecognized comment');
        }
    }
406
407
    /**
     * Skip over any run of whitespace and comments.
     *
     * A lone slash is always treated as the start of a comment. That is safe
     * as long as no valid value can begin with a slash, which holds because
     * regular expression literals are not part of JSON(5).
     */
    private function white()
    {
        while (($current = $this->ch) !== null) {
            if ($current === '/') {
                $this->comment();
                continue;
            }

            if (preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $current) !== 1) {
                // Neither a comment nor whitespace: leave it for the caller.
                return;
            }

            $this->next();
        }
    }
426
427
    /**
     * Parse one of the literal words true, false, null, Infinity or NaN.
     *
     * The word is selected by the current character and then consumed one
     * character at a time, so a misspelling fails at the first character
     * that differs. Any other starting character is reported as unexpected.
     *
     * @return bool|float|null The PHP value the literal stands for.
     */
    private function word()
    {
        // First character => [full spelling, resulting PHP value]
        $literals = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if (is_string($first) && isset($literals[$first])) {
            list($spelling, $value) = $literals[$first];

            foreach (str_split($spelling) as $expected) {
                $this->nextOrFail($expected);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }
471
472 42
    /**
     * Parse an array value.
     *
     * On entry the current character is expected to be the opening bracket.
     * Elements are separated by commas and a trailing comma before the
     * closing bracket is accepted; omitted elements such as [,] are not.
     * The nesting depth is raised while inside the array and a syntax error
     * is reported once it exceeds the configured maximum.
     *
     * @return array The parsed elements, possibly empty.
     */
    private function arr()
    {
        $items = [];

        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        $this->depth++;
        if ($this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();

        while ($this->ch !== null) {
            if ($this->ch === ']') {
                // Either an empty array or the bracket after a trailing comma.
                $this->nextOrFail(']');
                $this->depth--;

                return $items;
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $items[] = $this->value();
            $this->white();

            if ($this->ch === ',') {
                $this->nextOrFail(',');
                $this->white();
                continue;
            }

            // Without a comma after the value, the array has to end here.
            $this->nextOrFail(']');
            $this->depth--;

            return $items;
        }

        $this->throwSyntaxError('Bad array');
    }
512
513
    /**
     * Parse an object value.
     *
     * On entry the current character is expected to be the opening brace.
     * Keys may be single- or double-quoted strings or unquoted identifiers;
     * pairs are separated by commas and a trailing comma before the closing
     * brace is accepted. The nesting depth is raised while inside the object
     * and a syntax error is reported once it exceeds the configured maximum.
     *
     * @return array|\stdClass An associative array when the decoder is in
     *                         associative mode, a stdClass instance otherwise.
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->nextOrFail('{');
            $this->white();

            // Compare against null, as arr() does. A plain truthiness test
            // would treat the character '0' as the end of the input and
            // report a misleading 'Bad object' for a key starting with 0.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->nextOrFail('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->nextOrFail(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->nextOrFail('}');
                    $this->depth--;
                    return $object;
                }
                $this->nextOrFail(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
564
565
    /**
     * Parse a single JSON5 value.
     *
     * Leading whitespace and comments are skipped first; the first
     * significant character then decides whether an object, an array, a
     * string, a number or a literal word is parsed.
     *
     * @return mixed The parsed value.
     */
    private function value()
    {
        $this->white();

        $current = $this->ch;

        if ($current === '{') {
            return $this->obj();
        }

        if ($current === '[') {
            return $this->arr();
        }

        if ($current === '"' || $current === "'") {
            return $this->string();
        }

        // A sign, a leading decimal point or a digit all start a number.
        if ($current === '-' || $current === '+' || $current === '.' || is_numeric($current)) {
            return $this->number();
        }

        return $this->word();
    }
590
591 114
    /**
     * Raise a SyntaxError that carries the current line and column numbers.
     *
     * @param string $message Description of the problem.
     *
     * @throws SyntaxError Always.
     */
    private function throwSyntaxError($message)
    {
        $line = $this->lineNumber;
        $column = $this->columnNumber;

        throw new SyntaxError($message, $line, $column);
    }
595
596 27
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr The character, or null for the end of the input.
     *
     * @return string 'EOF' for null, otherwise the character wrapped in single quotes.
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return sprintf("'%s'", $chr);
    }
600
601
    /**
     * Translate the character that follows a backslash inside a string.
     *
     * @param string $ch The character following the backslash.
     *
     * @return string|null The text the escape stands for (an empty string for
     *                     an escaped "\n"), or null for an unknown escape.
     */
    private static function getEscapee($ch)
    {
        $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => "\x08",
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];

        if (is_string($ch) && isset($escapes[$ch])) {
            return $escapes[$ch];
        }

        return null;
    }
624
625
    /**
     * Returns everything from $this->at onwards.
     *
     * Utilizes a cache so we don't have to continuously parse through UTF-8
     * data that was earlier in the string which we don't even care about:
     * each call slices only the previously cached remainder, not the full input.
     *
     * @return string
     */
    private function getRemainder()
    {
        if ($this->remainderCacheAt === $this->at) {
            return $this->remainderCache;
        }

        // Pin the encoding so the character offset is always interpreted as
        // UTF-8, like every other mb_* call in this class, instead of
        // depending on whatever mb_internal_encoding() happens to be.
        $subject = mb_substr($this->remainderCache, $this->at - $this->remainderCacheAt, null, 'utf-8');
        $this->remainderCache = $subject;
        $this->remainderCacheAt = $this->at;

        return $subject;
    }
645
}
646