Completed
Pull Request — master (#2)
by Colin
01:34
created

Json5Decoder::number()   F

Complexity

Conditions 21
Paths 460

Size

Total Lines 79
Code Lines 48

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 47
CRAP Score 23.3797

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 21
eloc 48
c 2
b 0
f 0
nc 460
nop 0
dl 0
loc 79
ccs 47
cts 57
cp 0.8246
crap 23.3797
rs 3.544

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the colinodell/json5 package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
9
 *  - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace ColinODell\Json5;
16
17
final class Json5Decoder
18
{
19
    private $json;
20
21
    private $at = 0;
22
23
    private $lineNumber = 1;
24
25
    private $columnNumber = 1;
26
27
    private $ch;
28
29
    private $chArr;
30
31
    private $associative = false;
32
33
    private $maxDepth = 512;
34
35
    private $castBigIntToString = false;
36
37
    private $depth = 1;
38
39
    private $length;
40
41
    /**
     * Private constructor; instances are created through decode().
     *
     * Stores the decoding options, splits the input into an array of UTF-8
     * characters so it can be addressed by character index, and loads the
     * first character into $ch.
     *
     * @param string $json
     * @param bool   $associative
     * @param int    $depth
     * @param bool   $castBigIntToString
     *
     * @throws SyntaxError if the input cannot be split into UTF-8 characters
     */
    private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
    {
        $this->json = $json;
        $this->associative = $associative;
        $this->maxDepth = $depth;
        $this->castBigIntToString = $castBigIntToString;

        $this->length = mb_strlen($json, 'utf-8');

        // -1 means "no limit"; passing null for the limit is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() fails when the subject is not valid UTF-8. Report it
            // explicitly rather than indexing into `false` later on.
            $this->throwSyntaxError('Malformed UTF-8 characters, possibly incorrectly encoded');
        }

        $this->chArr = $chars;
        $this->ch = $this->charAt(0);
    }
61
62
    /**
     * Takes a JSON encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string $source      The JSON string being decoded.
     * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
     * @param int    $depth       User specified recursion depth.
     * @param int    $options     Bitmask of JSON decode options.
     *
     * @return mixed
     *
     * @throws SyntaxError if the input is not valid JSON5 or has trailing content
     */
    public static function decode($source, $associative = false, $depth = 512, $options = 0)
    {
        // Reduce the bitmask tests to real booleans, which is what the constructor documents.
        $associative = $associative || (bool) ($options & JSON_OBJECT_AS_ARRAY);
        $castBigIntToString = (bool) ($options & JSON_BIGINT_AS_STRING);

        $decoder = new self((string) $source, $associative, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value and trailing whitespace/comments is an error.
        // Compare against null explicitly: a truthiness check would treat a
        // trailing "0" character as end of input and silently accept it.
        if ($decoder->ch !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }
90
91
    /**
     * Look up the character stored at the given character index.
     *
     * @param int $at
     *
     * @return string|null The character, or null when the index is outside the input
     */
    private function charAt($at)
    {
        $inBounds = $at >= 0 && $at < $this->length;

        return $inBounds ? $this->chArr[$at] : null;
    }
104
105
    /**
     * Advance to the next character.
     *
     * When $c is given, the current character must equal $c; otherwise a
     * syntax error is raised before advancing.
     *
     * @param string|null $c
     *
     * @return null|string The new current character, or null at end of input
     */
    private function next($c = null)
    {
        if ($c !== null && $this->ch !== $c) {
            $expected = self::renderChar($c);
            $actual = self::renderChar($this->ch);
            $this->throwSyntaxError(sprintf('Expected %s instead of %s', $expected, $actual));
        }

        // A line ends on "\n", or on a "\r" that is not immediately followed by "\n"
        // (so that a "\r\n" pair only counts as one line break).
        $endsLine = $this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n");

        $this->at++;
        if ($endsLine) {
            $this->lineNumber++;
            $this->columnNumber = 1;
        } else {
            $this->columnNumber++;
        }

        $this->ch = $this->charAt($this->at);

        return $this->ch;
    }
141
142
    /**
     * Look at the character following the current one without consuming
     * anything or touching $ch.
     *
     * @return mixed The following character, or null when there is none
     */
    private function peek()
    {
        $following = $this->at + 1;

        return $this->charAt($following);
    }
152
153
    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * On success the parser position, the column number and $ch are advanced
     * past the end of the matched text. The line number is never changed, so
     * this function must not be used to match across multiple lines.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null when the pattern does not match
     */
    private function match($regex)
    {
        // Pin the encoding explicitly, like every other mb_* call in this class;
        // relying on the configured internal encoding could shift character offsets.
        $subject = mb_substr($this->json, $this->at, null, 'utf-8');

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] is the matched text and $matches[0][1] is where it starts.
        // PREG_OFFSET_CAPTURE reports that start as a byte offset, so convert it
        // to a character offset before advancing.
        $matchedText = $matches[0][0];
        $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8');

        $advanceBy = $offset + mb_strlen($matchedText, 'utf-8');

        $this->at += $advanceBy;
        $this->columnNumber += $advanceBy;
        $this->ch = $this->charAt($this->at);

        return $matchedText;
    }
184
185
    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @return string The identifier with any \uXXXX escapes decoded
     *
     * @throws SyntaxError if no identifier starts at the current position
     */
    private function identifier()
    {
        // @codingStandardsIgnoreStart
        // The zero-width non-joiner (U+200C) and zero-width joiner (U+200D) allowed in
        // identifier parts are written as \x{...} escapes so they stay visible and
        // cannot be lost by an editor or a copy/paste.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by decoding each run of \uXXXX
        // sequences as a JSON string literal.
        $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
            return json_decode('"'.$m[0].'"');
        }, $match);

        return $unescaped;
    }
213
214 210
    /**
     * Parse a number value.
     *
     * Handles an optional leading sign, the Infinity and NaN keywords,
     * hexadecimal literals (0x...) and decimal literals with an optional
     * fraction and exponent. A leading zero followed by another digit is
     * rejected as an octal literal.
     *
     * @return int|float|string A string is only returned for decimal integers
     *                          that overflow PHP's int type while the
     *                          JSON_BIGINT_AS_STRING option is enabled
     *
     * @throws SyntaxError if the text at the current position is not a valid number
     */
    private function number()
    {
        $sign = '';
        if ($this->ch === '-' || $this->ch === '+') {
            $sign = $this->ch;
            $this->next($this->ch);
        }

        // support for Infinity: word() consumes the whole keyword or throws
        if ($this->ch === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->ch === 'N') {
            $number = $this->word();
            // NAN never compares equal (or identical) to itself, so it has to
            // be detected with is_nan() rather than a comparison operator.
            if (!is_float($number) || !is_nan($number)) {
                $this->throwSyntaxError('expected word to be NaN');
            }

            // ignore sign as -NaN also is NaN
            return $number;
        }

        $prefix = '';
        if ($this->ch === '0') {
            $prefix = '0';
            $this->next();
            if ($this->ch === 'x' || $this->ch === 'X') {
                $this->next();

                return $this->hexNumber($sign);
            }

            if (is_numeric($this->ch)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        return $this->decimalNumber($sign, $prefix);
    }

    /**
     * Parse the digits of a hexadecimal literal; the "0x" prefix has already been consumed.
     *
     * @param string $sign '-', '+' or '' as read by number()
     *
     * @return int|float
     */
    private function hexNumber($sign)
    {
        $digits = $this->match('/^[A-Fa-f0-9]+/');
        if ($digits === null) {
            $this->throwSyntaxError('Bad hex number');
        }

        // Only the hex digits are passed on: feeding the "0x" prefix to hexdec()
        // raises a deprecation notice for the invalid "x" since PHP 7.4.
        $number = hexdec($digits);
        if ($sign === '-') {
            $number = -$number;
        }

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        return $number;
    }

    /**
     * Parse the remainder of a decimal literal.
     *
     * @param string $sign   '-', '+' or '' as read by number()
     * @param string $prefix Leading "0" already consumed by number(), or ''
     *
     * @return int|float|string
     */
    private function decimalNumber($sign, $prefix)
    {
        $literal = $prefix;
        if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
            $literal .= $match;
        }
        if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
            $literal .= $match;
        }

        // Validate before doing any arithmetic: negating a non-numeric string
        // such as '' or '.' yields 0 on PHP 7 and a TypeError on PHP 8.
        if (!is_numeric($literal)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $number = $literal + 0;
        if ($sign === '-') {
            $number = -$number;
            $literal = '-' . $literal;
        }

        if (!is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Like json_decode(), only integers too large for PHP's int type
        // (which therefore became floats) are returned as their original text.
        if ($this->castBigIntToString && is_float($number) && preg_match('/^-?\d+$/', $literal) === 1) {
            return $literal;
        }

        return $number;
    }
293
294 93
    /**
     * Parse a quoted string value.
     *
     * The current character is taken as the opening quote; parsing runs until
     * the same quote character is found again.
     *
     * @return string The decoded string contents
     *
     * @throws SyntaxError on an unescaped newline, an unknown escape or end of input
     */
    private function string()
    {
        $buffer = '';

        $quote = $this->ch;
        $this->next();

        while ($this->ch !== null) {
            // Closing quote: consume it and finish.
            if ($this->ch === $quote) {
                $this->next();

                return $buffer;
            }

            // unescaped newlines are invalid; see:
            // https://github.com/json5/json5/issues/24
            // @todo this feels special-cased; are there other invalid unescaped chars?
            if ($this->ch === "\n") {
                break;
            }

            // Ordinary character: copy it through.
            if ($this->ch !== '\\') {
                $buffer .= $this->ch;
                $this->next();
                continue;
            }

            // A run of \uXXXX escapes is decoded in one go as a JSON string
            // literal; match() has already moved past it.
            $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
            if ($unicodeEscaped) {
                $buffer .= json_decode('"'.$unicodeEscaped.'"');
                continue;
            }

            // Any other escape: step over the backslash and inspect what follows.
            $this->next();
            if ($this->ch === "\r") {
                // Escaped line break: contributes nothing; swallow the "\n" of a "\r\n" pair.
                if ($this->peek() === "\n") {
                    $this->next();
                }
            } else {
                $escapee = self::getEscapee($this->ch);
                if ($escapee === null) {
                    break;
                }
                $buffer .= $escapee;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }
337
338
    /**
     * Skip an inline comment, assuming this is one.
     *
     * On entry the current character should be the second slash of the pair
     * that opens the comment. Characters are consumed up to and including the
     * first "\n" or "\r", or until the end of the text.
     */
    private function inlineComment()
    {
        while (true) {
            $this->next();

            if ($this->ch === null) {
                return;
            }

            if ($this->ch === "\n" || $this->ch === "\r") {
                $this->next();

                return;
            }
        }
    }
355
356
    /**
     * Skip a block comment, assuming this is one.
     *
     * On entry the current character should be the asterisk of the
     * slash-asterisk pair that opens the comment. Characters are consumed
     * until an asterisk directly followed by a slash has been passed.
     *
     * @throws SyntaxError if the text ends before the comment is terminated
     */
    private function blockComment()
    {
        do {
            $this->next();

            // Each asterisk may be the start of the terminator.
            while ($this->ch === '*') {
                $this->next('*');
                if ($this->ch !== '/') {
                    continue;
                }

                $this->next('/');

                return;
            }
        } while ($this->ch !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }
379
380
    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws SyntaxError if the leading slash is not followed by a slash or an asterisk
     */
    private function comment()
    {
        // Comments always begin with a / character.
        $this->next('/');

        if ($this->ch === '/') {
            $this->inlineComment();

            return;
        }

        if ($this->ch === '*') {
            $this->blockComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }
396
397
    /**
     * Skip whitespace and comments.
     *
     * A comment is detected from a single / character alone. That is safe
     * because no JSON(5) value starts with a slash (regular expressions are
     * not valid JSON5); it would break if such a value were ever introduced.
     */
    private function white()
    {
        while ($this->ch !== null) {
            if ($this->ch === '/') {
                $this->comment();
                continue;
            }

            // Stop at the first character that is not whitespace.
            if (preg_match('/[ \t\r\n\v\f\xA0\x{FEFF}]/u', $this->ch) !== 1) {
                return;
            }

            $this->next();
        }
    }
416
417
    /**
     * Matches the keywords true, false, null, Infinity and NaN.
     *
     * The keyword is selected by the current character and then consumed one
     * character at a time, so any deviation raises a syntax error.
     *
     * @return bool|float|null The PHP value the keyword stands for
     *
     * @throws SyntaxError if the current character does not start a known keyword
     */
    private function word()
    {
        // First character => [full keyword, resulting value]
        $keywords = [
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', INF],
            'N' => ['NaN', NAN],
        ];

        $first = $this->ch;
        if (is_string($first) && isset($keywords[$first])) {
            list($keyword, $value) = $keywords[$first];
            foreach (str_split($keyword) as $expected) {
                $this->next($expected);
            }

            return $value;
        }

        $this->throwSyntaxError('Unexpected ' . self::renderChar($this->ch));
    }
461
462 42
    /**
     * Parse an array value.
     *
     * @return array The parsed elements, in order
     *
     * @throws SyntaxError on a missing element, an unterminated array or when
     *                     the maximum nesting depth is exceeded
     */
    private function arr()
    {
        if ($this->ch !== '[') {
            $this->throwSyntaxError('Bad array');
        }

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->next('[');
        $this->white();

        $elements = [];
        while ($this->ch !== null) {
            if ($this->ch === ']') {
                $this->next(']');
                $this->depth--;

                return $elements; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->ch === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $elements[] = $this->value();
            $this->white();

            // Without a comma after this value, the array has to end here.
            if ($this->ch !== ',') {
                $this->next(']');
                $this->depth--;

                return $elements;
            }

            $this->next(',');
            $this->white();
        }

        $this->throwSyntaxError('Bad array');
    }
502
503
    /**
     * Parse an object value
     *
     * @return array|\stdClass An associative array when decoding in
     *                         associative mode, a stdClass instance otherwise
     *
     * @throws SyntaxError on a bad key, a missing colon, an unterminated object
     *                     or when the maximum nesting depth is exceeded
     */
    private function obj()
    {
        $object = $this->associative ? [] : new \stdClass;

        if ($this->ch === '{') {
            if (++$this->depth > $this->maxDepth) {
                $this->throwSyntaxError('Maximum stack depth exceeded');
            }

            $this->next('{');
            $this->white();

            // Loop until end of input. The check is against null explicitly, as in
            // arr(): a truthiness test would treat the character "0" as end of input.
            while ($this->ch !== null) {
                if ($this->ch === '}') {
                    $this->next('}');
                    $this->depth--;
                    return $object; // Potentially empty object
                }

                // Keys can be unquoted. If they are, they need to be
                // valid JS identifiers.
                if ($this->ch === '"' || $this->ch === "'") {
                    $key = $this->string();
                } else {
                    $key = $this->identifier();
                }

                $this->white();
                $this->next(':');
                if ($this->associative) {
                    $object[$key] = $this->value();
                } else {
                    $object->{$key} = $this->value();
                }
                $this->white();

                // If there's no comma after this pair, this needs to be
                // the end of the object.
                if ($this->ch !== ',') {
                    $this->next('}');
                    $this->depth--;
                    return $object;
                }
                $this->next(',');
                $this->white();
            }
        }

        $this->throwSyntaxError('Bad object');
    }
554
555
    /**
     * Parse a JSON value.
     *
     * Leading whitespace and comments are skipped, then the first character
     * decides which parser handles the value: object, array, string, number
     * or keyword.
     *
     * @return mixed
     */
    private function value()
    {
        $this->white();

        $first = $this->ch;

        if ($first === '{') {
            return $this->obj();
        }

        if ($first === '[') {
            return $this->arr();
        }

        if ($first === '"' || $first === "'") {
            return $this->string();
        }

        if ($first === '-' || $first === '+' || $first === '.' || is_numeric($first)) {
            return $this->number();
        }

        return $this->word();
    }
580
581 114
    /**
     * Raise a syntax error carrying the parser's current line and column.
     *
     * @param string $message
     *
     * @throws SyntaxError always
     */
    private function throwSyntaxError($message)
    {
        $line = $this->lineNumber;
        $column = $this->columnNumber;

        throw new SyntaxError($message, $line, $column);
    }
585
586 27
    /**
     * Format a character for use in an error message.
     *
     * @param string|null $chr
     *
     * @return string 'EOF' for null, otherwise the character wrapped in single quotes
     */
    private static function renderChar($chr)
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'" . $chr . "'";
    }
590
591
    /**
     * Translate the character following a backslash into the text it stands for.
     *
     * @param string $ch
     *
     * @return string|null The replacement text (empty for an escaped "\n"),
     *                     or null when the escape is not recognised
     */
    private static function getEscapee($ch)
    {
        // @codingStandardsIgnoreStart
        static $escapes = [
            "'"  => "'",
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            "\n" => '',
            'b'  => "\x08",
            'f'  => "\f",
            'n'  => "\n",
            'r'  => "\r",
            't'  => "\t",
        ];
        // @codingStandardsIgnoreEnd

        if (is_string($ch) && isset($escapes[$ch])) {
            return $escapes[$ch];
        }

        return null;
    }
614
}
615