Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Json5Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Json5Decoder, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | final class Json5Decoder |
||
18 | { |
||
19 | private $json; |
||
20 | |||
21 | private $length; |
||
22 | |||
23 | private $at = 0; |
||
24 | |||
25 | private $currentByte; |
||
26 | |||
27 | private $lineNumber = 1; |
||
28 | |||
29 | private $associative = false; |
||
30 | |||
31 | private $maxDepth = 512; |
||
32 | |||
33 | private $castBigIntToString = false; |
||
34 | |||
35 | private $depth = 1; |
||
36 | |||
37 | private $currentLineStartsAt = 0; |
||
38 | |||
39 | /** |
||
40 | * Private constructor. |
||
41 | * |
||
42 | * @param string $json |
||
43 | * @param bool $associative |
||
44 | * @param int $depth |
||
45 | * @param bool $castBigIntToString |
||
46 | */ |
||
47 | 315 | private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false) |
|
57 | |||
/**
 * Takes a JSON encoded string and converts it into a PHP variable.
 *
 * The parameters exactly match PHP's json_decode() function - see
 * http://php.net/manual/en/function.json-decode.php for more information.
 *
 * On PHP 7+ the native json_decode() is tried first because it is much
 * faster; the JSON5 parser is only used when that attempt reports an error.
 * Problems found by the JSON5 parser are reported via throwSyntaxError().
 *
 * @param string $source      The JSON string being decoded.
 * @param bool   $associative When TRUE, returned objects will be converted into associative arrays.
 * @param int    $depth       User specified recursion depth.
 * @param int    $options     Bitmask of JSON decode options.
 *
 * @return mixed
 */
public static function decode($source, $associative = false, $depth = 512, $options = 0)
{
    // Try parsing with json_decode first, since that's much faster.
    // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly.
    if (PHP_VERSION_ID >= 70000) {
        $result = \json_decode($source, $associative, $depth, $options);
        if (\json_last_error() === \JSON_ERROR_NONE) {
            return $result;
        }
    }

    // Fall back to JSON5 if that fails. Normalise both flags to real booleans.
    $associative = $associative === true
        || ($associative === null && ($options & \JSON_OBJECT_AS_ARRAY) !== 0);
    $castBigIntToString = ($options & \JSON_BIGINT_AS_STRING) !== 0;

    $decoder = new self((string)$source, $associative, $depth, $castBigIntToString);

    $result = $decoder->value();
    $decoder->white();

    // Anything left after the value (and trailing whitespace/comments) is an error.
    // Compare against null - the end-of-input marker the parsing loops in this
    // class test for - rather than relying on truthiness: the byte "0" is falsy
    // in PHP, so input such as "{}0" would otherwise be accepted silently.
    if ($decoder->currentByte !== null) {
        $decoder->throwSyntaxError('Syntax error');
    }

    return $result;
}
||
96 | |||
97 | /** |
||
98 | * @param int $at |
||
99 | * |
||
100 | * @return null |
||
101 | */ |
||
102 | 315 | private function getByte($at) |
|
110 | |||
111 | /** |
||
112 | * @return string|null |
||
113 | */ |
||
114 | 33 | private function currentChar() |
|
122 | |||
/**
 * Advance the cursor by one byte and load that byte.
 *
 * Before moving, the line bookkeeping is updated when the byte being left
 * behind ends a line: either "\n", or a "\r" that is not followed by "\n"
 * (so a "\r\n" pair is counted once, on the "\n").
 *
 * @return null|string Whatever getByte() yields for the new position.
 */
private function next()
{
    $leaving = $this->currentByte;

    $endsLine = $leaving === "\n";
    if (!$endsLine && $leaving === "\r") {
        $endsLine = $this->peek() !== "\n";
    }

    if ($endsLine) {
        $this->lineNumber += 1;
        $this->currentLineStartsAt = $this->at + 1;
    }

    $this->at += 1;
    $this->currentByte = $this->getByte($this->at);

    return $this->currentByte;
}
||
141 | |||
/**
 * Consume the current byte, reporting a syntax error first if it is not $c.
 *
 * @param string $c The byte that must be under the cursor.
 *
 * @return string|null The result of next().
 */
private function nextOrFail($c)
{
    if ($this->currentByte !== $c) {
        $expected = self::renderChar($c);
        $actual = self::renderChar($this->currentChar());

        $this->throwSyntaxError(\sprintf('Expected %s instead of %s', $expected, $actual));
    }

    return $this->next();
}
||
161 | |||
162 | /** |
||
163 | * Get the next character without consuming it or |
||
164 | * assigning it to the ch variable. |
||
165 | * |
||
166 | * @return mixed |
||
167 | */ |
||
168 | 33 | private function peek() |
|
172 | |||
/**
 * Attempt to match a regular expression at the current position on the current line.
 *
 * This function will not match across multiple lines: the subject handed to
 * preg_match() is the input from the cursor up to (not including) the next "\n".
 * On success the cursor is moved past the end of the match and the current
 * byte is reloaded; on failure nothing is changed.
 *
 * @param string $regex
 *
 * @return string|null The matched text, or null when the pattern does not match.
 */
private function match($regex)
{
    $subject = \substr($this->json, $this->at);

    // Only match on the current line. strpos() returns 0 when the newline is
    // the very first byte, so test against false explicitly instead of using
    // the offset as a boolean (which would skip the truncation in that case).
    $pos = \strpos($subject, "\n");
    if ($pos !== false) {
        $subject = \substr($subject, 0, $pos);
    }

    if (!\preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
        return null;
    }

    // $matches[0] is [matched text, byte offset]; skip everything up to the end of the match.
    $this->at += $matches[0][1] + \strlen($matches[0][0]);
    $this->currentByte = $this->getByte($this->at);

    return $matches[0][0];
}
||
199 | |||
/**
 * Parse an identifier.
 *
 * Normally, reserved words are disallowed here, but we
 * only use this for unquoted object keys, where reserved words are allowed,
 * so we don't check for those here. References:
 * - http://es5.github.com/#x7.6
 * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
 * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
 *
 * @return string|null The identifier with any \uXXXX escapes decoded.
 */
private function identifier()
{
    // @codingStandardsIgnoreStart
    // The zero-width non-joiner (U+200C) and zero-width joiner (U+200D) are valid
    // identifier parts. They are written as explicit \x{...} escapes rather than as
    // literal (invisible) characters so they cannot be lost when the file is edited.
    $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
    // @codingStandardsIgnoreEnd

    if ($match === null) {
        $this->throwSyntaxError('Bad identifier as unquoted key');
    }

    // Un-escape escaped Unicode chars by letting json_decode() interpret each
    // run of \uXXXX sequences as a JSON string literal.
    $unescaped = \preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) {
        return \json_decode('"'.$m[0].'"');
    }, $match);

    return $unescaped;
}
||
228 | |||
/**
 * Parse a numeric literal starting at the current byte.
 *
 * Handles an optional leading sign, Infinity, NaN, hexadecimal literals
 * (0x...) and decimal literals with optional fraction and exponent.
 * Literals with a leading zero followed by another digit are rejected as
 * octal.
 *
 * When castBigIntToString is enabled, a decimal integer literal that does not
 * fit into a PHP int (i.e. would become a float) is returned as its original
 * string, mirroring json_decode()'s JSON_BIGINT_AS_STRING. Every other number
 * is returned as int or float.
 *
 * @return int|float|string
 */
private function number()
{
    $sign = '';
    if ($this->currentByte === '-' || $this->currentByte === '+') {
        $sign = $this->currentByte;
        $this->next();
    }

    // support for Infinity
    if ($this->currentByte === 'I') {
        $this->word();

        return ($sign === '-') ? -INF : INF;
    }

    // support for NaN; ignore sign as -NaN also is NaN
    if ($this->currentByte === 'N') {
        return $this->word();
    }

    $string = '';
    $isHex = false;

    if ($this->currentByte === '0') {
        $string .= $this->currentByte;
        $this->next();
        if ($this->currentByte === 'x' || $this->currentByte === 'X') {
            $string .= $this->currentByte;
            $this->next();
            $isHex = true;
        } elseif (\is_numeric($this->currentByte)) {
            $this->throwSyntaxError('Octal literal');
        }
    }

    if ($isHex) {
        $match = $this->match('/^[A-Fa-f0-9]+/');
        if ($match === null) {
            $this->throwSyntaxError('Bad hex number');
        }

        // hexdec() accepts the "0x" prefix and yields an int, or a float when too large.
        $number = \hexdec($string . $match);
        if ($sign === '-') {
            $number = -1 * $number;
        }

        if (!\is_finite($number)) {
            $this->throwSyntaxError('Bad number');
        }

        return $number;
    }

    // @codingStandardsIgnoreStart
    if ((\is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
        $string .= $match;
    }
    if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
        $string .= $match;
    }
    // @codingStandardsIgnoreEnd

    // Keep the literal as text (including a minus sign) until it has been
    // validated. Negating numerically first would coerce malformed text such
    // as "1e" into a number and would round large integers through a float.
    $literal = ($sign === '-' ? '-' : '') . $string;

    if (!\is_numeric($literal) || !\is_finite((float) $literal)) {
        $this->throwSyntaxError('Bad number');
    }

    // Adding 0 will automatically cast this to an int or float
    $number = $literal + 0;

    // Only integer literals that overflowed into a float count as "big ints".
    if ($this->castBigIntToString && \is_float($number) && \preg_match('/^-?\d+$/', $literal) === 1) {
        return $literal;
    }

    return $number;
}
||
304 | |||
/**
 * Parse a quoted string; the current byte is taken as the delimiter.
 *
 * Backslash sequences are resolved as follows: runs of \uXXXX are decoded
 * through json_decode(); a backslash followed by "\r" (optionally "\r\n")
 * contributes nothing; anything else is looked up with getEscapee().
 * An unescaped "\n", an escape getEscapee() does not know, or running out of
 * input before the closing delimiter is reported as 'Bad string'.
 *
 * @return string|null
 */
private function string()
{
    $buffer = '';

    $quote = $this->currentByte;
    $this->next();

    while ($this->currentByte !== null) {
        if ($this->currentByte === $quote) {
            $this->next();

            return $buffer;
        }

        // unescaped newlines are invalid; see:
        // https://github.com/json5/json5/issues/24
        // @todo this feels special-cased; are there other invalid unescaped chars?
        if ($this->currentByte === "\n") {
            break;
        }

        // Ordinary byte: copy it and move on.
        if ($this->currentByte !== '\\') {
            $buffer .= $this->currentByte;
            $this->next();
            continue;
        }

        // Escape sequence. Unicode escapes are consumed as a whole run by match().
        if ($this->peek() === 'u') {
            $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/');
            if ($unicodeEscaped) {
                $buffer .= \json_decode('"'.$unicodeEscaped.'"');
                continue;
            }
        }

        $this->next();
        if ($this->currentByte === "\r") {
            // Escaped line break: swallow the "\n" of a "\r\n" pair as well.
            if ($this->peek() === "\n") {
                $this->next();
            }
        } else {
            $escapee = self::getEscapee($this->currentByte);
            if ($escapee === null) {
                break;
            }
            $buffer .= $escapee;
        }

        $this->next();
    }

    $this->throwSyntaxError('Bad string');
}
||
348 | |||
349 | /** |
||
350 | * Skip an inline comment, assuming this is one. |
||
351 | * |
||
352 | * The current character should be the second / character in the // pair that begins this inline comment. |
||
353 | * To finish the inline comment, we look for a newline or the end of the text. |
||
354 | */ |
||
355 | 36 | private function inlineComment() |
|
366 | |||
/**
 * Skip a block comment, assuming this is one.
 *
 * On entry the current byte should be the "*" of the opening slash-star pair.
 * Bytes are consumed until a "*" immediately followed by "/" has been read;
 * if the input ends first, 'Unterminated block comment' is reported.
 */
private function blockComment()
{
    for (;;) {
        $this->next();

        // A run of stars may end the comment as soon as a slash follows one of them.
        while ($this->currentByte === '*') {
            $this->next();
            if ($this->currentByte === '/') {
                $this->next();

                return;
            }
        }

        if ($this->currentByte === null) {
            break;
        }
    }

    $this->throwSyntaxError('Unterminated block comment');
}
||
390 | |||
/**
 * Skip a comment, whether inline or block-level, assuming this is one.
 *
 * The leading "/" is consumed here; the byte after it selects the handler.
 */
private function comment()
{
    // Comments always begin with a / character.
    $this->nextOrFail('/');

    if ($this->currentByte === '/') {
        $this->inlineComment();

        return;
    }

    if ($this->currentByte === '*') {
        $this->blockComment();

        return;
    }

    $this->throwSyntaxError('Unrecognized comment');
}
|
407 | |||
/**
 * Skip whitespace and comments.
 *
 * Note that we're detecting comments by only a single / character.
 * This works since regular expressions are not valid JSON(5), but this will
 * break if there are other valid values that begin with a / character!
 *
 * Returns as soon as the current byte is neither whitespace nor the start of
 * a comment, or when the input is exhausted.
 */
private function white()
{
    while ($this->currentByte !== null) {
        if ($this->currentByte === '/') {
            $this->comment();
        } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
            $this->next();
        } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
            // Non-breaking space in UTF-8 (bytes C2 A0). The bytes are compared
            // directly instead of via ord() so that a missing next byte is never
            // passed to ord(), which expects a string.
            $this->next();
            $this->next();
        } else {
            return;
        }
    }
}
|
431 | |||
/**
 * Matches the literal words true, false, null, Infinity and NaN.
 *
 * The current byte selects the expected word; every byte of that word is
 * then consumed with nextOrFail(), so a mismatch anywhere in the word is
 * reported there. Any other starting byte is reported as unexpected.
 *
 * @return bool|float|null The PHP value of the matched word.
 */
private function word()
{
    // First byte => [full spelling, resulting PHP value]
    $literals = [
        't' => ['true', true],
        'f' => ['false', false],
        'n' => ['null', null],
        'I' => ['Infinity', INF],
        'N' => ['NaN', NAN],
    ];

    $first = $this->currentByte;
    if (\is_string($first) && isset($literals[$first])) {
        list($spelling, $value) = $literals[$first];

        $length = \strlen($spelling);
        for ($i = 0; $i < $length; $i++) {
            $this->nextOrFail($spelling[$i]);
        }

        return $value;
    }

    $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
}
||
476 | |||
/**
 * Parse an array value; the current byte should be the opening "[".
 *
 * Nesting depth is increased on entry (and checked against maxDepth) and
 * decreased again when the closing "]" has been consumed. A trailing comma
 * before "]" is accepted; an empty element such as [,] is not.
 *
 * @return array
 */
private function arr()
{
    $items = [];

    $this->depth += 1;
    if ($this->depth > $this->maxDepth) {
        $this->throwSyntaxError('Maximum stack depth exceeded');
    }

    $this->nextOrFail('[');
    $this->white();

    while ($this->currentByte !== null) {
        if ($this->currentByte === ']') {
            // Potentially empty array, or the end after a trailing comma
            $this->next();
            $this->depth -= 1;

            return $items;
        }

        // ES5 allows omitting elements in arrays, e.g. [,] and
        // [,null]. We don't allow this in JSON5.
        if ($this->currentByte === ',') {
            $this->throwSyntaxError('Missing array element');
        }

        $items[] = $this->value();
        $this->white();

        if ($this->currentByte === ',') {
            $this->next();
            $this->white();
            continue;
        }

        // If there's no comma after this value, this needs to
        // be the end of the array.
        $this->nextOrFail(']');
        $this->depth -= 1;

        return $items;
    }

    $this->throwSyntaxError('Invalid array');
}
||
515 | |||
/**
 * Parse an object value; the current byte should be the opening "{".
 *
 * The result is an array when the decoder is in associative mode and a
 * stdClass instance otherwise. Keys may be quoted strings or unquoted
 * identifiers. Nesting depth is increased on entry (and checked against
 * maxDepth) and decreased again once the closing "}" has been consumed.
 *
 * @return array|\stdClass
 */
private function obj()
{
    if ($this->associative) {
        $object = [];
    } else {
        $object = new \stdClass;
    }

    $this->depth += 1;
    if ($this->depth > $this->maxDepth) {
        $this->throwSyntaxError('Maximum stack depth exceeded');
    }

    $this->nextOrFail('{');
    $this->white();

    while ($this->currentByte !== null) {
        if ($this->currentByte === '}') {
            // Potentially empty object, or the end after a trailing comma
            $this->next();
            $this->depth -= 1;

            return $object;
        }

        // Keys can be unquoted. If they are, they need to be
        // valid JS identifiers.
        $isQuoted = $this->currentByte === '"' || $this->currentByte === "'";
        $key = $isQuoted ? $this->string() : $this->identifier();

        $this->white();
        $this->nextOrFail(':');

        $member = $this->value();
        if ($this->associative) {
            $object[$key] = $member;
        } else {
            $object->{$key} = $member;
        }

        $this->white();

        if ($this->currentByte === ',') {
            $this->next();
            $this->white();
            continue;
        }

        // If there's no comma after this pair, this needs to be
        // the end of the object.
        $this->nextOrFail('}');
        $this->depth -= 1;

        return $object;
    }

    $this->throwSyntaxError('Invalid object');
}
||
565 | |||
/**
 * Parse a JSON value.
 *
 * Leading whitespace and comments are skipped, then the current byte decides
 * which parser runs: obj() for "{", arr() for "[", string() for either quote
 * character, number() for a sign, a dot or a digit, and word() for the rest.
 *
 * @return mixed
 */
private function value()
{
    $this->white();

    $byte = $this->currentByte;

    if ($byte === '{') {
        return $this->obj();
    }

    if ($byte === '[') {
        return $this->arr();
    }

    if ($byte === '"' || $byte === "'") {
        return $this->string();
    }

    if ($byte === '-' || $byte === '+' || $byte === '.' || \is_numeric($byte)) {
        return $this->number();
    }

    return $this->word();
}
||
591 | |||
592 | 129 | private function throwSyntaxError($message) |
|
600 | |||
601 | 33 | private static function renderChar($chr) |
|
605 | |||
606 | /** |
||
607 | * @param string $ch |
||
608 | * |
||
609 | * @return string|null |
||
610 | */ |
||
611 | 18 | private static function getEscapee($ch) |
|
629 | } |
||
630 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.