Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Json5Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Json5Decoder and, based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | final class Json5Decoder |
||
18 | { |
||
19 | private $at = 0; |
||
20 | |||
21 | private $lineNumber = 1; |
||
22 | |||
23 | private $columnNumber = 1; |
||
24 | |||
25 | private $ch; |
||
26 | |||
27 | private $chArr; |
||
28 | |||
29 | private $associative = false; |
||
30 | |||
31 | private $maxDepth = 512; |
||
32 | |||
33 | private $castBigIntToString = false; |
||
34 | |||
35 | private $depth = 1; |
||
36 | |||
37 | private $length; |
||
38 | |||
39 | private $remainderCache; |
||
40 | |||
41 | private $remainderCacheAt; |
||
42 | |||
43 | /** |
||
44 | * Private constructor. |
||
45 | * |
||
46 | * @param string $json |
||
47 | * @param bool $associative |
||
48 | * @param int $depth |
||
49 | * @param bool $castBigIntToString |
||
50 | */ |
||
51 | 405 | private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false) |
|
52 | { |
||
53 | 405 | $this->associative = $associative; |
|
54 | 405 | $this->maxDepth = $depth; |
|
55 | 405 | $this->castBigIntToString = $castBigIntToString; |
|
56 | |||
57 | 405 | $this->length = mb_strlen($json, 'utf-8'); |
|
58 | |||
59 | 405 | $this->chArr = preg_split('//u', $json, null, PREG_SPLIT_NO_EMPTY); |
|
60 | 405 | $this->ch = $this->charAt(0); |
|
61 | |||
62 | 405 | $this->remainderCache = $json; |
|
63 | 405 | $this->remainderCacheAt = 0; |
|
64 | 405 | } |
|
65 | |||
66 | /** |
||
67 | * Takes a JSON encoded string and converts it into a PHP variable. |
||
68 | * |
||
69 | * The parameters exactly match PHP's json_decode() function - see |
||
70 | * http://php.net/manual/en/function.json-decode.php for more information. |
||
71 | * |
||
72 | * @param string $source The JSON string being decoded. |
||
73 | * @param bool $associative When TRUE, returned objects will be converted into associative arrays. |
||
74 | * @param int $depth User specified recursion depth. |
||
75 | * @param int $options Bitmask of JSON decode options. |
||
76 | * |
||
77 | * @return mixed |
||
78 | */ |
||
79 | 405 | public static function decode($source, $associative = false, $depth = 512, $options = 0) |
|
80 | { |
||
81 | // Try parsing with json_decode first, since that's much faster |
||
82 | // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly |
||
83 | 405 | if (PHP_VERSION_ID >= 700000) { |
|
84 | $result = json_decode($source, $associative, $depth, $options); |
||
85 | if (json_last_error() === JSON_ERROR_NONE) { |
||
86 | return $result; |
||
87 | } |
||
88 | } |
||
89 | |||
90 | // Fall back to JSON5 if that fails |
||
91 | 405 | $associative = $associative === true || ($associative === null && $options & JSON_OBJECT_AS_ARRAY); |
|
92 | 405 | $castBigIntToString = $options & JSON_BIGINT_AS_STRING; |
|
93 | |||
94 | 405 | $decoder = new self((string)$source, $associative, $depth, $castBigIntToString); |
|
|
|||
95 | |||
96 | 405 | $result = $decoder->value(); |
|
97 | 306 | $decoder->white(); |
|
98 | 303 | if ($decoder->ch) { |
|
99 | 18 | $decoder->throwSyntaxError('Syntax error'); |
|
100 | } |
||
101 | |||
102 | 285 | return $result; |
|
103 | } |
||
104 | |||
105 | /** |
||
106 | * @param int $at |
||
107 | * |
||
108 | * @return string|null |
||
109 | */ |
||
110 | 405 | private function charAt($at) |
|
111 | { |
||
112 | 405 | if ($at >= $this->length) { |
|
113 | 300 | return null; |
|
114 | } |
||
115 | |||
116 | 402 | return $this->chArr[$at]; |
|
117 | } |
||
118 | |||
119 | /** |
||
120 | * Parse the next character. |
||
121 | * |
||
122 | * @return null|string |
||
123 | */ |
||
124 | 375 | private function next() |
|
125 | { |
||
126 | // Get the next character. When there are no more characters, |
||
127 | // return null. |
||
128 | 375 | if ($this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n")) { |
|
129 | 282 | $this->at++; |
|
130 | 282 | $this->lineNumber++; |
|
131 | 282 | $this->columnNumber = 1; |
|
132 | 282 | } else { |
|
133 | 336 | $this->at++; |
|
134 | 336 | $this->columnNumber++; |
|
135 | } |
||
136 | |||
137 | 375 | $this->ch = $this->charAt($this->at); |
|
138 | |||
139 | 375 | return $this->ch; |
|
140 | } |
||
141 | |||
142 | /** |
||
143 | * Parse the next character if it matches $c or fail. |
||
144 | * |
||
145 | * @param string $c |
||
146 | * |
||
147 | * @return string|null |
||
148 | */ |
||
149 | 177 | private function nextOrFail($c) |
|
150 | { |
||
151 | 177 | if ($c !== $this->ch) { |
|
152 | 24 | $this->throwSyntaxError(sprintf( |
|
153 | 24 | 'Expected %s instead of %s', |
|
154 | 24 | self::renderChar($c), |
|
155 | 24 | self::renderChar($this->ch) |
|
156 | 24 | )); |
|
157 | } |
||
158 | |||
159 | 177 | return $this->next(); |
|
160 | } |
||
161 | |||
162 | /** |
||
163 | * Get the next character without consuming it or |
||
164 | * assigning it to the ch variable. |
||
165 | * |
||
166 | * @return string|null |
||
167 | */ |
||
168 | 36 | private function peek() |
|
169 | { |
||
170 | 36 | return $this->charAt($this->at + 1); |
|
171 | } |
||
172 | |||
173 | /** |
||
174 | * Attempt to match a regular expression at the current position on the current line. |
||
175 | * |
||
176 | * This function will not match across multiple lines. |
||
177 | * |
||
178 | * @param string $regex |
||
179 | * |
||
180 | * @return string|null |
||
181 | */ |
||
182 | 210 | private function match($regex) |
|
183 | { |
||
184 | 210 | $subject = $this->getRemainder(); |
|
185 | |||
186 | 210 | $matches = []; |
|
187 | 210 | if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) { |
|
188 | 15 | return null; |
|
189 | } |
||
190 | |||
191 | // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying |
||
192 | 195 | $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8'); |
|
193 | |||
194 | // [0][0] contains the matched text |
||
195 | // [0][1] contains the index of that match |
||
196 | 195 | $advanceBy = $offset + mb_strlen($matches[0][0], 'utf-8'); |
|
197 | |||
198 | 195 | $this->at += $advanceBy; |
|
199 | 195 | $this->columnNumber += $advanceBy; |
|
200 | 195 | $this->ch = $this->charAt($this->at); |
|
201 | |||
202 | 195 | return $matches[0][0]; |
|
203 | } |
||
204 | |||
205 | /** |
||
206 | * Parse an identifier. |
||
207 | * |
||
208 | * Normally, reserved words are disallowed here, but we |
||
209 | * only use this for unquoted object keys, where reserved words are allowed, |
||
210 | * so we don't check for those here. References: |
||
211 | * - http://es5.github.com/#x7.6 |
||
212 | * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables |
||
213 | * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm |
||
214 | */ |
||
215 | 42 | private function identifier() |
|
216 | { |
||
217 | // @codingStandardsIgnoreStart |
||
218 | // Be careful when editing this regex, there are a couple Unicode characters in between here -------------vv |
||
219 | 42 | $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]|\\\\u[0-9A-Fa-f]{4})*/u'); |
|
220 | // @codingStandardsIgnoreEnd |
||
221 | |||
222 | 42 | if ($match === null) { |
|
223 | 9 | $this->throwSyntaxError('Bad identifier as unquoted key'); |
|
224 | } |
||
225 | |||
226 | // Un-escape escaped Unicode chars |
||
227 | 33 | $unescaped = preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', function ($m) { |
|
228 | 6 | return json_decode('"'.$m[0].'"'); |
|
229 | 33 | }, $match); |
|
230 | |||
231 | 33 | return $unescaped; |
|
232 | } |
||
233 | |||
234 | 216 | private function number() |
|
235 | { |
||
236 | 216 | $number = null; |
|
237 | 216 | $sign = ''; |
|
238 | 216 | $string = ''; |
|
239 | 216 | $base = 10; |
|
240 | |||
241 | 216 | if ($this->ch === '-' || $this->ch === '+') { |
|
242 | 99 | $sign = $this->ch; |
|
243 | 99 | $this->next(); |
|
244 | 99 | } |
|
245 | |||
246 | // support for Infinity |
||
247 | 216 | if ($this->ch === 'I') { |
|
248 | 9 | $this->word(); |
|
249 | |||
250 | 6 | return ($sign === '-') ? -INF : INF; |
|
251 | } |
||
252 | |||
253 | // support for NaN |
||
254 | 207 | if ($this->ch === 'N') { |
|
255 | 3 | $number = $this->word(); |
|
256 | |||
257 | // ignore sign as -NaN also is NaN |
||
258 | 3 | return $number; |
|
259 | } |
||
260 | |||
261 | 204 | if ($this->ch === '0') { |
|
262 | 105 | $string .= $this->ch; |
|
263 | 105 | $this->next(); |
|
264 | 105 | if ($this->ch === 'x' || $this->ch === 'X') { |
|
265 | 33 | $string .= $this->ch; |
|
266 | 33 | $this->next(); |
|
267 | 33 | $base = 16; |
|
268 | 105 | } elseif (is_numeric($this->ch)) { |
|
269 | 30 | $this->throwSyntaxError('Octal literal'); |
|
270 | } |
||
271 | 75 | } |
|
272 | |||
273 | switch ($base) { |
||
274 | 174 | case 10: |
|
275 | 144 | View Code Duplication | if ((is_numeric($this->ch) || $this->ch === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) { |
276 | 129 | $string .= $match; |
|
277 | 129 | } |
|
278 | 144 | View Code Duplication | if (($this->ch === 'E' || $this->ch === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) { |
279 | 45 | $string .= $match; |
|
280 | 45 | } |
|
281 | 144 | $number = $string; |
|
282 | 144 | break; |
|
283 | 33 | case 16: |
|
284 | 33 | if (($match = $this->match('/^[A-Fa-f0-9]+/')) !== null) { |
|
285 | 30 | $string .= $match; |
|
286 | 30 | $number = hexdec($string); |
|
287 | 30 | break; |
|
288 | } |
||
289 | 3 | $this->throwSyntaxError('Bad hex number'); |
|
290 | } |
||
291 | |||
292 | 171 | if ($sign === '-') { |
|
293 | 33 | $number = -$number; |
|
294 | 33 | } |
|
295 | |||
296 | 171 | if (!is_numeric($number) || !is_finite($number)) { |
|
297 | 3 | $this->throwSyntaxError('Bad number'); |
|
298 | } |
||
299 | |||
300 | 168 | if ($this->castBigIntToString) { |
|
301 | 3 | return $number; |
|
302 | } |
||
303 | |||
304 | // Adding 0 will automatically cast this to an int or float |
||
305 | 165 | return $number + 0; |
|
306 | } |
||
307 | |||
308 | 93 | private function string() |
|
309 | { |
||
310 | 93 | $string = ''; |
|
311 | |||
312 | 93 | $delim = $this->ch; |
|
313 | 93 | $this->next(); |
|
314 | 93 | while ($this->ch !== null) { |
|
315 | 93 | if ($this->ch === $delim) { |
|
316 | 87 | $this->next(); |
|
317 | |||
318 | 87 | return $string; |
|
319 | } |
||
320 | |||
321 | 93 | if ($this->ch === '\\') { |
|
322 | 30 | if ($this->peek() === 'u' && $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) { |
|
323 | 6 | $string .= json_decode('"'.$unicodeEscaped.'"'); |
|
324 | 6 | continue; |
|
325 | } |
||
326 | |||
327 | 24 | $this->next(); |
|
328 | 24 | if ($this->ch === "\r") { |
|
329 | 6 | if ($this->peek() === "\n") { |
|
330 | 3 | $this->next(); |
|
331 | 3 | } |
|
332 | 24 | } elseif (($escapee = self::getEscapee($this->ch)) !== null) { |
|
333 | 15 | $string .= $escapee; |
|
334 | 15 | } else { |
|
335 | 3 | break; |
|
336 | } |
||
337 | 93 | } elseif ($this->ch === "\n") { |
|
338 | // unescaped newlines are invalid; see: |
||
339 | // https://github.com/json5/json5/issues/24 |
||
340 | // @todo this feels special-cased; are there other invalid unescaped chars? |
||
341 | 3 | break; |
|
342 | } else { |
||
343 | 93 | $string .= $this->ch; |
|
344 | } |
||
345 | |||
346 | 93 | $this->next(); |
|
347 | 93 | } |
|
348 | |||
349 | 6 | $this->throwSyntaxError('Bad string'); |
|
350 | } |
||
351 | |||
352 | /** |
||
353 | * Skip an inline comment, assuming this is one. |
||
354 | * |
||
355 | * The current character should be the second / character in the // pair that begins this inline comment. |
||
356 | * To finish the inline comment, we look for a newline or the end of the text. |
||
357 | */ |
||
358 | 36 | private function inlineComment() |
|
369 | |||
370 | /** |
||
371 | * Skip a block comment, assuming this is one. |
||
372 | * |
||
373 | * The current character should be the * character in the /* pair that begins this block comment. |
||
374 | * To finish the block comment, we look for an ending */ pair of characters, |
||
375 | * but we also watch for the end of text before the comment is terminated. |
||
376 | */ |
||
377 | 21 | private function blockComment() |
|
393 | |||
394 | /** |
||
395 | * Skip a comment, whether inline or block-level, assuming this is one. |
||
396 | */ |
||
397 | 57 | private function comment() |
|
410 | |||
411 | /** |
||
412 | * Skip whitespace and comments. |
||
413 | * |
||
414 | * Note that we're detecting comments by only a single / character. |
||
415 | * This works since regular expressions are not valid JSON(5), but this will |
||
416 | * break if there are other valid values that begin with a / character! |
||
417 | */ |
||
418 | 405 | private function white() |
|
430 | |||
431 | /** |
||
432 | * Matches true, false, null, etc |
||
433 | */ |
||
434 | 93 | private function word() |
|
475 | |||
476 | 42 | private function arr() |
|
477 | { |
||
478 | 42 | $arr = []; |
|
479 | |||
480 | 42 | if (++$this->depth > $this->maxDepth) { |
|
481 | 3 | $this->throwSyntaxError('Maximum stack depth exceeded'); |
|
482 | } |
||
483 | |||
484 | 42 | $this->nextOrFail('['); |
|
512 | |||
513 | /** |
||
514 | * Parse an object value |
||
515 | */ |
||
516 | 90 | private function obj() |
|
560 | |||
561 | /** |
||
562 | * Parse a JSON value. |
||
563 | * |
||
564 | * It could be an object, an array, a string, a number, |
||
565 | * or a word. |
||
566 | */ |
||
567 | 405 | private function value() |
|
586 | |||
587 | 120 | private function throwSyntaxError($message) |
|
591 | |||
592 | 33 | private static function renderChar($chr) |
|
596 | |||
597 | /** |
||
598 | * @param string $ch |
||
599 | * |
||
600 | * @return string|null |
||
601 | */ |
||
602 | 18 | private static function getEscapee($ch) |
|
620 | |||
621 | /** |
||
622 | * Returns everything from $this->at onwards. |
||
623 | * |
||
624 | * Utilizes a cache so we don't have to continuously parse through UTF-8 |
||
625 | * data that was earlier in the string which we don't even care about. |
||
626 | * |
||
627 | * @return string |
||
628 | */ |
||
629 | 210 | private function getRemainder() |
|
641 | } |
||
642 |
It seems like the type of the argument is not accepted by the function/method which you are calling.
In some cases, in particular if PHP’s automatic type juggling kicks in, this might be fine. In other cases, however, this might be a bug.
We suggest adding an explicit type cast, as in the following example: