Complex classes like Json5Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Json5Decoder, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | final class Json5Decoder |
||
18 | { |
||
19 | const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u'; |
||
20 | |||
21 | private $json; |
||
22 | |||
23 | private $at = 0; |
||
24 | |||
25 | private $lineNumber = 1; |
||
26 | |||
27 | private $columnNumber = 1; |
||
28 | |||
29 | private $ch; |
||
30 | |||
31 | private $associative = false; |
||
32 | |||
33 | private $maxDepth = 512; |
||
34 | |||
35 | private $castBigIntToString = false; |
||
36 | |||
37 | private $depth = 1; |
||
38 | |||
39 | private $length; |
||
40 | |||
41 | private $lineCache; |
||
42 | |||
43 | /** |
||
44 | * Private constructor. |
||
45 | * |
||
46 | * @param string $json |
||
47 | * @param bool $associative |
||
48 | * @param int $depth |
||
49 | * @param bool $castBigIntToString |
||
50 | */ |
||
51 | 378 | private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false) |
|
62 | |||
63 | /** |
||
64 | * Takes a JSON encoded string and converts it into a PHP variable. |
||
65 | * |
||
66 | * The parameters exactly match PHP's json_decode() function - see |
||
67 | * http://php.net/manual/en/function.json-decode.php for more information. |
||
68 | * |
||
69 | * @param string $source The JSON string being decoded. |
||
70 | * @param bool $associative When TRUE, returned objects will be converted into associative arrays. |
||
71 | * @param int $depth User specified recursion depth. |
||
72 | * @param int $options Bitmask of JSON decode options. |
||
73 | * |
||
74 | * @return mixed |
||
75 | */ |
||
76 | 378 | public static function decode($source, $associative = false, $depth = 512, $options = 0) |
|
91 | |||
92 | /** |
||
93 | * @param int $at |
||
94 | * |
||
95 | * @return string|null |
||
96 | */ |
||
97 | 378 | private function charAt($at) |
|
105 | |||
106 | /** |
||
107 | * Parse the next character. |
||
108 | * |
||
109 | * If $c is given, the next char will only be parsed if the current |
||
110 | * one matches $c. |
||
111 | * |
||
112 | * @param string|null $c |
||
113 | * |
||
114 | * @return null|string |
||
115 | */ |
||
116 | 348 | private function next($c = null) |
|
142 | |||
143 | /** |
||
144 | * Get the next character without consuming it or |
||
145 | * assigning it to the ch variable. |
||
146 | * |
||
147 | * @return mixed |
||
148 | */ |
||
149 | 12 | private function peek() |
|
153 | |||
154 | /** |
||
155 | * @return string |
||
156 | */ |
||
157 | 216 | private function getLineRemainder() |
|
168 | |||
169 | /** |
||
170 | * Attempt to match a regular expression at the current position on the current line. |
||
171 | * |
||
172 | * This function will not match across multiple lines. |
||
173 | * |
||
174 | * @param string $regex |
||
175 | * |
||
176 | * @return string|null |
||
177 | */ |
||
private function match($regex)
{
    // Only the rest of the current line is searched, so a match can never span lines.
    $remainder = $this->getLineRemainder();

    $found = [];
    if (preg_match($regex, $remainder, $found, PREG_OFFSET_CAPTURE) !== 1) {
        return null;
    }

    // With PREG_OFFSET_CAPTURE each group is a [text, byteOffset] pair.
    $text = $found[0][0];
    $byteOffset = $found[0][1];

    // The reported offset is in bytes; measure the skipped prefix in characters instead.
    $skipped = mb_strcut($remainder, 0, $byteOffset, 'utf-8');
    $consumed = mb_strlen($skipped, 'utf-8') + mb_strlen($text, 'utf-8');

    // Move the cursor past the match and reload the current character.
    $this->at += $consumed;
    $this->columnNumber += $consumed;
    $this->ch = $this->charAt($this->at);

    return $text;
}
||
200 | |||
201 | /** |
||
202 | * Parse an identifier. |
||
203 | * |
||
204 | * Normally, reserved words are disallowed here, but we |
||
205 | * only use this for unquoted object keys, where reserved words are allowed, |
||
206 | * so we don't check for those here. References: |
||
207 | * - http://es5.github.com/#x7.6 |
||
208 | * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables |
||
209 | * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm |
||
210 | */ |
||
private function identifier()
{
    // @codingStandardsIgnoreStart
    // ZWNJ (U+200C) and ZWJ (U+200D) are valid identifier-part characters (ES5 section 7.6).
    // They are spelled as explicit \x{...} escapes so the pattern does not depend on
    // invisible literal characters that editors and tooling can silently strip.
    $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
    // @codingStandardsIgnoreEnd

    if ($match === null) {
        $this->throwSyntaxError('Bad identifier as unquoted key');
    }

    // Un-escape \uXXXX sequences into the characters they denote
    return preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) {
        return self::fromCharCode($m[1]);
    }, $match);
}
||
229 | |||
/**
 * Parse a number value.
 *
 * Handles an optional leading sign, the Infinity and NaN words, hexadecimal
 * literals (0x...) and decimal literals with optional fraction and exponent.
 * Leading-zero (octal-looking) literals are rejected with a syntax error.
 *
 * @return int|float|string The numeric string itself when castBigIntToString is enabled
 *                          and the literal is an unsigned decimal; an int or float otherwise.
 */
private function number()
{
    $number = null;
    $sign = '';
    $string = '';
    $base = 10;

    if ($this->ch === '-' || $this->ch === '+') {
        $sign = $this->ch;
        $this->next($this->ch);
    }

    // support for Infinity
    if ($this->ch === 'I') {
        $number = $this->word();
        if ($number === null) {
            $this->throwSyntaxError('Unexpected word for number');
        }

        return ($sign === '-') ? -INF : INF;
    }

    // support for NaN
    if ($this->ch === 'N') {
        // NOTE(review): word() is presumably what yields NAN for the NaN literal - confirm.
        $number = $this->word();

        // NAN never compares equal to anything, itself included, so a `!== NAN`
        // test is always true; is_nan() is the only reliable check.
        if (!is_float($number) || !is_nan($number)) {
            $this->throwSyntaxError('expected word to be NaN');
        }

        // ignore sign as -NaN also is NaN
        return $number;
    }

    if ($this->ch === '0') {
        $string .= $this->ch;
        $this->next();
        if ($this->ch === 'x' || $this->ch === 'X') {
            $string .= $this->ch;
            $this->next();
            $base = 16;
        } elseif (is_numeric($this->ch)) {
            $this->throwSyntaxError('Octal literal');
        }
    }

    switch ($base) {
        case 10:
            if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }
            $number = $string;
            break;
        case 16:
            if (($match = $this->match('/^[A-Fa-f0-9]+/')) !== null) {
                $string .= $match;
                $number = hexdec($string);
                break;
            }
            $this->throwSyntaxError('Bad hex number');
    }

    // Validate before applying the sign: negating a malformed literal such as '',
    // '.' or '1e' would raise a TypeError/warning (or silently produce a number)
    // instead of the intended syntax error.
    if (!is_numeric($number) || !is_finite((float) $number)) {
        $this->throwSyntaxError('Bad number');
    }

    if ($sign === '-') {
        $number = -$number;
    }

    if ($this->castBigIntToString) {
        return $number;
    }

    // Adding 0 will automatically cast this to an int or float
    return $number + 0;
}
||
309 | |||
/**
 * Parse a quoted string value.
 *
 * The current character must be the opening quote (single or double).
 * Reading stops at the matching closing quote; an unescaped newline, an
 * invalid escape or the end of the text is reported as a syntax error.
 *
 * @return string
 */
private function string()
{
    if ($this->ch !== '"' && $this->ch !== "'") {
        $this->throwSyntaxError('Bad string');
    }

    $quote = $this->ch;
    $buffer = '';
    $this->next();

    while ($this->ch !== null) {
        // Closing quote: consume it and hand back what was collected.
        if ($this->ch === $quote) {
            $this->next();

            return $buffer;
        }

        // unescaped newlines are invalid; see:
        // https://github.com/json5/json5/issues/24
        // @todo this feels special-cased; are there other invalid unescaped chars?
        if ($this->ch === "\n") {
            break;
        }

        // Ordinary character: copy it through.
        if ($this->ch !== '\\') {
            $buffer .= $this->ch;
            $this->next();
            continue;
        }

        // From here on we are inside an escape sequence.
        $this->next();

        if ($this->ch === 'u') {
            $this->next();
            $digits = $this->match('/^[A-Fa-f0-9]{4}/');
            if ($digits === null) {
                break;
            }
            // match() already moved past the four hex digits, so no extra next() here.
            $buffer .= self::fromCharCode($digits);
            continue;
        }

        if ($this->ch === "\r") {
            // Escaped CR: also swallow an LF that directly follows; nothing is appended.
            if ($this->peek() === "\n") {
                $this->next();
            }
        } else {
            $replacement = self::getEscapee($this->ch);
            if ($replacement === null) {
                break;
            }
            $buffer .= $replacement;
        }

        $this->next();
    }

    $this->throwSyntaxError('Bad string');
}
||
358 | |||
359 | /** |
||
360 | * Skip an inline comment, assuming this is one. |
||
361 | * |
||
362 | * The current character should be the second / character in the // pair that begins this inline comment. |
||
363 | * To finish the inline comment, we look for a newline or the end of the text. |
||
364 | */ |
||
private function inlineComment()
{
    while (true) {
        $this->next();

        // End of text also terminates the comment.
        if ($this->ch === null) {
            return;
        }

        // A line break ends the comment; step past it before returning.
        if (in_array($this->ch, ["\n", "\r"], true)) {
            $this->next();

            return;
        }
    }
}
|
376 | |||
377 | /** |
||
378 | * Skip a block comment, assuming this is one. |
||
379 | * |
||
380 | * The current character should be the * character in the /* pair that begins this block comment. |
||
381 | * To finish the block comment, we look for an ending */ pair of characters, |
||
382 | * but we also watch for the end of text before the comment is terminated. |
||
383 | */ |
||
private function blockComment()
{
    for (;;) {
        $this->next();

        // A run of asterisks may be the start of the closing "*/" pair.
        while ($this->ch === '*') {
            $this->next('*');
            if ($this->ch === '/') {
                $this->next('/');

                return;
            }
        }

        // Ran out of text before the comment was closed.
        if ($this->ch === null) {
            break;
        }
    }

    $this->throwSyntaxError('Unterminated block comment');
}
||
400 | |||
401 | /** |
||
402 | * Skip a comment, whether inline or block-level, assuming this is one. |
||
403 | */ |
||
private function comment()
{
    // Every comment opens with a slash.
    if ($this->ch !== '/') {
        $this->throwSyntaxError('Not a comment');
    }

    $this->next('/');

    // The character after the slash decides which kind of comment this is.
    if ($this->ch === '/') {
        $this->inlineComment();

        return;
    }

    if ($this->ch === '*') {
        $this->blockComment();

        return;
    }

    $this->throwSyntaxError('Unrecognized comment');
}
|
421 | |||
422 | /** |
||
423 | * Skip whitespace and comments. |
||
424 | * |
||
425 | * Note that we're detecting comments by only a single / character. |
||
426 | * This works since regular expressions are not valid JSON(5), but this will |
||
427 | * break if there are other valid values that begin with a / character! |
||
428 | */ |
||
429 | 378 | private function white() |
|
441 | |||
442 | /** |
||
443 | * Matches true, false, null, etc |
||
444 | */ |
||
445 | 84 | private function word() |
|
486 | |||
487 | 42 | private function arr() |
|
526 | |||
527 | /** |
||
528 | * Parse an object value |
||
529 | */ |
||
530 | 78 | private function obj() |
|
531 | { |
||
532 | 78 | $object = $this->associative ? [] : new \stdClass; |
|
533 | |||
578 | |||
579 | /** |
||
580 | * Parse a JSON value. |
||
581 | * |
||
582 | * It could be an object, an array, a string, a number, |
||
583 | * or a word. |
||
584 | */ |
||
585 | 378 | private function value() |
|
604 | |||
605 | 108 | private function throwSyntaxError($message) |
|
609 | |||
610 | 24 | private static function renderChar($chr) |
|
614 | |||
615 | /** |
||
616 | * @param string $hex Hex code |
||
617 | * |
||
618 | * @return string Unicode character |
||
619 | */ |
||
620 | 6 | private static function fromCharCode($hex) |
|
624 | |||
625 | /** |
||
626 | * @param string $ch |
||
627 | * |
||
628 | * @return string|null |
||
629 | */ |
||
630 | 15 | private static function getEscapee($ch) |
|
648 | } |
||
649 |
It seems like the type of the argument is not accepted by the function/method which you are calling.
In some cases, in particular if PHP’s automatic type-juggling kicks in, this might be fine. In other cases, however, this might be a bug.
We suggest adding an explicit type cast like in the following example: