Complex classes like Json5Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Json5Decoder, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | final class Json5Decoder |
||
| 18 | { |
||
| 19 | const REGEX_WHITESPACE = '/[ \t\r\n\v\f\xA0\x{FEFF}]/u'; |
||
| 20 | |||
| 21 | private $json; |
||
| 22 | |||
| 23 | private $at = 0; |
||
| 24 | |||
| 25 | private $lineNumber = 1; |
||
| 26 | |||
| 27 | private $columnNumber = 1; |
||
| 28 | |||
| 29 | private $ch; |
||
| 30 | |||
| 31 | private $associative = false; |
||
| 32 | |||
| 33 | private $maxDepth = 512; |
||
| 34 | |||
| 35 | private $castBigIntToString = false; |
||
| 36 | |||
| 37 | private $depth = 1; |
||
| 38 | |||
| 39 | private $length; |
||
| 40 | |||
| 41 | private $lineCache; |
||
| 42 | |||
| 43 | /** |
||
| 44 | * Private constructor. |
||
| 45 | * |
||
| 46 | * @param string $json |
||
| 47 | * @param bool $associative |
||
| 48 | * @param int $depth |
||
| 49 | * @param bool $castBigIntToString |
||
| 50 | */ |
||
| 51 | 378 | private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false) |
|
| 62 | |||
| 63 | /** |
||
| 64 | * Takes a JSON encoded string and converts it into a PHP variable. |
||
| 65 | * |
||
| 66 | * The parameters exactly match PHP's json_decode() function - see |
||
| 67 | * http://php.net/manual/en/function.json-decode.php for more information. |
||
| 68 | * |
||
| 69 | * @param string $source The JSON string being decoded. |
||
| 70 | * @param bool $associative When TRUE, returned objects will be converted into associative arrays. |
||
| 71 | * @param int $depth User specified recursion depth. |
||
| 72 | * @param int $options Bitmask of JSON decode options. |
||
| 73 | * |
||
| 74 | * @return mixed |
||
| 75 | */ |
||
| 76 | 378 | public static function decode($source, $associative = false, $depth = 512, $options = 0) |
|
| 91 | |||
| 92 | /** |
||
| 93 | * @param int $at |
||
| 94 | * |
||
| 95 | * @return string|null |
||
| 96 | */ |
||
| 97 | 378 | private function charAt($at) |
|
| 105 | |||
| 106 | /** |
||
| 107 | * Parse the next character. |
||
| 108 | * |
||
| 109 | * If $c is given, the next char will only be parsed if the current |
||
| 110 | * one matches $c. |
||
| 111 | * |
||
| 112 | * @param string|null $c |
||
| 113 | * |
||
| 114 | * @return null|string |
||
| 115 | */ |
||
| 116 | 348 | private function next($c = null) |
|
| 142 | |||
| 143 | /** |
||
| 144 | * Get the next character without consuming it or |
||
| 145 | * assigning it to the ch variable. |
||
| 146 | * |
||
| 147 | * @return mixed |
||
| 148 | */ |
||
| 149 | 12 | private function peek() |
|
| 153 | |||
| 154 | /** |
||
| 155 | * @return string |
||
| 156 | */ |
||
| 157 | 216 | private function getLineRemainder() |
|
| 168 | |||
| 169 | /** |
||
| 170 | * Attempt to match a regular expression at the current position on the current line. |
||
| 171 | * |
||
| 172 | * This function will not match across multiple lines. |
||
| 173 | * |
||
| 174 | * @param string $regex |
||
| 175 | * |
||
| 176 | * @return string|null |
||
| 177 | */ |
||
| 178 | 216 | private function match($regex) |
|
| 179 | { |
||
| 180 | 216 | $subject = $this->getLineRemainder(); |
|
| 181 | |||
| 182 | 216 | $matches = []; |
|
| 183 | 216 | if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) { |
|
| 184 | 114 | return null; |
|
| 185 | } |
||
| 186 | |||
| 187 | // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying |
||
| 188 | 201 | $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8'); |
|
| 189 | |||
| 190 | // [0][0] contains the matched text |
||
| 191 | // [0][1] contains the index of that match |
||
| 192 | 201 | $advanceBy = $offset + mb_strlen($matches[0][0], 'utf-8'); |
|
| 193 | |||
| 194 | 201 | $this->at += $advanceBy; |
|
| 195 | 201 | $this->columnNumber += $advanceBy; |
|
| 196 | 201 | $this->ch = $this->charAt($this->at); |
|
| 197 | |||
| 198 | 201 | return $matches[0][0]; |
|
| 199 | } |
||
| 200 | |||
| 201 | /** |
||
| 202 | * Parse an identifier. |
||
| 203 | * |
||
| 204 | * Normally, reserved words are disallowed here, but we |
||
| 205 | * only use this for unquoted object keys, where reserved words are allowed, |
||
| 206 | * so we don't check for those here. References: |
||
| 207 | * - http://es5.github.com/#x7.6 |
||
| 208 | * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables |
||
| 209 | * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm |
||
| 210 | */ |
||
| 211 | 39 | private function identifier() |
|
| 212 | { |
||
| 213 | // @codingStandardsIgnoreStart |
||
| 214 | // Be careful when editing this regex, there are a couple Unicode characters in between here -------------vv |
||
| 215 | 39 | $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]|\\\\u[0-9A-Fa-f]{4})*/u'); |
|
| 216 | // @codingStandardsIgnoreEnd |
||
| 217 | |||
| 218 | 39 | if ($match === null) { |
|
| 219 | 9 | $this->throwSyntaxError('Bad identifier as unquoted key'); |
|
| 220 | } |
||
| 221 | |||
| 222 | // Un-escape escaped Unicode chars |
||
| 223 | 30 | $unescaped = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', function ($m) { |
|
| 224 | 3 | return self::fromCharCode($m[1]); |
|
| 225 | 30 | }, $match); |
|
| 226 | |||
| 227 | 30 | return $unescaped; |
|
| 228 | } |
||
| 229 | |||
| 230 | 210 | private function number() |
|
| 231 | { |
||
| 232 | 210 | $number = null; |
|
| 233 | 210 | $sign = ''; |
|
| 234 | 210 | $string = ''; |
|
| 235 | 210 | $base = 10; |
|
| 236 | |||
| 237 | 210 | if ($this->ch === '-' || $this->ch === '+') { |
|
| 238 | 93 | $sign = $this->ch; |
|
| 239 | 93 | $this->next($this->ch); |
|
| 240 | 62 | } |
|
| 241 | |||
| 242 | // support for Infinity |
||
| 243 | 210 | if ($this->ch === 'I') { |
|
| 244 | 6 | $number = $this->word(); |
|
| 245 | 6 | if ($number === null) { |
|
| 246 | $this->throwSyntaxError('Unexpected word for number'); |
||
| 247 | } |
||
| 248 | |||
| 249 | 6 | return ($sign === '-') ? -INF : INF; |
|
| 250 | } |
||
| 251 | |||
| 252 | // support for NaN |
||
| 253 | 204 | if ($this->ch === 'N') { |
|
| 254 | $number = $this->word(); |
||
| 255 | if ($number !== NAN) { |
||
| 256 | $this->throwSyntaxError('expected word to be NaN'); |
||
| 257 | } |
||
| 258 | |||
| 259 | // ignore sign as -NaN also is NaN |
||
| 260 | return $number; |
||
| 261 | } |
||
| 262 | |||
| 263 | 204 | if ($this->ch === '0') { |
|
| 264 | 105 | $string .= $this->ch; |
|
| 265 | 105 | $this->next(); |
|
| 266 | 105 | if ($this->ch === 'x' || $this->ch === 'X') { |
|
| 267 | 33 | $string .= $this->ch; |
|
| 268 | 33 | $this->next(); |
|
| 269 | 33 | $base = 16; |
|
| 270 | 94 | } elseif (is_numeric($this->ch)) { |
|
| 271 | 30 | $this->throwSyntaxError('Octal literal'); |
|
| 272 | } |
||
| 273 | 50 | } |
|
| 274 | |||
| 275 | switch ($base) { |
||
| 276 | 174 | case 10: |
|
| 277 | 144 | if (($match = $this->match('/^\d*\.?\d*/')) !== null) { |
|
| 278 | 144 | $string .= $match; |
|
| 279 | 96 | } |
|
| 280 | 144 | if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) { |
|
| 281 | 45 | $string .= $match; |
|
| 282 | 30 | } |
|
| 283 | 144 | $number = $string; |
|
| 284 | 144 | break; |
|
| 285 | 33 | case 16: |
|
| 286 | 33 | if (($match = $this->match('/^[A-Fa-f0-9]+/')) !== null) { |
|
| 287 | 30 | $string .= $match; |
|
| 288 | 30 | $number = hexdec($string); |
|
| 289 | 30 | break; |
|
| 290 | } |
||
| 291 | 3 | $this->throwSyntaxError('Bad hex number'); |
|
| 292 | } |
||
| 293 | |||
| 294 | 171 | if ($sign === '-') { |
|
| 295 | 33 | $number = -$number; |
|
| 296 | 22 | } |
|
| 297 | |||
| 298 | 171 | if (!is_numeric($number) || !is_finite($number)) { |
|
| 299 | 3 | $this->throwSyntaxError('Bad number'); |
|
| 300 | } |
||
| 301 | |||
| 302 | 168 | if ($this->castBigIntToString) { |
|
| 303 | 3 | return $number; |
|
| 304 | } |
||
| 305 | |||
| 306 | // Adding 0 will automatically cast this to an int or float |
||
| 307 | 165 | return $number + 0; |
|
| 308 | } |
||
| 309 | |||
| 310 | 75 | private function string() |
|
| 311 | { |
||
| 312 | 75 | if (!($this->ch === '"' || $this->ch === "'")) { |
|
| 313 | $this->throwSyntaxError('Bad string'); |
||
| 314 | } |
||
| 315 | |||
| 316 | 75 | $string = ''; |
|
| 317 | |||
| 318 | 75 | $delim = $this->ch; |
|
| 319 | 75 | $this->next(); |
|
| 320 | 75 | while ($this->ch !== null) { |
|
| 321 | 75 | if ($this->ch === $delim) { |
|
| 322 | 69 | $this->next(); |
|
| 323 | |||
| 324 | 69 | return $string; |
|
| 325 | 75 | } elseif ($this->ch === '\\') { |
|
| 326 | 27 | $this->next(); |
|
| 327 | 27 | if ($this->ch === 'u') { |
|
| 328 | 6 | $this->next(); |
|
| 329 | 6 | $hex = $this->match('/^[A-Fa-f0-9]{4}/'); |
|
| 330 | 6 | if ($hex === null) { |
|
| 331 | 3 | break; |
|
| 332 | } |
||
| 333 | 3 | $string .= self::fromCharCode($hex); |
|
| 334 | 3 | continue; |
|
| 335 | 21 | } elseif ($this->ch === "\r") { |
|
| 336 | 6 | if ($this->peek() === "\n") { |
|
| 337 | 4 | $this->next(); |
|
| 338 | 2 | } |
|
| 339 | 19 | } elseif (($escapee = self::getEscapee($this->ch)) !== null) { |
|
| 340 | 15 | $string .= $escapee; |
|
| 341 | 10 | } else { |
|
| 342 | 7 | break; |
|
| 343 | } |
||
| 344 | 75 | } elseif ($this->ch === "\n") { |
|
| 345 | // unescaped newlines are invalid; see: |
||
| 346 | // https://github.com/json5/json5/issues/24 |
||
| 347 | // @todo this feels special-cased; are there other invalid unescaped chars? |
||
| 348 | 3 | break; |
|
| 349 | } else { |
||
| 350 | 75 | $string .= $this->ch; |
|
| 351 | } |
||
| 352 | |||
| 353 | 75 | $this->next(); |
|
| 354 | 50 | } |
|
| 355 | |||
| 356 | 6 | $this->throwSyntaxError('Bad string'); |
|
| 357 | } |
||
| 358 | |||
| 359 | /** |
||
| 360 | * Skip an inline comment, assuming this is one. |
||
| 361 | * |
||
| 362 | * The current character should be the second / character in the // pair that begins this inline comment. |
||
| 363 | * To finish the inline comment, we look for a newline or the end of the text. |
||
| 364 | */ |
||
| 365 | 36 | private function inlineComment() |
|
| 366 | { |
||
| 367 | do { |
||
| 368 | 36 | $this->next(); |
|
| 369 | 36 | if ($this->ch === "\n" || $this->ch === "\r") { |
|
| 370 | 33 | $this->next(); |
|
| 371 | |||
| 372 | 33 | return; |
|
| 373 | } |
||
| 374 | 36 | } while ($this->ch !== null); |
|
| 375 | 3 | } |
|
| 376 | |||
| 377 | /** |
||
| 378 | * Skip a block comment, assuming this is one. |
||
| 379 | * |
||
| 380 | * The current character should be the * character in the /* pair that begins this block comment. |
||
| 381 | * To finish the block comment, we look for an ending */ pair of characters, |
||
| 382 | * but we also watch for the end of text before the comment is terminated. |
||
| 383 | */ |
||
| 384 | 21 | private function blockComment() |
|
| 385 | { |
||
| 386 | do { |
||
| 387 | 21 | $this->next(); |
|
| 388 | 21 | while ($this->ch === '*') { |
|
| 389 | 18 | $this->next('*'); |
|
| 390 | 18 | if ($this->ch === '/') { |
|
| 391 | 18 | $this->next('/'); |
|
| 392 | |||
| 393 | 18 | return; |
|
| 394 | } |
||
| 395 | 2 | } |
|
| 396 | 21 | } while ($this->ch !== null); |
|
| 397 | |||
| 398 | 3 | $this->throwSyntaxError('Unterminated block comment'); |
|
| 399 | } |
||
| 400 | |||
| 401 | /** |
||
| 402 | * Skip a comment, whether inline or block-level, assuming this is one. |
||
| 403 | */ |
||
| 404 | 57 | private function comment() |
|
| 405 | { |
||
| 406 | // Comments always begin with a / character. |
||
| 407 | 57 | if ($this->ch !== '/') { |
|
| 408 | $this->throwSyntaxError('Not a comment'); |
||
| 409 | } |
||
| 410 | |||
| 411 | 57 | $this->next('/'); |
|
| 412 | |||
| 413 | 57 | if ($this->ch === '/') { |
|
| 414 | 36 | $this->inlineComment(); |
|
| 415 | 46 | } elseif ($this->ch === '*') { |
|
| 416 | 21 | $this->blockComment(); |
|
| 417 | 12 | } else { |
|
| 418 | 3 | $this->throwSyntaxError('Unrecognized comment'); |
|
| 419 | } |
||
| 420 | 51 | } |
|
| 421 | |||
| 422 | /** |
||
| 423 | * Skip whitespace and comments. |
||
| 424 | * |
||
| 425 | * Note that we're detecting comments by only a single / character. |
||
| 426 | * This works since regular expressions are not valid JSON(5), but this will |
||
| 427 | * break if there are other valid values that begin with a / character! |
||
| 428 | */ |
||
| 429 | 378 | private function white() |
|
| 441 | |||
| 442 | /** |
||
| 443 | * Matches true, false, null, etc |
||
| 444 | */ |
||
| 445 | 84 | private function word() |
|
| 486 | |||
| 487 | 42 | private function arr() |
|
| 526 | |||
| 527 | /** |
||
| 528 | * Parse an object value |
||
| 529 | */ |
||
| 530 | 78 | private function obj() |
|
| 531 | { |
||
| 532 | 78 | $object = $this->associative ? [] : new \stdClass; |
|
| 533 | |||
| 578 | |||
| 579 | /** |
||
| 580 | * Parse a JSON value. |
||
| 581 | * |
||
| 582 | * It could be an object, an array, a string, a number, |
||
| 583 | * or a word. |
||
| 584 | */ |
||
| 585 | 378 | private function value() |
|
| 604 | |||
| 605 | 108 | private function throwSyntaxError($message) |
|
| 609 | |||
| 610 | 24 | private static function renderChar($chr) |
|
| 614 | |||
| 615 | /** |
||
| 616 | * @param string $hex Hex code |
||
| 617 | * |
||
| 618 | * @return string Unicode character |
||
| 619 | */ |
||
| 620 | 6 | private static function fromCharCode($hex) |
|
| 624 | |||
| 625 | /** |
||
| 626 | * @param string $ch |
||
| 627 | * |
||
| 628 | * @return string|null |
||
| 629 | */ |
||
| 630 | 15 | private static function getEscapee($ch) |
|
| 648 | } |
||
| 649 |
It seems like the type of the argument is not accepted by the function/method which you are calling.
In some cases, in particular if PHP’s automatic type juggling kicks in, this might be fine. In other cases, however, this might be a bug.
We suggest adding an explicit type cast, as in the following example: