| Total Complexity | 129 |
| Total Lines | 652 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like ServicesJSON often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ServicesJSON, and based on these observations, apply Extract Interface, too.
| 1 | <?php declare(strict_types=1); |
||
| 77 | class ServicesJSON |
||
| 78 | { |
||
/**
 * Creates a JSON encoder/decoder configured by a bitmask of behavior flags.
 *
 * @param int $use behavior flags; combine several with bitwise OR.
 *
 *                 Recognised flags:
 *                 - SERVICES_JSON_LOOSE_TYPE: decode() builds associative
 *                   arrays for "{...}" input instead of objects.
 *                 - SERVICES_JSON_SUPPRESS_ERRORS: values that cannot be
 *                   encoded (e.g. resources) are emitted as null instead of
 *                   an error value. Without this flag an un-encodable value,
 *                   however deeply nested, is returned as an error, so every
 *                   result of encode() should be checked with isError().
 */
public function __construct($use = 0)
{
    // Kept verbatim; the encoder and decoder test individual bits of it.
    $this->use = $use;
}
||
| 99 | |||
/**
 * Converts a single UTF-16 code unit (two bytes, high byte first) to UTF-8.
 *
 * mb_convert_encoding() does the work when it is available; otherwise a
 * pure-PHP fallback handles code units up to U+FFFF.
 *
 * @param string $utf16 one UTF-16 character
 * @return string the same character in UTF-8, or '' if it cannot be converted
 */
public function utf162utf8($utf16): string
{
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
    }

    // Combine the two bytes into one 16-bit value (0x0000 - 0xFFFF).
    $codeUnit = (ord($utf16[0]) << 8) | ord($utf16[1]);

    // see: https://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
    if ($codeUnit <= 0x7F) {
        // Plain ASCII; callers are not expected to pass this range.
        return chr(0x7F & $codeUnit);
    }

    if ($codeUnit <= 0x07FF) {
        // 2-byte UTF-8 sequence: 110xxxxx 10xxxxxx
        $lead = chr(0xC0 | (($codeUnit >> 6) & 0x1F));
        $tail = chr(0x80 | ($codeUnit & 0x3F));

        return $lead . $tail;
    }

    if ($codeUnit <= 0xFFFF) {
        // 3-byte UTF-8 sequence: 1110xxxx 10xxxxxx 10xxxxxx
        $lead = chr(0xE0 | (($codeUnit >> 12) & 0x0F));
        $mid  = chr(0x80 | (($codeUnit >> 6) & 0x3F));
        $tail = chr(0x80 | ($codeUnit & 0x3F));

        return $lead . $mid . $tail;
    }

    // Anything wider than 16 bits is not handled by the fallback.
    return '';
}
||
| 137 | |||
/**
 * Converts a single UTF-8 character to UTF-16 (high byte first).
 *
 * mb_convert_encoding() does the work when it is available; otherwise a
 * pure-PHP fallback handles 1- to 3-byte UTF-8 sequences.
 *
 * @param string $utf8 one UTF-8 character
 * @return string the same character in UTF-16, or '' if the fallback cannot
 *                convert it (sequences longer than three bytes)
 */
public function utf82utf16($utf8): string
{
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
    }

    // This branch only runs when mbstring is missing, so it must not call
    // mb_* functions, and it needs the number of BYTES in the sequence
    // (a character count would always be 1 for a single character).
    // see: https://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
    switch (strlen($utf8)) {
        case 1:
            // Plain ASCII; callers are not expected to pass this range.
            return $utf8;
        case 2:
            // 110xxxxx 10xxxxxx -> 00000xxx xxxxxxxx
            return chr(0x07 & (ord($utf8[0]) >> 2))
                . chr((0xC0 & (ord($utf8[0]) << 6)) | (0x3F & ord($utf8[1])));
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx -> xxxxxxxx xxxxxxxx
            return chr((0xF0 & (ord($utf8[0]) << 4)) | (0x0F & (ord($utf8[1]) >> 2)))
                . chr((0xC0 & (ord($utf8[1]) << 6)) | (0x7F & ord($utf8[2])));
    }

    // Sequences of four or more bytes are not handled by the fallback.
    return '';
}
||
| 173 | |||
/**
 * Encodes an arbitrary variable as JSON and sends the JSON Content-Type header.
 *
 * @param mixed $var any number, boolean, string, array, or object to be encoded.
 *                   Strings are expected to be ASCII or UTF-8.
 *
 * @return mixed JSON representation of $var, or an error value if it cannot be encoded
 */
public function encode($var)
{
    // "Document-type" is not an HTTP header; clients need Content-Type to
    // treat the response body as JSON rather than the server default.
    header('Content-Type: application/json');

    return $this->encodeUnsafe($var);
}
||
| 190 | |||
/**
 * Encodes an arbitrary variable as JSON without sending any header.
 *
 * Warning: because no JSON Content-Type header is sent, echoing the result
 * into a response may allow cross-site scripting (XSS).
 *
 * @param mixed $var any number, boolean, string, array, or object to be encoded.
 *                   Strings are expected to be ASCII or UTF-8.
 *
 * @return mixed JSON representation of $var, or an error value if it cannot be encoded
 */
public function encodeUnsafe($var)
{
    // see bug #16908 - regarding numeric locale printing.
    // Passing "0" only queries the current LC_NUMERIC setting.
    $lc = setlocale(LC_NUMERIC, '0');
    setlocale(LC_NUMERIC, 'C');

    try {
        return $this->_encode($var);
    } finally {
        // Restore the caller's locale even if encoding throws. Skip the
        // restore when the query failed, so `false` is never passed back in.
        if (false !== $lc) {
            setlocale(LC_NUMERIC, $lc);
        }
    }
}
||
| 211 | |||
/**
 * Does the actual work of encoding an arbitrary variable as JSON.
 *
 * Scalars map to their JSON literals. Strings are escaped to pure ASCII, with
 * every multi-byte UTF-8 character written as \uXXXX. Arrays become a JSON
 * array when their keys are exactly 0..n-1 and a JSON object otherwise.
 * Objects are encoded from get_object_vars().
 *
 * @param mixed $var any number, boolean, string, array, or object to be encoded.
 *                   Strings are expected to be ASCII or UTF-8.
 *
 * @return mixed JSON representation of $var (int/float for numbers, string
 *               otherwise), or an error value if it cannot be encoded
 */
public function _encode($var)
{
    switch (gettype($var)) {
        case 'boolean':
            return $var ? 'true' : 'false';
        case 'NULL':
            return 'null';
        case 'integer':
            return (int)$var;
        case 'double':
        case 'float':
            return (float)$var;
        case 'string':
            // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
            $ascii = '';
            // The loop below indexes $var byte by byte, so the bound must be
            // the byte length. A character count is shorter for any
            // multi-byte input and would cut the string off early.
            $strlen_var = strlen($var);

            for ($c = 0; $c < $strlen_var; ++$c) {
                $ord_var_c = ord($var[$c]);

                switch (true) {
                    case 0x08 == $ord_var_c:
                        $ascii .= '\b';
                        break;
                    case 0x09 == $ord_var_c:
                        $ascii .= '\t';
                        break;
                    case 0x0A == $ord_var_c:
                        $ascii .= '\n';
                        break;
                    case 0x0C == $ord_var_c:
                        $ascii .= '\f';
                        break;
                    case 0x0D == $ord_var_c:
                        $ascii .= '\r';
                        break;
                    case 0x22 == $ord_var_c:
                    case 0x2F == $ord_var_c:
                    case 0x5C == $ord_var_c:
                        // double quote, slash, backslash
                        $ascii .= '\\' . $var[$c];
                        break;
                    case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
                        // characters U-00000000 - U-0000007F (same as ASCII)
                        $ascii .= $var[$c];
                        break;
                    default:
                        // see https://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                        $seqLen = $this->utf8SequenceLength($ord_var_c);

                        if (0 === $seqLen) {
                            // Other control bytes and stray continuation
                            // bytes produce no output.
                            break;
                        }

                        if ($c + $seqLen - 1 >= $strlen_var) {
                            // Sequence is cut off by the end of the string.
                            $c += $seqLen - 1;
                            $ascii .= '?';
                            break;
                        }

                        $char = substr($var, $c, $seqLen);
                        $c += $seqLen - 1;
                        $ascii .= $this->unicodeEscape($this->utf82utf16($char));
                        break;
                }
            }

            return '"' . $ascii . '"';
        case 'array':
            /*
             * As per JSON spec if any array key is not an integer
             * we must treat the whole array as an object. We
             * also try to catch a sparsely populated associative
             * array with numeric keys here because some JS engines
             * will create an array with empty indexes up to
             * max_index which can cause memory issues and because
             * the keys, which may be relevant, will be remapped
             * otherwise.
             *
             * As per the ECMA and JSON specification an object may
             * have any string as a property. Unfortunately due to
             * a hole in the ECMA specification if the key is a
             * ECMA reserved word or starts with a digit the
             * parameter is only accessible using ECMAScript's
             * bracket notation.
             */

            // treat as a JSON object
            if (is_array($var) && count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                $properties = array_map([$this, 'name_value'], array_keys($var), array_values($var));

                foreach ($properties as $property) {
                    if ($this->isError($property)) {
                        return $property;
                    }
                }

                return '{' . implode(',', $properties) . '}';
            }

            // treat it like a regular array
            $elements = array_map([$this, '_encode'], $var);

            foreach ($elements as $element) {
                if ($this->isError($element)) {
                    return $element;
                }
            }

            return '[' . implode(',', $elements) . ']';
        case 'object':
            $vars = get_object_vars($var);

            $properties = array_map([$this, 'name_value'], array_keys($vars), array_values($vars));

            foreach ($properties as $property) {
                if ($this->isError($property)) {
                    return $property;
                }
            }

            return '{' . implode(',', $properties) . '}';
        default:
            return ($this->use & SERVICES_JSON_SUPPRESS_ERRORS) ? 'null' : new ServicesJSON_Error(gettype($var) . ' can not be encoded as JSON string');
    }
}

/**
 * Returns the length of the UTF-8 sequence announced by a lead byte.
 *
 * @param int $leadByte byte value, 0-255
 * @return int 2-6 for a multi-byte lead byte, 0 for any other byte
 */
private function utf8SequenceLength(int $leadByte): int
{
    if (0xC0 === ($leadByte & 0xE0)) {
        return 2; // 110xxxxx: U-00000080 - U-000007FF
    }
    if (0xE0 === ($leadByte & 0xF0)) {
        return 3; // 1110xxxx: U-00000800 - U-0000FFFF
    }
    if (0xF0 === ($leadByte & 0xF8)) {
        return 4; // 11110xxx: U-00010000 - U-001FFFFF
    }
    if (0xF8 === ($leadByte & 0xFC)) {
        return 5; // 111110xx: U-00200000 - U-03FFFFFF
    }
    if (0xFC === ($leadByte & 0xFE)) {
        return 6; // 1111110x: U-04000000 - U-7FFFFFFF
    }

    return 0;
}

/**
 * Formats UTF-16 bytes as JSON \uXXXX escapes, one per 16-bit code unit.
 *
 * A character outside the BMP arrives as a surrogate pair (four bytes) and
 * must be written as two escapes; a single 8-digit escape is not valid JSON.
 *
 * @param string $utf16 UTF-16 bytes, high byte first
 * @return string one or more \uXXXX escapes
 */
private function unicodeEscape(string $utf16): string
{
    $hex = bin2hex($utf16);

    if (strlen($hex) <= 4) {
        // Zero-pads short (or empty) input to a full 4-digit escape.
        return sprintf('\u%04s', $hex);
    }

    return '\u' . implode('\u', str_split($hex, 4));
}
||
| 400 | |||
/**
 * Builds one JSON object member from a key and its value.
 *
 * Used as the array_map() callback when encoding associative arrays and objects.
 *
 * @param string $name  key to use as the member name
 * @param mixed  $value value to encode
 *
 * @return mixed string of the form "name":value, or the error value produced
 *               while encoding $value
 */
public function name_value($name, $value)
{
    $jsonValue = $this->_encode($value);

    // The key is only encoded when the value encoded cleanly; an error
    // value is handed straight back to the caller instead.
    if (!$this->isError($jsonValue)) {
        $jsonName = $this->_encode((string)$name);

        return $jsonName . ':' . $jsonValue;
    }

    return $jsonValue;
}
||
| 419 | |||
| 420 | /** |
||
| 421 | * reduce a string by removing leading and trailing comments and whitespace |
||
| 422 | * |
||
| 423 | * @param string $str string value to strip of comments and whitespace |
||
| 424 | * |
||
| 425 | * @return string string value stripped of comments and whitespace |
||
| 426 | */ |
||
| 427 | public function reduce_string($str): string |
||
| 446 | } |
||
| 447 | |||
/**
 * Decodes a JSON string into the corresponding PHP value.
 *
 * The input is scanned byte by byte, so every length, offset and slice in
 * this method uses the byte-oriented string functions. Mixing them with
 * character-based mb_* functions corrupts any input containing multi-byte
 * UTF-8 characters.
 *
 * Besides strict JSON, the scanner also accepts single-quoted strings,
 * unquoted object keys and C-style comments inside arrays and objects.
 *
 * @param string $str JSON-formatted string
 *
 * @return array|bool|float|int|\stdClass|string|void|null number, boolean, string, array, or object
 *                   corresponding to the given JSON input string. Objects are returned
 *                   as associative arrays when SERVICES_JSON_LOOSE_TYPE is set.
 *                   Strings are returned in ASCII or UTF-8. Input that matches none of
 *                   the recognised forms yields null.
 */
public function decode($str)
{
    $str = $this->reduce_string($str);

    switch (strtolower($str)) {
        case 'true':
            return true;
        case 'false':
            return false;
        case 'null':
            return null;
        default:
            $m = [];

            if (is_numeric($str)) {
                // Return an int when the value is integral, a float otherwise.
                return ((float)$str == (int)$str) ? (int)$str : (float)$str;
            }

            if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
                // STRINGS RETURNED IN UTF-8 FORMAT
                $delim = substr($str, 0, 1);
                $chrs = substr($str, 1, -1);
                $utf8 = '';
                $strlen_chrs = strlen($chrs);

                for ($c = 0; $c < $strlen_chrs; ++$c) {
                    $substr_chrs_c_2 = substr($chrs, $c, 2);
                    $ord_chrs_c = ord($chrs[$c]);

                    switch (true) {
                        case '\b' === $substr_chrs_c_2:
                            $utf8 .= chr(0x08);
                            ++$c;
                            break;
                        case '\t' === $substr_chrs_c_2:
                            $utf8 .= chr(0x09);
                            ++$c;
                            break;
                        case '\n' === $substr_chrs_c_2:
                            $utf8 .= chr(0x0A);
                            ++$c;
                            break;
                        case '\f' === $substr_chrs_c_2:
                            $utf8 .= chr(0x0C);
                            ++$c;
                            break;
                        case '\r' === $substr_chrs_c_2:
                            $utf8 .= chr(0x0D);
                            ++$c;
                            break;
                        case '\\"' === $substr_chrs_c_2:
                        case '\\\'' === $substr_chrs_c_2:
                        case '\\\\' === $substr_chrs_c_2:
                        case '\\/' === $substr_chrs_c_2:
                            // Unescape, except for an escaped quote of the
                            // kind that does not delimit this string.
                            if (('"' === $delim && '\\\'' !== $substr_chrs_c_2)
                                || ("'" === $delim && '\\"' !== $substr_chrs_c_2)) {
                                $utf8 .= $chrs[++$c];
                            }
                            break;
                        case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
                            // single, escaped unicode character
                            $utf16 = chr(hexdec(substr($chrs, $c + 2, 2))) . chr(hexdec(substr($chrs, $c + 4, 2)));
                            $utf8 .= $this->utf162utf8($utf16);
                            $c += 5;
                            break;
                        case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
                            $utf8 .= $chrs[$c];
                            break;
                        // Multi-byte UTF-8 sequences are copied through unchanged.
                        // see https://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                        case 0xC0 == ($ord_chrs_c & 0xE0):
                            // characters U-00000080 - U-000007FF, mask 110XXXXX
                            $utf8 .= substr($chrs, $c, 2);
                            ++$c;
                            break;
                        case 0xE0 == ($ord_chrs_c & 0xF0):
                            // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                            $utf8 .= substr($chrs, $c, 3);
                            $c += 2;
                            break;
                        case 0xF0 == ($ord_chrs_c & 0xF8):
                            // characters U-00010000 - U-001FFFFF, mask 11110XXX
                            $utf8 .= substr($chrs, $c, 4);
                            $c += 3;
                            break;
                        case 0xF8 == ($ord_chrs_c & 0xFC):
                            // characters U-00200000 - U-03FFFFFF, mask 111110XX
                            $utf8 .= substr($chrs, $c, 5);
                            $c += 4;
                            break;
                        case 0xFC == ($ord_chrs_c & 0xFE):
                            // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                            $utf8 .= substr($chrs, $c, 6);
                            $c += 5;
                            break;
                    }
                }

                return $utf8;
            }

            if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
                // array, or object notation

                // The bottom stack entry records the kind of container being built.
                if ('[' === $str[0]) {
                    $stk = [SERVICES_JSON_IN_ARR];
                    $arr = [];
                } elseif ($this->use & SERVICES_JSON_LOOSE_TYPE) {
                    $stk = [SERVICES_JSON_IN_OBJ];
                    $obj = [];
                } else {
                    $stk = [SERVICES_JSON_IN_OBJ];
                    $obj = new stdClass();
                }

                array_push(
                    $stk,
                    [
                        'what' => SERVICES_JSON_SLICE,
                        'where' => 0,
                        'delim' => false,
                    ]
                );

                $chrs = substr($str, 1, -1);
                $chrs = $this->reduce_string($chrs);

                if ('' == $chrs) {
                    if (SERVICES_JSON_IN_ARR == reset($stk)) {
                        return $arr;
                    }

                    return $obj;
                }

                $strlen_chrs = strlen($chrs);

                // Runs one step past the last byte so the final element is flushed too.
                for ($c = 0; $c <= $strlen_chrs; ++$c) {
                    $top = end($stk);
                    $substr_chrs_c_2 = substr($chrs, $c, 2);

                    if (($c == $strlen_chrs) || ((',' === $chrs[$c]) && (SERVICES_JSON_SLICE == $top['what']))) {
                        // found a comma that is not inside a string, array, etc.,
                        // OR we've reached the end of the character list
                        $slice = substr($chrs, $top['where'], $c - $top['where']);
                        array_push($stk, ['what' => SERVICES_JSON_SLICE, 'where' => $c + 1, 'delim' => false]);

                        if (SERVICES_JSON_IN_ARR == reset($stk)) {
                            // we are in an array, so just append the decoded element
                            $arr[] = $this->decode($slice);
                        } elseif (SERVICES_JSON_IN_OBJ == reset($stk)) {
                            // we are in an object, so split the slice into
                            // property name and value
                            $parts = [];

                            if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                                // "name":value pair
                                $key = $this->decode($parts[1]);
                                $val = $this->decode($parts[2]);

                                if ($this->use & SERVICES_JSON_LOOSE_TYPE) {
                                    $obj[$key] = $val;
                                } else {
                                    $obj->$key = $val;
                                }
                            } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                                // name:value pair, where name is unquoted
                                $key = $parts[1];
                                $val = $this->decode($parts[2]);

                                if ($this->use & SERVICES_JSON_LOOSE_TYPE) {
                                    $obj[$key] = $val;
                                } else {
                                    $obj->$key = $val;
                                }
                            }
                        }
                    } elseif ((('"' === $chrs[$c]) || ("'" === $chrs[$c])) && (SERVICES_JSON_IN_STR != $top['what'])) {
                        // found a quote, and we are not inside a string
                        array_push($stk, ['what' => SERVICES_JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]]);
                    } elseif (($chrs[$c] == $top['delim'])
                        && (SERVICES_JSON_IN_STR == $top['what'])
                        && (1 != (strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2)) {
                        // found a quote, we're in a string, and it's not escaped
                        // we know that it's not escaped because there is _not_ an
                        // odd number of backslashes at the end of the string so far
                        array_pop($stk);
                    } elseif (('[' === $chrs[$c])
                        && in_array($top['what'], [SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ], true)) {
                        // found a left-bracket, and we are in an array, object, or slice
                        array_push($stk, ['what' => SERVICES_JSON_IN_ARR, 'where' => $c, 'delim' => false]);
                    } elseif ((']' === $chrs[$c]) && (SERVICES_JSON_IN_ARR == $top['what'])) {
                        // found a right-bracket, and we're in an array
                        array_pop($stk);
                    } elseif (('{' === $chrs[$c])
                        && in_array($top['what'], [SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ], true)) {
                        // found a left-brace, and we are in an array, object, or slice
                        array_push($stk, ['what' => SERVICES_JSON_IN_OBJ, 'where' => $c, 'delim' => false]);
                    } elseif (('}' === $chrs[$c]) && (SERVICES_JSON_IN_OBJ == $top['what'])) {
                        // found a right-brace, and we're in an object
                        array_pop($stk);
                    } elseif (('/*' === $substr_chrs_c_2)
                        && in_array($top['what'], [SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ], true)) {
                        // found a comment start, and we are in an array, object, or slice
                        array_push($stk, ['what' => SERVICES_JSON_IN_CMT, 'where' => $c, 'delim' => false]);
                        ++$c;
                    } elseif (('*/' === $substr_chrs_c_2) && (SERVICES_JSON_IN_CMT == $top['what'])) {
                        // found a comment end, and we're in one now
                        array_pop($stk);
                        ++$c;

                        // Blank out the comment so it is not part of the slice decoded later.
                        for ($i = $top['where']; $i <= $c; ++$i) {
                            $chrs = substr_replace($chrs, ' ', $i, 1);
                        }
                    }
                }

                if (SERVICES_JSON_IN_ARR == reset($stk)) {
                    return $arr;
                }

                if (SERVICES_JSON_IN_OBJ == reset($stk)) {
                    return $obj;
                }
            }
    }
}
||
| 709 | |||
| 710 | /** |
||
| 711 | * @param $data |
||
| 712 | * @param null $code |
||
| 713 | * @return bool |
||
| 714 | * @todo Ultimately, this should just call PEAR::isError() |
||
| 715 | */ |
||
| 716 | public function isError($data, $code = null): bool |
||
| 729 | } |
||
| 730 | } |
||
| 731 | |||
| 780 |