Total Complexity | 122 |
Total Lines | 664 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Services_JSON often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Services_JSON, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
115 | class Services_JSON |
||
116 | { |
||
117 | /** |
||
118 | * constructs a new JSON instance |
||
119 | * |
||
120 | * @param int $use object behavior flags; combine with boolean-OR |
||
121 | * |
||
122 | * possible values: |
||
123 | * - SERVICES_JSON_LOOSE_TYPE: loose typing. |
||
124 | * "{...}" syntax creates associative arrays |
||
125 | * instead of objects in decode(). |
||
126 | * - SERVICES_JSON_SUPPRESS_ERRORS: error suppression. |
||
127 | * Values which can't be encoded (e.g. resources) |
||
128 | * appear as NULL instead of throwing errors. |
||
129 | * By default, a deeply-nested resource will |
||
130 | * bubble up with an error, so all return values |
||
131 | * from encode() should be checked with isError() |
||
132 | */ |
||
public function __construct($use = 0)
{
    // Keep the behavior flags on the instance; decode() tests them with a
    // bitwise AND (for example against SERVICES_JSON_LOOSE_TYPE).
    $this->use = $use;
}

/**
 * Behavior flags supplied to the constructor.
 *
 * Declared explicitly because assigning to an undeclared (dynamic)
 * property is deprecated as of PHP 8.2.
 *
 * @var int
 */
public $use = 0;
||
137 | |||
138 | /** |
||
139 | * convert a string from one UTF-16 char to one UTF-8 char |
||
140 | * |
||
141 | * Normally should be handled by mb_convert_encoding, but |
||
142 | * provides a slower PHP-only method for installations |
||
143 | * that lack the multibyte string extension. |
||
144 | * |
||
145 | * @param string $utf16 UTF-16 character |
||
146 | * @return string UTF-8 character |
||
147 | * @access private |
||
148 | */ |
||
public function utf162utf8($utf16)
{
    // Prefer the multibyte extension whenever it is installed.
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
    }

    // The pure-PHP fallback reads exactly two bytes (one big-endian UTF-16
    // code unit); anything shorter cannot be converted.
    if (strlen($utf16) < 2) {
        return '';
    }

    // Square-bracket offsets are required: the curly-brace form ($s{0})
    // was removed in PHP 8.0.
    $codeUnit = (ord($utf16[0]) << 8) | ord($utf16[1]);

    if ($codeUnit <= 0x7F) {
        // ASCII range; callers are not expected to get here.
        // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
        return chr($codeUnit);
    }

    if ($codeUnit <= 0x07FF) {
        // 2-byte UTF-8 sequence: 110xxxxx 10xxxxxx
        return chr(0xC0 | (($codeUnit >> 6) & 0x1F))
            . chr(0x80 | ($codeUnit & 0x3F));
    }

    // Every remaining 16-bit value becomes a 3-byte UTF-8 sequence:
    // 1110xxxx 10xxxxxx 10xxxxxx. Only one code unit is read, so surrogate
    // pairs (code points above U+FFFF) are not combined here.
    return chr(0xE0 | (($codeUnit >> 12) & 0x0F))
        . chr(0x80 | (($codeUnit >> 6) & 0x3F))
        . chr(0x80 | ($codeUnit & 0x3F));
}
||
178 | |||
179 | /** |
||
180 | * convert a string from one UTF-8 char to one UTF-16 char |
||
181 | * |
||
182 | * Normally should be handled by mb_convert_encoding, but |
||
183 | * provides a slower PHP-only method for installations |
||
184 | * that lack the multibyte string extension. |
||
185 | * |
||
186 | * @param string $utf8 UTF-8 character |
||
187 | * @return string UTF-16 character |
||
188 | * @access private |
||
189 | */ |
||
public function utf82utf16($utf8)
{
    // Prefer the multibyte extension whenever it is installed.
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
    }

    // Pure-PHP fallback: dispatch on the byte length of the UTF-8 sequence.
    // Square-bracket offsets are required: the curly-brace form ($s{0})
    // was removed in PHP 8.0.
    switch (strlen($utf8)) {
        case 1:
            // ASCII range; callers are not expected to get here, and the
            // single byte is handed back unchanged.
            // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            return $utf8;

        case 2:
            // 110xxxxx 10xxxxxx -> 00000xxx xxxxxxxx
            return chr(0x07 & (ord($utf8[0]) >> 2))
                . chr((0xC0 & (ord($utf8[0]) << 6)) | (0x3F & ord($utf8[1])));

        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx -> xxxxxxxx xxxxxxxx
            // The last continuation byte contributes six payload bits, so
            // it is masked with 0x3F (not 0x7F) to keep a malformed byte
            // from leaking an extra bit into the result.
            return chr((0xF0 & (ord($utf8[0]) << 4)) | (0x0F & (ord($utf8[1]) >> 2)))
                . chr((0xC0 & (ord($utf8[1]) << 6)) | (0x3F & ord($utf8[2])));
    }

    // Empty input and sequences of four or more bytes are not handled.
    return '';
}
||
223 | |||
224 | /** |
||
225 | * encodes an arbitrary variable into JSON format |
||
226 | * |
||
227 | * @param mixed $var any number, boolean, string, array, or object to be encoded. |
||
228 | * see argument 1 to Services_JSON() above for array-parsing behavior. |
||
229 | * if var is a string, note that encode() always expects it |
||
230 | * to be in ASCII or UTF-8 format! |
||
231 | * |
||
232 | * @return mixed JSON string representation of input var or an error if a problem occurs |
||
233 | * @access public |
||
234 | */ |
||
235 | public function encode($var) |
||
433 | } |
||
434 | } |
||
435 | |||
436 | /** |
||
437 | * array-walking function for use in generating JSON-formatted name-value pairs |
||
438 | * |
||
439 | * @param string $name name of key to use |
||
440 | * @param mixed $value reference to an array element to be encoded |
||
441 | * |
||
442 | * @return string JSON-formatted name-value pair, like '"name":value' |
||
443 | * @access private |
||
444 | */ |
||
445 | public function name_value($name, $value) |
||
454 | } |
||
455 | |||
456 | /** |
||
457 | * reduce a string by removing leading and trailing comments and whitespace |
||
458 | * |
||
459 | * @param string $str string value to strip of comments and whitespace |
||
460 | * |
||
461 | * @return string string value stripped of comments and whitespace |
||
462 | * @access private |
||
463 | */ |
||
public function reduce_string($str)
{
    $patterns = [
        // single-line comments in '// ...' form
        '#^\s*//(.+)$#m',

        // multi-line comments in '/* ... */' form, at start of string
        '#^\s*/\*(.+)\*/#Us',

        // multi-line comments in '/* ... */' form, at end of string
        '#/\*(.+)\*/\s*$#Us',
    ];

    $stripped = preg_replace($patterns, '', $str);

    // preg_replace() returns null when PCRE fails (for example when the
    // backtrack limit is hit on a large input). Fall back to the untouched
    // input instead of handing null to trim(), which would silently turn
    // the whole string into ''.
    if (null === $stripped) {
        $stripped = $str;
    }

    // eliminate extraneous space
    return trim($stripped);
}
||
486 | |||
487 | /** |
||
488 | * decodes a JSON string into appropriate variable |
||
489 | * |
||
490 | * @param string $str JSON-formatted string |
||
491 | * |
||
492 | * @return mixed number, boolean, string, array, or object |
||
493 | * corresponding to given JSON input string. |
||
494 | * See argument 1 to Services_JSON() above for object-output behavior. |
||
495 | * Note that decode() always returns strings |
||
496 | * in ASCII or UTF-8 format! |
||
497 | * @access public |
||
498 | */ |
||
public function decode($str)
{
    // Drop surrounding comments and whitespace before inspecting the value.
    $str = $this->reduce_string($str);

    // Bare literals are recognised case-insensitively.
    switch (strtolower($str)) {
        case 'true':
            return true;

        case 'false':
            return false;

        case 'null':
            return null;
    }

    if (is_numeric($str)) {
        // Return an int when the value is integral, otherwise a float.
        // "(int)" replaces the non-canonical "(integer)" cast spelling.
        return ((float)$str == (int)$str) ? (int)$str : (float)$str;
    }

    $m = [];

    if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
        // Quoted string; the result is returned as UTF-8.
        // String offsets below use square brackets: the curly-brace form
        // ($s{0}) was removed in PHP 8.0.
        $delim = substr($str, 0, 1);
        $chrs = substr($str, 1, -1);
        $utf8 = '';
        $strlen_chrs = strlen($chrs);

        // Two-character escapes that stand for a single control byte.
        $controlEscapes = [
            '\b' => 0x08,
            '\t' => 0x09,
            '\n' => 0x0A,
            '\f' => 0x0C,
            '\r' => 0x0D,
        ];

        // Escapes that stand for the character following the backslash.
        $literalEscapes = ['\\"', '\\\'', '\\\\', '\\/'];

        for ($c = 0; $c < $strlen_chrs; ++$c) {
            $pair = substr($chrs, $c, 2);
            $ord = ord($chrs[$c]);

            if (isset($controlEscapes[$pair])) {
                $utf8 .= chr($controlEscapes[$pair]);
                ++$c;
            } elseif (in_array($pair, $literalEscapes, true)) {
                // An escaped quote of the kind that does NOT delimit this
                // string is not consumed here: only the backslash is
                // skipped, and the quote itself is copied by the next
                // iteration.
                if (('"' == $delim && '\\\'' != $pair)
                    || ("'" == $delim && '\\"' != $pair)) {
                    $utf8 .= $chrs[++$c];
                }
            } elseif (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                // single, escaped unicode character (\uXXXX)
                $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2)))
                    . chr(hexdec(substr($chrs, ($c + 4), 2)));
                $utf8 .= $this->utf162utf8($utf16);
                $c += 5;
            } elseif (($ord >= 0x20) && ($ord <= 0x7F)) {
                // plain ASCII byte
                $utf8 .= $chrs[$c];
            } else {
                // Multi-byte UTF-8 sequence: the lead byte determines how
                // many bytes are copied through verbatim.
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $length = 0;

                if (0xC0 == ($ord & 0xE0)) {
                    // U-00000080 - U-000007FF, mask 110XXXXX
                    $length = 2;
                } elseif (0xE0 == ($ord & 0xF0)) {
                    // U-00000800 - U-0000FFFF, mask 1110XXXX
                    $length = 3;
                } elseif (0xF0 == ($ord & 0xF8)) {
                    // U-00010000 - U-001FFFFF, mask 11110XXX
                    $length = 4;
                } elseif (0xF8 == ($ord & 0xFC)) {
                    // U-00200000 - U-03FFFFFF, mask 111110XX
                    $length = 5;
                } elseif (0xFC == ($ord & 0xFE)) {
                    // U-04000000 - U-7FFFFFFF, mask 1111110X
                    $length = 6;
                }

                // Bytes matching none of the masks (control characters,
                // stray continuation bytes) are dropped.
                if ($length > 0) {
                    $utf8 .= substr($chrs, $c, $length);
                    $c += $length - 1;
                }
            }
        }

        return $utf8;
    }

    if (!preg_match('/^\[.*\]$/s', $str) && !preg_match('/^\{.*\}$/s', $str)) {
        // Not a literal, number, string, array or object.
        return null;
    }

    // array, or object notation
    if ('[' == $str[0]) {
        $stk = [SERVICES_JSON_IN_ARR];
        $arr = [];
    } else {
        $stk = [SERVICES_JSON_IN_OBJ];
        // Loose typing collects members in an associative array.
        $obj = ($this->use & SERVICES_JSON_LOOSE_TYPE) ? [] : new stdClass();
    }

    // The first slice starts at offset 0 of the container's contents.
    $stk[] = [
        'what' => SERVICES_JSON_SLICE,
        'where' => 0,
        'delim' => false,
    ];

    $chrs = $this->reduce_string(substr($str, 1, -1));

    if ('' == $chrs) {
        // Empty container.
        if (SERVICES_JSON_IN_ARR == reset($stk)) {
            return $arr;
        }

        return $obj;
    }

    $strlen_chrs = strlen($chrs);

    // Contexts in which a nested array, object or comment may begin.
    $openers = [SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ];

    // The loop deliberately runs one step past the last character so the
    // final slice is flushed by the same code path as a top-level comma.
    for ($c = 0; $c <= $strlen_chrs; ++$c) {
        $top = end($stk);
        $pair = substr($chrs, $c, 2);

        if (($c == $strlen_chrs) || ((',' == $chrs[$c]) && (SERVICES_JSON_SLICE == $top['what']))) {
            // found a comma that is not inside a string, array, etc.,
            // OR we've reached the end of the character list
            $slice = substr($chrs, $top['where'], ($c - $top['where']));
            $stk[] = ['what' => SERVICES_JSON_SLICE, 'where' => ($c + 1), 'delim' => false];

            if (SERVICES_JSON_IN_ARR == reset($stk)) {
                // we are in an array, so just append the decoded element
                $arr[] = $this->decode($slice);
            } elseif (SERVICES_JSON_IN_OBJ == reset($stk)) {
                // we are in an object, so work out the property name first
                $parts = [];
                $hasKey = false;

                if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // "name":value pair
                    $key = $this->decode($parts[1]);
                    $hasKey = true;
                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // name:value pair, where name is unquoted
                    $key = $parts[1];
                    $hasKey = true;
                }

                if ($hasKey) {
                    $val = $this->decode($parts[2]);

                    if ($this->use & SERVICES_JSON_LOOSE_TYPE) {
                        $obj[$key] = $val;
                    } else {
                        $obj->{$key} = $val;
                    }
                }
            }

            continue;
        }

        // Safe to read now: $c is strictly inside the string.
        $ch = $chrs[$c];

        if ((('"' == $ch) || ("'" == $ch)) && (SERVICES_JSON_IN_STR != $top['what'])) {
            // found a quote, and we are not inside a string
            $stk[] = ['what' => SERVICES_JSON_IN_STR, 'where' => $c, 'delim' => $ch];
        } elseif ((SERVICES_JSON_IN_STR == $top['what']) && ($ch === $top['delim'])) {
            // found the delimiter while inside a string; it closes the
            // string only when it is not escaped, i.e. when there is _not_
            // an odd number of backslashes directly before it
            $prefix = substr($chrs, 0, $c);
            $trailingBackslashes = strlen($prefix) - strlen(rtrim($prefix, '\\'));

            if (1 != $trailingBackslashes % 2) {
                array_pop($stk);
            }
        } elseif (('[' == $ch) && in_array($top['what'], $openers, true)) {
            // found a left-bracket, and we are in an array, object, or slice
            $stk[] = ['what' => SERVICES_JSON_IN_ARR, 'where' => $c, 'delim' => false];
        } elseif ((']' == $ch) && (SERVICES_JSON_IN_ARR == $top['what'])) {
            // found a right-bracket, and we're in an array
            array_pop($stk);
        } elseif (('{' == $ch) && in_array($top['what'], $openers, true)) {
            // found a left-brace, and we are in an array, object, or slice
            $stk[] = ['what' => SERVICES_JSON_IN_OBJ, 'where' => $c, 'delim' => false];
        } elseif (('}' == $ch) && (SERVICES_JSON_IN_OBJ == $top['what'])) {
            // found a right-brace, and we're in an object
            array_pop($stk);
        } elseif (('/*' == $pair) && in_array($top['what'], $openers, true)) {
            // found a comment start, and we are in an array, object, or slice
            $stk[] = ['what' => SERVICES_JSON_IN_CMT, 'where' => $c, 'delim' => false];
            $c++;
        } elseif (('*/' == $pair) && (SERVICES_JSON_IN_CMT == $top['what'])) {
            // found a comment end, and we're in one now
            array_pop($stk);
            $c++;

            // Blank out the whole comment (same length, so offsets already
            // recorded on the stack stay valid).
            $span = $c - $top['where'] + 1;
            $chrs = substr_replace($chrs, str_repeat(' ', $span), $top['where'], $span);
        }
    }

    if (SERVICES_JSON_IN_ARR == reset($stk)) {
        return $arr;
    }

    if (SERVICES_JSON_IN_OBJ == reset($stk)) {
        return $obj;
    }

    return null;
}
||
764 | |||
765 | /** |
||
766 | * @todo Ultimately, this should just call PEAR::isError() |
||
767 | */ |
||
768 | public function isError($data, $code = null) |
||
779 | } |
||
780 | } |
||
781 | |||
782 | if (class_exists('PEAR_Error')) { |
||
783 | class Services_JSON_Error extends PEAR_Error |
||
813 |