| Total Complexity | 40 |
| Total Lines | 175 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like similar_text often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use similar_text, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 15 | class similar_text |
||
| 16 | { |
||
/**
 * Private and empty: the class cannot be instantiated from outside,
 * and every other method visible in it is static.
 */
private function __construct() {}
||
/**
 * Compares two strings and returns the 'similar' entry of the computed statistics.
 *
 * Both inputs are first converted by self::strtolower() (case-insensitive mode)
 * or self::split() (case-sensitive mode); the results are counted with count(),
 * so both helpers are expected to return arrays.
 * NOTE(review): self::strtolower() is not visible here - confirm it returns the
 * lower-cased characters as an array.
 *
 * The input with fewer elements is always passed first to self::_check(), and the
 * larger element count is passed to self::getStats().
 *
 * @param mixed $a             First string; any non-string makes the method return false.
 * @param mixed $b             Second string; any non-string makes the method return false.
 * @param int   $round         Precision forwarded to _check() and getStats().
 * @param bool  $insensitive   When truthy, inputs go through self::strtolower() instead of self::split().
 * @param mixed $stats         Output parameter: receives the full result of self::getStats().
 *                             Left untouched when the method returns false.
 * @param bool  $getParts      Cast to bool and forwarded to _check() and getStats().
 * @param bool  $checkposition Forwarded to _check().
 *
 * @return mixed false when either input is not a string, otherwise $stats['similar'].
 */
public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition = false)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    $a = $insensitive ? self::strtolower($a) : self::split($a);
    $b = $insensitive ? self::strtolower($b) : self::split($b);

    /* the flag is no longer needed; normalise the remaining one to a plain boolean */
    unset($insensitive);
    $getParts = (bool) $getParts;

    $countA = count($a);
    $countB = count($b);

    /* the shorter input always goes first; on a tie $b is treated as the shorter one */
    if ($countA < $countB) {
        $shorter = $a;
        $longer = $b;
        $largestCount = $countB;
    } else {
        $shorter = $b;
        $longer = $a;
        $largestCount = $countA;
    }

    $diff = self::_check($shorter, $longer, $getParts, $round, $checkposition);
    $stats = self::getStats($largestCount, $shorter, $diff, $getParts, $round);

    return $stats['similar'];
}
||
| 45 | |||
/**
 * Builds the raw comparison data for two element arrays.
 *
 * The returned list contains, in order:
 *  - only when $getParts is truthy: array_diff($a, $b), then array_diff($b, $a);
 *  - the elements of $a also found in $b (array_intersect_assoc() when
 *    $checkposition is truthy, array_intersect() otherwise);
 *  - the percentage of parts of $a (as produced by self::getParts()) that also
 *    occur among the parts of $b, rounded to $round decimals;
 *  - whether $a and $b are identical (===).
 *
 * @param array $a             First element array (the caller passes the shorter input here).
 * @param array $b             Second element array.
 * @param bool  $getParts      Whether to prepend the two array_diff() results.
 * @param int   $round         Precision passed to round().
 * @param bool  $checkposition Whether the intersection must also match on keys.
 *
 * @return array List described above (3 entries, or 5 when $getParts is truthy).
 */
protected static function _check($a, $b, $getParts, $round, $checkposition = false)
{
    $diff = array();
    if ($getParts) {
        $diff[] = array_diff($a, $b);
        $diff[] = array_diff($b, $a);
    }
    $diff[] = $checkposition ? array_intersect_assoc($a, $b) : array_intersect($a, $b);

    /* getParts() reports the number of parts of $a through its by-reference argument */
    $partsCount = 0;
    $partsA = self::getParts($a, $partsCount);
    $partsB = self::getParts($b);

    /*
     * An input without any part (e.g. an empty string) leaves $partsCount at 0.
     * Dividing by it raised DivisionByZeroError on PHP 8 (warning + NAN on PHP 7),
     * so such an input is scored as 0 % instead.
     */
    $diff[] = $partsCount > 0
        ? round(count(array_intersect($partsA, $partsB)) / $partsCount * 100, $round)
        : 0.0;

    $diff[] = $a === $b;

    return $diff;
}
||
| 58 | |||
| 59 | protected static function getStats($ca, $b, $diff, $getParts, $round) |
||
| 77 | } |
||
| 78 | |||
/**
 * Collects the parts of an element array by delegating to one of two capture helpers.
 *
 * @param array $b             Elements to scan (the visible callers pass arrays of characters).
 * @param int   $c             Output parameter: reset to 0, then filled by the selected helper.
 * @param bool  $lengthCapture When truthy, self::capturePartsWithLength() is used,
 *                             otherwise self::capturePartsWithoutLength().
 *                             NOTE(review): the body of capturePartsWithLength() is not
 *                             visible here - confirm how its output differs.
 *
 * @return array The parts accumulated by the selected helper.
 */
protected static function getParts($b, &$c = 0, $lengthCapture = false)
{
    $c = 0;
    $parts = array();

    /* both helpers start from an empty word buffer; the length variant also from length 0 */
    if ((bool) $lengthCapture) {
        self::capturePartsWithLength($b, 0, '', $c, $parts);

        return $parts;
    }

    self::capturePartsWithoutLength($b, '', $c, $parts);

    return $parts;
}
||
| 93 | |||
/**
 * Groups a list of characters into words and separators.
 *
 * Characters are appended to a word buffer until a whitespace or punctuation
 * character (per ctype_space() / ctype_punct()) is met. At that point the buffer
 * and the separator itself are both appended to $parts and the buffer is reset.
 * The buffer is appended even when it is empty (separator at the start, or two
 * separators in a row); that historical behaviour is kept because it affects $c.
 *
 * @param array  $b     Characters to scan (taken by reference, not modified).
 * @param string $tmp   Initial content of the word buffer.
 * @param int    $c     Running count of captured parts; incremented in place.
 * @param array  $parts Output list of parts; appended to in place.
 *
 * @return void
 */
private static function capturePartsWithoutLength(&$b, $tmp, &$c, &$parts)
{
    foreach ($b as $v) {
        if (ctype_space($v) || ctype_punct($v)) {
            $parts[] = $tmp;
            $parts[] = $v;
            $c += 2;
            $tmp = '';
            continue;
        }
        $tmp .= $v;
    }

    /*
     * Flush the trailing word. The comparison is made against the empty string
     * rather than with empty(): empty('0') is true, so a final word "0" used to
     * be silently dropped from the parts and from the count.
     */
    if ((string) $tmp !== '') {
        $parts[] = $tmp;
        $c++;
    }
}
||
| 111 | |||
| 112 | private static function capturePartsWithLength(&$b, $length, $tmp, &$c, &$parts) |
||
| 129 | } |
||
| 130 | } |
||
| 131 | |||
| 132 | |||
| 133 | |||
| 134 | protected static function is_ascii($str) |
||
| 141 | } |
||
| 142 | |||
| 143 | protected static function strtolower($str) |
||
| 161 | } |
||
| 162 | } |
||
| 163 | |||
/**
 * Splits a string into an array of pieces through self::getSplit().
 *
 * Three function-static variables ($split, $old, $oldGrams) are handed to
 * getSplit() by reference, so their values persist between calls.
 * NOTE(review): getSplit() is not visible here - presumably it reuses the
 * previous result when the same string and gram size are requested again; confirm.
 *
 * @param mixed     $str   String to split; any non-string yields an empty array.
 * @param int|false $grams Piece size. Kept only when it is an integer between 1 and
 *                         strlen($str) (byte length); any other value is replaced by false.
 *
 * @return array Empty array for non-string input, otherwise the result of getSplit().
 */
protected static function split($str, $grams = false)
{
    static $split = [];
    static $old = '';
    static $oldGrams = 1;

    if (!is_string($str)) {
        return array();
    }

    $isUsableSize = is_int($grams) && $grams >= 1 && $grams <= strlen($str);
    if (!$isUsableSize) {
        $grams = false;
    }

    return self::getSplit($str, $split, $old, $oldGrams, $grams);
}
||
| 175 | |||
/**
 * Performs the actual split and records the request in the by-reference arguments.
 *
 * Without a gram size the string is cut into single UTF-8 characters; with one it
 * is cut into groups of $grams characters, any shorter remainder being kept as
 * its own piece.
 *
 * preg_split() returns false when the subject is not valid UTF-8 (the patterns use
 * the "u" modifier). That false used to be stored and returned as the split, which
 * callers then fed to count(). In that case the string is now split byte-wise instead.
 *
 * @param string    $str      String to split.
 * @param array     $split    Output: receives the resulting pieces.
 * @param string    $old      Output: receives $str.
 * @param int|false $oldGrams Output: receives $grams.
 * @param int|false $grams    Number of characters per piece, or false for single characters.
 *
 * @return array The pieces, identical to the value stored in $split.
 */
private static function _split(&$str, &$split, &$old, &$oldGrams, $grams)
{
    $old = $str;
    $oldGrams = $grams;

    if (!$grams) {
        $pieces = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    } else {
        $pieces = preg_split('/(.{'.$grams.'})/su', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    }

    if ($pieces === false) {
        /* malformed UTF-8 (or another PCRE failure): fall back to a byte-wise split */
        $size = $grams ? max(1, (int) $grams) : 1;
        $pieces = $str === '' ? array() : str_split($str, $size);
    }

    $split = $pieces;

    return $split;
}
||
| 183 | |||
| 184 | private static function getSplit(&$str, &$split, &$old, &$oldGrams, $grams) |
||
| 190 | } |
||
| 191 | } |
||
| 192 | } |
||
| 195 |