Total Complexity | 40 |
Total Lines | 175 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like similar_text often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use similar_text, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
15 | class similar_text |
||
16 | { |
||
/**
 * Private, empty constructor.
 *
 * Being private, it prevents code outside the class from instantiating it;
 * the other methods visible in this class are all static.
 */
private function __construct()
{
    // Intentionally empty.
}
||
/**
 * Compare two strings and return the 'similar' entry of the computed stats.
 *
 * Both inputs are first converted with self::strtolower() (when $insensitive
 * is truthy) or self::split() (otherwise); the results are then counted, so
 * both helpers are expected to return countable arrays
 * (NOTE(review): strtolower() is not visible here — confirm it returns an array).
 * The shorter of the two arrays is always passed first to self::_check() and
 * to self::getStats(), together with the length of the longer one.
 *
 * @param mixed $a             First string; a non-string makes the method return false.
 * @param mixed $b             Second string; a non-string makes the method return false.
 * @param int   $round         Forwarded to _check() and getStats() as rounding precision.
 * @param bool  $insensitive   Selects self::strtolower() instead of self::split().
 * @param mixed $stats         By reference: receives the full result of getStats().
 * @param bool  $getParts      Cast to bool and forwarded to _check() and getStats().
 * @param bool  $checkposition Forwarded to _check().
 *
 * @return mixed false when either input is not a string, otherwise $stats['similar'].
 */
public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition = false)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    // $a is always converted before $b, exactly as in the if/else form.
    $a = $insensitive ? self::strtolower($a) : self::split($a);
    $b = $insensitive ? self::strtolower($b) : self::split($b);

    /* release the flag early: it may hold something larger than a boolean */
    unset($insensitive);
    $getParts = (bool) $getParts;

    $sizeA = count($a);
    $sizeB = count($b);

    // Order the pair so that the shorter array comes first (ties keep $b first).
    if ($sizeA < $sizeB) {
        $shorter = $a;
        $longer = $b;
        $longestSize = $sizeB;
    } else {
        $shorter = $b;
        $longer = $a;
        $longestSize = $sizeA;
    }

    $comparison = self::_check($shorter, $longer, $getParts, $round, $checkposition);
    $stats = self::getStats($longestSize, $shorter, $comparison, $getParts, $round);

    return $stats['similar'];
}
||
45 | |||
/**
 * Build the raw comparison data for two arrays.
 *
 * The returned list contains, in order:
 *  - only when $getParts is truthy: array_diff($a, $b), then array_diff($b, $a);
 *  - the intersection of $a and $b (array_intersect_assoc() when
 *    $checkposition is truthy, array_intersect() otherwise);
 *  - the percentage of parts of $a (as produced by self::getParts()) that are
 *    also found among the parts of $b, rounded to $round decimals;
 *  - whether $a and $b are identical (===).
 *
 * @param array $a             First array (similarText() passes the shorter one).
 * @param array $b             Second array.
 * @param bool  $getParts      Whether to prepend the two array_diff() results.
 * @param int   $round         Precision passed to round().
 * @param bool  $checkposition Whether the intersection must also match on keys.
 *
 * @return array The list described above.
 */
protected static function _check($a, $b, $getParts, $round, $checkposition = false)
{
    $diff = array();
    if ($getParts) {
        $diff[] = array_diff($a, $b);
        $diff[] = array_diff($b, $a);
    }
    $diff[] = $checkposition ? array_intersect_assoc($a, $b) : array_intersect($a, $b);

    // getParts() writes the number of parts of $a into $c.
    $c = 0;
    $partsA = self::getParts($a, $c);
    $partsB = self::getParts($b);

    // An empty $a yields no parts ($c === 0). Dividing by it would throw
    // DivisionByZeroError on PHP 8 (warning + NAN on PHP 7), so report 0 %.
    if ($c > 0) {
        $diff[] = round(count(array_intersect($partsA, $partsB)) / $c * 100, $round);
    } else {
        $diff[] = 0.0;
    }

    $diff[] = $a === $b;
    return $diff;
}
||
58 | |||
59 | protected static function getStats($ca, $b, $diff, $getParts, $round) |
||
77 | } |
||
78 | |||
/**
 * Collect the parts of $b by delegating to one of the two capture helpers.
 *
 * @param array $b             Elements to group into parts.
 * @param int   $c             By reference: reset to 0 here, then updated by the helper.
 * @param bool  $lengthCapture Truthy selects capturePartsWithLength(),
 *                             otherwise capturePartsWithoutLength() is used.
 *
 * @return array The parts gathered by the selected helper.
 */
protected static function getParts($b, &$c = 0, $lengthCapture = false)
{
    $c = 0;
    $collected = array();

    if ((bool) $lengthCapture) {
        // Length and buffer are taken by value by the helper, so the initial
        // 0 and '' can be handed over directly.
        self::capturePartsWithLength($b, 0, '', $c, $collected);

        return $collected;
    }

    self::capturePartsWithoutLength($b, '', $c, $collected);

    return $collected;
}
||
93 | |||
/**
 * Group the elements of $b into parts, using whitespace and punctuation
 * elements as separators.
 *
 * Elements are concatenated into a buffer until an element made only of
 * whitespace or punctuation (ctype_space() / ctype_punct()) is met; the buffer
 * (even when empty) and the separator are then both appended to $parts and $c
 * grows by 2. After the loop, a non-empty remaining buffer is appended as a
 * last part and $c grows by 1.
 *
 * @param array  $b     Elements to scan (taken by reference, not modified here).
 * @param string $tmp   Initial buffer content.
 * @param int    $c     By reference: incremented by the number of entries appended.
 * @param array  $parts By reference: receives the parts and separators.
 *
 * @return void
 */
private static function capturePartsWithoutLength(&$b, $tmp, &$c, &$parts)
{
    foreach ($b as $v) {
        if (ctype_space($v) || ctype_punct($v)) {
            $parts[] = $tmp;
            $parts[] = $v;
            $c += 2;
            $tmp = '';
            continue;
        }
        $tmp .= $v;
    }

    // Compare against '' instead of using empty(): empty('0') is true, which
    // silently discarded a trailing part consisting of the single character "0".
    if ((string) $tmp !== '') {
        $parts[] = $tmp;
        $c++;
    }
}
||
111 | |||
112 | private static function capturePartsWithLength(&$b, $length, $tmp, &$c, &$parts) |
||
129 | } |
||
130 | } |
||
131 | |||
132 | |||
133 | |||
134 | protected static function is_ascii($str) |
||
141 | } |
||
142 | |||
143 | protected static function strtolower($str) |
||
161 | } |
||
162 | } |
||
163 | |||
/**
 * Validate the arguments and delegate the actual splitting to self::getSplit().
 *
 * Three function-level static variables are handed to getSplit() by reference
 * (presumably the previous result, input and gram size, kept so that a repeated
 * call can be answered without splitting again — confirm against getSplit()).
 *
 * @param mixed     $str   String to split; anything else yields an empty array.
 * @param int|false $grams Size of each piece. Kept only when it is an integer
 *                         between 1 and strlen($str) (a byte count); any other
 *                         value is replaced by false.
 *
 * @return array Whatever getSplit() returns, or array() for a non-string input.
 */
protected static function split($str, $grams = false)
{
    static $lastSplit = [];
    static $lastInput = '';
    static $lastGrams = 1;

    if (!is_string($str)) {
        return array();
    }

    $usableGrams = is_int($grams) && 1 <= $grams && $grams <= strlen($str);
    if (!$usableGrams) {
        $grams = false;
    }

    return self::getSplit($str, $lastSplit, $lastInput, $lastGrams, $grams);
}
||
175 | |||
/**
 * Split $str into pieces and record the call in the by-reference arguments.
 *
 * With a falsy $grams the string is split into single UTF-8 characters;
 * otherwise into consecutive pieces of $grams characters, the last piece
 * holding the remainder. $old and $oldGrams receive the input and gram size,
 * $split receives the result.
 *
 * preg_split() with the "u" modifier returns false when $str is not valid
 * UTF-8. In that case the string is split byte-wise with str_split() so that
 * callers always get an array they can count().
 *
 * @param string    $str      String to split (by reference, not modified).
 * @param array     $split    By reference: receives the resulting pieces.
 * @param string    $old      By reference: receives $str.
 * @param int|false $oldGrams By reference: receives $grams.
 * @param int|false $grams    Piece size in characters, or a falsy value for single characters.
 *
 * @return array The pieces, identical to the value stored in $split.
 */
private static function _split(&$str, &$split, &$old, &$oldGrams, $grams)
{
    $old = $str;
    $oldGrams = $grams;

    if (!$grams) {
        $pieces = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    } else {
        $pieces = preg_split('/(.{'.$grams.'})/su', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    }

    if ($pieces === false) {
        // PCRE failure (typically malformed UTF-8): degrade to a byte-wise split
        // instead of leaking false to callers.
        $chunk = max(1, (int) $grams);
        $pieces = ((string) $str === '') ? array() : str_split((string) $str, $chunk);
    }

    $split = $pieces;

    return $split;
}
||
183 | |||
184 | private static function getSplit(&$str, &$split, &$old, &$oldGrams, $grams) |
||
190 | } |
||
191 | } |
||
192 | } |
||
195 |