Total Complexity | 56 |
Total Lines | 159 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like commonTextSimilarities often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use commonTextSimilarities, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
16 | class commonTextSimilarities extends similar_text |
||
17 | { |
||
18 | const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol |
||
19 | '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username |
||
20 | '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password |
||
21 | '@)?(?#'. // auth requires @ |
||
22 | ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND |
||
23 | '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR |
||
24 | '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'. |
||
25 | '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address |
||
26 | ')(:\d+)?'. // port |
||
27 | ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path |
||
28 | '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string |
||
29 | '?)?)?'. // path and query string optional |
||
30 | '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment |
||
31 | '$/i'; |
||
32 | |||
33 | |||
34 | |||
35 | |||
36 | const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
||
37 | |||
38 | protected static function isUrl($url, &$getDomain='') |
||
44 | } |
||
45 | |||
/**
 * Checks whether one argument is a URL and the other is the string that
 * isUrl() reported for that URL.
 *
 * isUrl() takes its second argument by reference; whatever it stores there
 * is compared (strictly) with the trimmed other argument.
 * NOTE(review): presumably that value is the URL's domain — confirm against isUrl().
 *
 * @param mixed $a first candidate (URL or plain string)
 * @param mixed $b second candidate (URL or plain string)
 *
 * @return bool true when the value reported for the URL is identical to the
 *              trimmed non-URL argument, false otherwise
 */
public static function strippedUrl($a, $b)
{
    // First attempt: $a is the URL, $b the plain string.
    if (self::isUrl($a, $domain) && is_string($b)) {
        return trim($b) === $domain;
    }

    // Second attempt with the roles swapped; the same $domain variable is
    // handed to isUrl() again, exactly as on the first attempt.
    if (self::isUrl($b, $domain) && is_string($a)) {
        return trim($a) === $domain;
    }

    return false;
}
||
56 | |||
/**
 * Tells whether similarText() reports a 100% similarity together with the
 * 'contain' flag for the two inputs.
 *
 * @param mixed $a first value to compare
 * @param mixed $b second value to compare
 *
 * @return bool true only when similarText() succeeds, $check['similar'] is
 *              exactly 100.0 and $check['contain'] is exactly true
 */
public static function areAnagrams($a, $b)
{
    // $check is populated by reference by similarText().
    if (!self::similarText($a, $b, 2, true, $check)) {
        return false;
    }

    $fullySimilar = $check['similar'] === 100.0;

    return $fullySimilar && $check['contain'] === true;
}
||
61 | |||
62 | public static function similarButNotEqual($a, $b) |
||
65 | } |
||
66 | |||
/**
 * Tells whether $a is strictly longer than $b and similarText() reports a
 * 'substr' score of exactly 100.0 for the pair.
 *
 * @param mixed $a candidate super-string
 * @param mixed $b candidate sub-string
 *
 * @return bool false when $a is not longer (in bytes) than $b or when the
 *              similarText() result does not carry 'substr' === 100.0
 */
public static function aIsSuperStringOfB($a, $b)
{
    // A super-string has to be longer than the string it contains.
    if (!(strlen($a) > strlen($b))) {
        return false;
    }

    // $check is populated by reference by similarText().
    if (!self::similarText($a, $b, 2, true, $check)) {
        return false;
    }

    return is_array($check) && $check['substr'] === 100.0;
}
||
75 | |||
/**
 * Tells whether the 'a&b' entry produced by similarText() is keyed by the
 * consecutive integers 0..n-1.
 *
 * NOTE(review): 'a&b' presumably holds the parts shared by both inputs keyed
 * by their position, so consecutive keys starting at 0 would mean a common
 * leading run — confirm against similar_text::similarText().
 *
 * An empty 'a&b' yields false, because range(0, -1) is [0, -1] while the key
 * list of an empty array is [].
 *
 * @param mixed $a first value to compare
 * @param mixed $b second value to compare
 *
 * @return bool true when similarText() succeeds, $check is an array and the
 *              keys of $check['a&b'] are exactly 0..count-1
 */
public static function haveSameRoot($a, $b)
{
    // $check is populated by reference by similarText().
    $succeeded = self::similarText($a, $b, 2, true, $check, true, true);
    if (!($succeeded && is_array($check))) {
        return false;
    }

    $shared = $check['a&b'];
    $expectedKeys = range(0, count($shared) - 1);

    return $expectedKeys === array_keys($shared);
}
||
80 | |||
81 | public static function areStems($a, $b) |
||
95 | } |
||
96 | |||
97 | public static function wordsReorderOccured($a, $b, $considerPunctuation=true) |
||
111 | } |
||
112 | |||
113 | public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true) |
||
123 | } |
||
124 | |||
125 | |||
/**
 * Compares two strings part by part after discarding whitespace-only and
 * punctuation-only parts, delegating the actual comparison to aoeStemming().
 *
 * @param mixed $a first text
 * @param mixed $b second text
 *
 * @return bool false when either argument is not a string, otherwise the
 *              result of aoeStemming() on the filtered parts
 */
public static function acronymOrExpanded($a, $b)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    // Keep a part only when it is neither all-whitespace nor all-punctuation.
    $keepPart = function ($part) {
        return !ctype_space($part) && !ctype_punct($part);
    };

    // filter() replaces $a and $b (by reference) with their filtered parts,
    // lower-casing them first because the last argument is true.
    self::filter($a, $b, $keepPart, true);

    return self::aoeStemming($a, $b);
}
||
138 | |||
/**
 * Compares the parts of $a with the parts of $b stored under the same key.
 *
 * A pair is rejected when haveSameRoot() fails for it, or when both parts
 * have at least three bytes (offset 2 exists in both). Entries of $b whose
 * keys do not occur in $a are not examined.
 *
 * The arrays come from filter(), which uses array_filter() and therefore
 * keeps the original keys; a key present in $a can be missing from $b. Such
 * a part has no counterpart to compare with, so the comparison fails instead
 * of reading an undefined array key.
 *
 * @param array $a filtered parts of the first text
 * @param array $b filtered parts of the second text
 *
 * @return bool true when every part of $a passes the checks against its
 *              counterpart in $b, false otherwise
 */
private static function aoeStemming($a, $b)
{
    foreach ($a as $index => $word) {
        // No counterpart under this key: nothing to match against.
        if (!isset($b[$index])) {
            return false;
        }

        $counterpart = $b[$index];

        if (!self::haveSameRoot($word, $counterpart)) {
            return false;
        }

        // Both sides being three or more bytes long disqualifies the pair.
        if (isset($word[2]) && isset($counterpart[2])) {
            return false;
        }
    }

    return true;
}
||
148 | |||
/**
 * Splits both strings into parts, drops the whitespace-only parts and hands
 * the remaining parts, together with their counts, to waorDiff().
 *
 * @param mixed $a first text
 * @param mixed $b second text
 *
 * @return mixed false when either argument is not a string, otherwise
 *               whatever waorDiff() returns
 */
public static function wordsAddedOrRemoved($a, $b)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    // Keep every part that is not made up solely of whitespace.
    $keepPart = function ($part) {
        return !ctype_space($part);
    };

    // filter() replaces $a and $b (by reference) with their filtered parts,
    // lower-casing them first because the last argument is true.
    self::filter($a, $b, $keepPart, true);

    $countA = count($a);
    $countB = count($b);

    return self::waorDiff($a, $b, $countA, $countB);
}
||
160 | |||
/**
 * Replaces $a and $b, in place, with their parts that pass $filter.
 *
 * Each input is first run through self::strtolower() (when $insensitive is
 * truthy) or self::split() (otherwise), then through self::getParts(), and
 * finally through array_filter() with $filter. array_filter() keeps the
 * original keys, so the resulting arrays may have gaps in their keys.
 *
 * @param mixed    $a           first input, overwritten with its filtered parts
 * @param mixed    $b           second input, overwritten with its filtered parts
 * @param callable $filter      predicate deciding which parts are kept
 * @param bool     $insensitive selects strtolower() (true) or split() (false)
 *                              as the first processing step
 *
 * @return void
 */
private static function filter(&$a, &$b, $filter, $insensitive=true)
{
    $a = array_filter(
        self::getParts($insensitive ? self::strtolower($a) : self::split($a)),
        $filter
    );
    $b = array_filter(
        self::getParts($insensitive ? self::strtolower($b) : self::split($b)),
        $filter
    );
}
||
171 | |||
172 | private static function waorDiff($a, $b, $ca, $cb) |
||
175 | } |
||
176 | } |
||
177 | } |
||
178 |