Total Complexity | 53 |
Total Lines | 149 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like commonTextSimilarities often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use commonTextSimilarities, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
16 | class commonTextSimilarities extends similar_text |
||
17 | { |
||
/**
 * Case-insensitive PCRE pattern (slash-delimited, anchored with ^ and $)
 * describing a URL. It is assembled from concatenated fragments, one per
 * URL component, in this order: optional protocol (http/https/ftp/ftps/file),
 * optional "user[:password]@" credentials, a host given either as a domain
 * name or as a dotted-quad IPv4 address, optional port, optional path and
 * query string, and optional fragment.
 *
 * The "(?#" at the end of the credentials fragment opens an inline regex
 * comment that is closed by the ")" starting the next fragment.
 */
const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol
    '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username
    '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password
    '@)?(?#'. // auth requires @
    ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND
    '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR
    '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
    '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address
    ')(:\d+)?'. // port
    ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path
    '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string
    '?)?)?'. // path and query string optional
    '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment
    '$/i';
||
32 | |||
33 | |||
34 | |||
35 | |||
/**
 * Looser, case-insensitive URL pattern. It uses the double quote as the PCRE
 * delimiter, so the forward slashes inside the character class need no
 * escaping. It accepts an optional http/https/ftp/ftps/file scheme prefix
 * followed by one or more URL-ish characters, and requires the final
 * character to come from a narrower set (no "?", "!", ":", ",", "." or ";").
 */
const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';
||
37 | |||
38 | protected static function isUrl($url, &$getDomain='') |
||
43 | } |
||
44 | |||
/**
 * Tells whether one argument is a URL and the other is the string that
 * isUrl() reported for it through its by-reference second argument.
 *
 * $a is tried as the URL first; only if that fails is $b tried. The
 * non-URL argument is trimmed before the strict comparison.
 *
 * NOTE(review): isUrl() is defined outside this view; presumably it writes
 * the URL's domain into its second argument - confirm.
 *
 * @param mixed $a first candidate (URL or plain string)
 * @param mixed $b second candidate (URL or plain string)
 * @return bool
 */
public static function strippedUrl($a, $b)
{
    // The same by-reference variable is reused for both probes, as before.
    if (self::isUrl($a, $host) && is_string($b)) {
        return trim($b) === $host;
    }

    if (self::isUrl($b, $host) && is_string($a)) {
        return trim($a) === $host;
    }

    return false;
}
||
55 | |||
/**
 * Tells whether similarText() succeeds for the pair and reports
 * 'similar' === 100.0 together with 'contain' === true.
 *
 * NOTE(review): the meaning of the 'similar' and 'contain' report keys comes
 * from similarText(), which is not visible here - confirm against it.
 *
 * @param mixed $a
 * @param mixed $b
 * @return bool
 */
public static function areAnagrams($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $report)) {
        return false;
    }

    return $report['similar'] === 100.0 && $report['contain'] === true;
}
||
60 | |||
61 | public static function similarButNotEqual($a, $b) |
||
64 | } |
||
65 | |||
/**
 * Tells whether $a is strictly longer (in bytes) than $b and similarText()
 * reports 'substr' === 100.0 for the pair.
 *
 * NOTE(review): 'substr' is produced by similarText(), defined elsewhere;
 * presumably it is the percentage of $b found inside $a - confirm.
 *
 * @param string $a candidate super-string
 * @param string $b candidate sub-string
 * @return bool
 */
public static function aIsSuperStringOfB($a, $b)
{
    // A super-string must be strictly longer than what it contains.
    if (strlen($a) <= strlen($b)) {
        return false;
    }

    if (!self::similarText($a, $b, 2, true, $report)) {
        return false;
    }

    return is_array($report) && $report['substr'] === 100.0;
}
||
74 | |||
/**
 * Tells whether similarText() succeeds and the keys of its 'a&b' report
 * entry are exactly the consecutive integers 0..n-1.
 *
 * An empty 'a&b' entry yields false, because range(0, -1) is [0, -1] and
 * never equals an empty key list.
 *
 * NOTE(review): 'a&b' presumably holds the parts shared by both inputs,
 * keyed by position, so consecutive keys from 0 would mean a common
 * prefix - confirm against similarText().
 *
 * @param mixed $a
 * @param mixed $b
 * @return bool
 */
public static function haveSameRoot($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $report, true, true) || !is_array($report)) {
        return false;
    }

    $shared = $report['a&b'];
    $consecutiveKeys = range(0, count($shared) - 1);

    return $consecutiveKeys === array_keys($shared);
}
||
79 | |||
/**
 * Tells whether every part of $a shares a root (see haveSameRoot()) with
 * the part of $b at the same index.
 *
 * Both inputs are lower-cased and split with getParts() first. Non-string
 * input yields false. Parts of $b beyond the last index of $a are not
 * inspected.
 *
 * @param mixed $a
 * @param mixed $b
 * @return bool
 */
public static function areStems($a, $b)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }

    $a = self::getParts(self::strtolower($a));
    $b = self::getParts(self::strtolower($b));
    foreach ($a as $index => $word) {
        // $b may have fewer parts than $a: a missing counterpart means the
        // texts cannot be stem-wise equal, and reading $b[$index] blindly
        // would raise an "undefined array key" warning.
        if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
            return false;
        }
    }
    return true;
}
||
95 | |||
/**
 * Tells whether the two texts differ only by ordering, according to the
 * similarText() report: nothing significant is left in either one-sided
 * difference ('a-b', 'b-a'), 'substr' is truthy and 'equal' is falsy.
 *
 * Whitespace-only pieces are never significant. Punctuation-only pieces
 * are also ignored when $considerPunctuation is true.
 *
 * NOTE(review): the report keys are produced by similarText(), which is
 * defined elsewhere - confirm their exact meaning there.
 *
 * @param mixed $a
 * @param mixed $b
 * @param bool  $considerPunctuation ignore punctuation-only differences
 * @return bool
 */
public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
{
    // True for pieces that count as a real difference.
    $isSignificant = function ($piece) use ($considerPunctuation) {
        if (ctype_space($piece)) {
            return false;
        }
        return !($considerPunctuation && ctype_punct($piece));
    };

    if (!self::similarText($a, $b, 2, true, $report, true) || !is_array($report)) {
        return false;
    }

    if (array_filter($report['a-b'], $isSignificant) !== []) {
        return false;
    }

    if (array_filter($report['b-a'], $isSignificant) !== []) {
        return false;
    }

    return $report['substr'] && !$report['equal'];
}
||
111 | |||
/**
 * Tells whether every non-punctuation part of $a also appears among the
 * non-punctuation parts of $b.
 *
 * Both texts are split with getParts() after either lower-casing
 * ($insensitive true) or self::split() ($insensitive false).
 * Punctuation-only parts are dropped, and whitespace-only parts as well
 * when $considerSpace is true. Non-string input yields false.
 *
 * The comparison is one-directional (array_diff($a, $b)): parts that exist
 * only in $b do not make the result false.
 *
 * @param mixed $a
 * @param mixed $b
 * @param bool  $insensitive   compare case-insensitively
 * @param bool  $considerSpace also ignore whitespace-only parts
 * @return bool
 */
public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }

    // True for parts that must be kept for the comparison.
    $keep = function ($piece) use ($considerSpace) {
        if ($considerSpace && ctype_space($piece)) {
            return false;
        }
        return !ctype_punct($piece);
    };

    $wordsA = array_filter(
        self::getParts($insensitive ? self::strtolower($a) : self::split($a)),
        $keep
    );
    $wordsB = array_filter(
        self::getParts($insensitive ? self::strtolower($b) : self::split($b)),
        $keep
    );

    return array_diff($wordsA, $wordsB) === [];
}
||
130 | |||
131 | |||
/**
 * Tells whether each significant part of $a shares a root (see
 * haveSameRoot()) with the part of $b at the same index, with at least one
 * of the two being shorter than three bytes.
 *
 * Both inputs are lower-cased, split with getParts(), and stripped of
 * whitespace-only and punctuation-only parts. array_filter() keeps the
 * original keys, so parts are paired by their position before filtering.
 * Non-string input yields false. Parts of $b with no counterpart in $a are
 * not inspected.
 *
 * @param mixed $a
 * @param mixed $b
 * @return bool
 */
public static function acronymOrExpanded($a, $b)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }
    $filter = function ($v) {
        return !(ctype_space($v) || ctype_punct($v));
    };

    $a = array_filter(self::getParts(self::strtolower($a)), $filter);
    $b = array_filter(self::getParts(self::strtolower($b)), $filter);
    foreach ($a as $index => $word) {
        // Filtering can leave gaps and $b may be shorter: without a
        // counterpart there is nothing to compare, and reading $b[$index]
        // blindly would raise an "undefined array key" warning.
        if (!isset($b[$index])) {
            return false;
        }
        $other = $b[$index];

        // Offset 2 being set on both sides means both parts are at least
        // three bytes long, so neither is treated as an abbreviation.
        if (!self::haveSameRoot($word, $other) || (isset($word[2]) && isset($other[2]))) {
            return false;
        }
    }
    return true;
}
||
150 | |||
151 | public static function wordsAddedOrRemoved($a, $b) |
||
165 | } |
||
166 | } |
||
167 | } |
||
168 |