| Total Complexity | 56 |
| Total Lines | 159 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like commonTextSimilarities often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use commonTextSimilarities, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class commonTextSimilarities extends similar_text |
||
| 17 | { |
||
| 18 | const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol |
||
| 19 | '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username |
||
| 20 | '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password |
||
| 21 | '@)?(?#'. // auth requires @ |
||
| 22 | ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND |
||
| 23 | '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR |
||
| 24 | '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'. |
||
| 25 | '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address |
||
| 26 | ')(:\d+)?'. // port |
||
| 27 | ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path |
||
| 28 | '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string |
||
| 29 | '?)?)?'. // path and query string optional |
||
| 30 | '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment |
||
| 31 | '$/i'; |
||
| 32 | |||
| 33 | |||
| 34 | |||
| 35 | |||
| 36 | const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
||
| 37 | |||
| 38 | protected static function isUrl($url, &$getDomain='') |
||
| 44 | } |
||
| 45 | |||
/**
 * Tells whether one argument is a URL and the other is the bare value that
 * isUrl() extracts from it.
 *
 * isUrl() writes into its by-reference second argument (declared as
 * $getDomain); the non-URL argument is trimmed and compared strictly with
 * that value. $a is tried as the URL first; $b is only tried as the URL when
 * the first test does not apply.
 * NOTE(review): presumably the extracted value is the URL's domain - confirm
 * against isUrl().
 *
 * @param mixed $a First candidate (URL or stripped form).
 * @param mixed $b Second candidate (URL or stripped form).
 *
 * @return bool True when the stripped form matches the URL, false otherwise.
 */
public static function strippedUrl($a, $b)
{
    // Case 1: $a is the URL, $b the stripped form.
    if (self::isUrl($a, $extracted) && is_string($b)) {
        return trim($b) === $extracted;
    }

    // Case 2: roles swapped.
    if (self::isUrl($b, $extracted) && is_string($a)) {
        return trim($a) === $extracted;
    }

    return false;
}
||
| 56 | |||
/**
 * Tells whether $a and $b are anagrams, as judged by similarText().
 *
 * The answer is true only when similarText() succeeds and the $check report
 * it fills in has 'similar' strictly equal to 100.0 and 'contain' strictly
 * equal to true.
 * NOTE(review): the meaning of the report keys is defined by similarText(),
 * which is not visible here - confirm.
 *
 * @param mixed $a First value to compare.
 * @param mixed $b Second value to compare.
 *
 * @return bool
 */
public static function areAnagrams($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $check)) {
        return false;
    }

    // Same guard the sibling predicates use: never index a report that is
    // not an array (avoids an offset-access warning on null/scalars).
    if (!is_array($check)) {
        return false;
    }

    return $check['similar'] === 100.0 && $check['contain'] === true;
}
||
| 61 | |||
| 62 | public static function similarButNotEqual($a, $b) |
||
| 65 | } |
||
| 66 | |||
/**
 * Tells whether $a is a super-string of $b, as judged by similarText().
 *
 * $a must be strictly longer than $b (byte length via strlen()); then
 * similarText() must succeed and its $check report must be an array whose
 * 'substr' entry is strictly equal to 100.0.
 *
 * @param string $a Candidate super-string.
 * @param string $b Candidate sub-string.
 *
 * @return bool
 */
public static function aIsSuperStringOfB($a, $b)
{
    // A string that is not longer than the other cannot strictly contain it.
    if (strlen($a) <= strlen($b)) {
        return false;
    }

    return self::similarText($a, $b, 2, true, $check)
        && is_array($check)
        && $check['substr'] === 100.0;
}
||
| 75 | |||
/**
 * Tells whether $a and $b share the same root, as judged by similarText().
 *
 * similarText() must succeed and return an array report in $check; the keys
 * of its 'a&b' entry must then be exactly 0, 1, ..., n-1 in that order.
 * An empty 'a&b' entry never matches, because range(0, -1) is not empty.
 * NOTE(review): presumably 'a&b' holds the parts common to both inputs keyed
 * by position, so consecutive keys from 0 mean a shared prefix - confirm
 * against similarText().
 *
 * @param mixed $a First value to compare.
 * @param mixed $b Second value to compare.
 *
 * @return bool
 */
public static function haveSameRoot($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $check, true, true)) {
        return false;
    }
    if (!is_array($check)) {
        return false;
    }

    $expectedKeys = range(0, count($check['a&b']) - 1);
    $actualKeys = array_keys($check['a&b']);

    return $expectedKeys === $actualKeys;
}
||
| 80 | |||
| 81 | public static function areStems($a, $b) |
||
| 95 | } |
||
| 96 | |||
| 97 | public static function wordsReorderOccured($a, $b, $considerPunctuation=true) |
||
| 111 | } |
||
| 112 | |||
| 113 | public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true) |
||
| 123 | } |
||
| 124 | |||
| 125 | |||
/**
 * Tells whether one string is an acronym/abbreviation of the other.
 *
 * Both inputs must be strings. They are broken into parts by filter()
 * (case-insensitively), dropping every part made only of whitespace or only
 * of punctuation, and the remaining parts are compared by aoeStemming().
 *
 * @param mixed $a First text.
 * @param mixed $b Second text.
 *
 * @return bool False for non-string input, otherwise aoeStemming()'s verdict.
 */
public static function acronymOrExpanded($a, $b)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    // Keep a part only when it is neither pure whitespace nor pure punctuation.
    $keepWord = static function ($part) {
        return !ctype_space($part) && !ctype_punct($part);
    };

    // filter() rewrites $a and $b in place (both are taken by reference).
    self::filter($a, $b, $keepWord, true);

    return self::aoeStemming($a, $b);
}
||
| 138 | |||
/**
 * Pairwise check used by acronymOrExpanded().
 *
 * For every key of $a, the entry of $b under the same key must exist, must
 * share its root with $a's entry (haveSameRoot()), and the two entries must
 * not both be three or more bytes long (at least one side has to be the
 * short form). Entries of $b whose key is absent from $a are ignored.
 *
 * Both arrays come from array_filter(), which preserves keys, so $b is not
 * guaranteed to hold every key of $a. A missing counterpart is treated as
 * "not a match" instead of being read as an undefined offset.
 *
 * @param array $a Parts of the first text, keyed by original position.
 * @param array $b Parts of the second text, keyed by original position.
 *
 * @return bool True when every part of $a pairs up with a part of $b.
 */
private static function aoeStemming($a, $b)
{
    foreach ($a as $index => $word) {
        // No counterpart in $b under this key: cannot be an acronym pair.
        if (!isset($b[$index])) {
            return false;
        }

        $other = $b[$index];

        if (!self::haveSameRoot($word, $other)) {
            return false;
        }

        // isset($str[2]) is true exactly when the string has 3+ bytes.
        if (isset($word[2]) && isset($other[2])) {
            return false;
        }
    }

    return true;
}
||
| 148 | |||
/**
 * Tells whether $a and $b differ by added or removed words.
 *
 * Both inputs must be strings. They are broken into parts by filter()
 * (case-insensitively), dropping whitespace-only parts, and the decision is
 * delegated to waorDiff() together with the two part counts.
 *
 * @param mixed $a First text.
 * @param mixed $b Second text.
 *
 * @return mixed False for non-string input, otherwise whatever waorDiff()
 *               returns.
 */
public static function wordsAddedOrRemoved($a, $b)
{
    if (!(is_string($a) && is_string($b))) {
        return false;
    }

    // Keep every part that is not made only of whitespace.
    $notBlank = static function ($part) {
        return !ctype_space($part);
    };

    // filter() rewrites $a and $b in place (both are taken by reference).
    self::filter($a, $b, $notBlank, true);

    $countA = count($a);
    $countB = count($b);

    return self::waorDiff($a, $b, $countA, $countB);
}
||
| 160 | |||
/**
 * Replaces $a and $b, in place, with their filtered lists of parts.
 *
 * Each input is first passed through self::strtolower() when $insensitive is
 * true, or through self::split() otherwise, then through self::getParts();
 * the result is reduced with array_filter() using $filter. array_filter()
 * keeps the original keys, so the resulting arrays may have gaps.
 * NOTE(review): strtolower(), split() and getParts() are defined outside
 * this view - their exact output is assumed, not verified.
 *
 * @param mixed    $a           First text; overwritten with its kept parts.
 * @param mixed    $b           Second text; overwritten with its kept parts.
 * @param callable $filter      Predicate: return true to keep a part.
 * @param bool     $insensitive Selects strtolower() (true) or split() (false).
 *
 * @return void
 */
private static function filter(&$a, &$b, $filter, $insensitive=true)
{
    $toParts = function ($text) use ($filter, $insensitive) {
        $prepared = $insensitive ? self::strtolower($text) : self::split($text);

        return array_filter(self::getParts($prepared), $filter);
    };

    // $a is fully processed before $b, as in the original branch layout.
    $a = $toParts($a);
    $b = $toParts($b);
}
||
| 171 | |||
| 172 | private static function waorDiff($a, $b, $ca, $cb) |
||
| 175 | } |
||
| 176 | } |
||
| 177 | } |
||
| 178 |