| Total Complexity | 54 |
| Total Lines | 149 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like commonTextSimilarities often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use commonTextSimilarities, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class commonTextSimilarities extends similar_text |
||
| 17 | { |
||
/**
 * Case-insensitive PCRE pattern matching a complete URL-like string.
 *
 * The pattern is anchored at both ends and assembled from the pieces below,
 * in order: optional scheme (http, https, ftp, ftps or file), optional
 * "user[:password]@" part, a host given either as a domain name or as a
 * dotted IPv4 address, optional port, optional path and query string, and an
 * optional fragment.
 *
 * NOTE(review): presumably consumed by isUrl() - confirm against its body.
 */
const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol (optional)
'(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username
'(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password
'@)?(?#'. // auth requires @; "(?#" opens an inline regex comment closed by the ")" starting the next piece
')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND
'[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR
'|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'. // first three octets (0-255), each followed by a dot
'(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address
')(:\d+)?'. // port
')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path
'(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string
'?)?)?'. // path and query string optional
'(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment
'$/i';
||
| 32 | |||
| 33 | |||
| 34 | |||
| 35 | |||
/**
 * Looser, case-insensitive URL pattern that uses '"' as the PCRE delimiter.
 *
 * It accepts an optional scheme (http, https, ftp, ftps or file) followed by
 * one or more characters from a URL-ish character class, and requires the
 * last character to come from a narrower class that excludes "?", "!", ":",
 * ",", "." and ";".
 */
const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';
||
| 37 | |||
| 38 | protected static function isUrl($url, &$getDomain='') |
||
| 44 | } |
||
| 45 | |||
/**
 * Tells whether one argument equals the value isUrl() extracts from the other.
 *
 * isUrl() receives $domain by reference; presumably it stores the URL's
 * domain there (its body is not visible here - confirm).
 *
 * Note: the reversed direction ($b as URL, $a as plain text) is tried only
 * when the first guard fails, not when the first comparison yields false.
 *
 * @param mixed $a First candidate (URL or plain text).
 * @param mixed $b Second candidate (URL or plain text).
 *
 * @return bool True when the trimmed plain-text argument is identical to
 *              the value extracted from the URL argument.
 */
public static function strippedUrl($a, $b)
{
    // Treat $a as the URL and $b as the stripped form.
    if (self::isUrl($a, $domain) && is_string($b)) {
        return trim($b) === $domain;
    }

    // Otherwise swap the roles.
    if (self::isUrl($b, $domain) && is_string($a)) {
        return trim($a) === $domain;
    }

    return false;
}
||
| 56 | |||
/**
 * Tells whether $a and $b are anagrams of each other according to similarText().
 *
 * The comparison report is written by similarText() into $check. Both its
 * 'similar' entry must be exactly 100.0 and its 'contain' entry exactly true.
 * NOTE(review): the precise meaning of those entries is defined by the
 * parent class, which is not visible here.
 *
 * @param mixed $a First text.
 * @param mixed $b Second text.
 *
 * @return bool
 */
public static function areAnagrams($a, $b)
{
    return self::similarText($a, $b, 2, true, $check)
        // Same guard as the sibling checks: never index a non-array report.
        && is_array($check)
        && $check['similar'] === 100.0
        && $check['contain'] === true;
}
||
| 61 | |||
| 62 | public static function similarButNotEqual($a, $b) |
||
| 65 | } |
||
| 66 | |||
/**
 * Tells whether $a is a proper super-string of $b according to similarText().
 *
 * $a has to be strictly longer than $b (byte length, via strlen()), and the
 * 'substr' entry of the report filled by similarText() must be exactly 100.0.
 *
 * @param mixed $a Candidate containing text.
 * @param mixed $b Candidate contained text.
 *
 * @return bool
 */
public static function aIsSuperStringOfB($a, $b)
{
    // A text that is not longer than the other cannot be its super-string.
    if (!(strlen($a) > strlen($b))) {
        return false;
    }

    $matched = self::similarText($a, $b, 2, true, $check);

    return $matched && is_array($check) && $check['substr'] === 100.0;
}
||
| 75 | |||
/**
 * Tells whether $a and $b share the same root according to similarText().
 *
 * The 'a&b' entry of the report filled by similarText() must be a non-empty
 * array whose keys are exactly 0, 1, ..., n-1 in that order. An empty entry
 * never qualifies, because range(0, -1) yields [0, -1] rather than [].
 * NOTE(review): 'a&b' presumably holds the pieces common to both texts,
 * keyed by position - confirm against the parent class.
 *
 * @param mixed $a First text.
 * @param mixed $b Second text.
 *
 * @return bool
 */
public static function haveSameRoot($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $check, true, true) || !is_array($check)) {
        return false;
    }

    $common = $check['a&b'];
    $expectedKeys = range(0, count($common) - 1);

    return $expectedKeys === array_keys($common);
}
||
| 80 | |||
/**
 * Tells whether every part of $a has the same root as the part of $b found
 * at the same index.
 *
 * Both texts are lower-cased with self::strtolower() and split with
 * self::getParts() before the pairwise comparison. Only the parts of $a are
 * iterated, so surplus parts of $b are not examined.
 *
 * @param mixed $a First text; anything but a string yields false.
 * @param mixed $b Second text; anything but a string yields false.
 *
 * @return bool
 */
public static function areStems($a, $b)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }

    $a = self::getParts(self::strtolower($a));
    $b = self::getParts(self::strtolower($b));
    foreach ($a as $index => $word) {
        // $b may have fewer parts than $a: a missing counterpart is a
        // mismatch, not an undefined-offset read.
        if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
            return false;
        }
    }
    return true;
}
||
| 96 | |||
/**
 * Tells whether $b looks like $a with its words reordered.
 *
 * Using the report filled by similarText(), the result is true when, after
 * discarding ignorable pieces, nothing is left in either the 'a-b' or the
 * 'b-a' entry, the 'substr' entry is truthy and the 'equal' entry is falsy.
 *
 * @param mixed $a                   First text.
 * @param mixed $b                   Second text.
 * @param bool  $considerPunctuation When truthy, whitespace and punctuation
 *                                   pieces are both ignored in the
 *                                   differences; otherwise only whitespace.
 *
 * @return bool
 */
public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
{
    // Keeps a piece only when it is a significant (non-ignorable) difference.
    $isSignificant = function ($piece) use ($considerPunctuation) {
        if (ctype_space($piece)) {
            return false;
        }
        return !($considerPunctuation && ctype_punct($piece));
    };

    if (!self::similarText($a, $b, 2, true, $check, true) || !is_array($check)) {
        return false;
    }

    // Any significant leftover on either side rules out a pure reordering.
    if (!empty(array_filter($check['a-b'], $isSignificant))) {
        return false;
    }
    if (!empty(array_filter($check['b-a'], $isSignificant))) {
        return false;
    }

    return $check['substr'] && !$check['equal'];
}
||
| 112 | |||
/**
 * Tells whether $a and $b differ only by punctuation (and optionally spacing).
 *
 * Both texts are handed to self::filter() together with a predicate that
 * rejects punctuation (and whitespace when $considerSpace is truthy).
 * filter() rewrites $a and $b in place; the results are then compared as
 * sets of values in BOTH directions, so a token present on only one side,
 * whichever side it is, makes the check fail.
 *
 * @param mixed $a             First text; anything but a string yields false.
 * @param mixed $b             Second text; anything but a string yields false.
 * @param bool  $insensitive   Forwarded to self::filter(); presumably toggles
 *                             case-insensitive comparison - confirm.
 * @param bool  $considerSpace When truthy, whitespace is ignored as well.
 *
 * @return bool
 */
public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }

    $filter = function ($v) use ($considerSpace) {
        return $considerSpace ? !(ctype_space($v) || ctype_punct($v)) : !ctype_punct($v);
    };
    self::filter($a, $b, $filter, $insensitive);

    // array_diff() is one-directional; check both sides so that extra
    // tokens in $b are detected just like extra tokens in $a.
    return empty(array_diff($a, $b)) && empty(array_diff($b, $a));
}
||
| 124 | |||
| 125 | |||
/**
 * Tells whether one text looks like an acronym / abbreviation of the other.
 *
 * Both texts go through self::filter() with a predicate rejecting whitespace
 * and punctuation; filter() rewrites $a and $b in place. Each element of $a
 * is then paired with the element of $b at the same index. A pair fails when
 * the two do not share a root, or when both have an offset 2 set, i.e. for
 * strings, both are at least three bytes long.
 * Only the elements of $a are iterated, so surplus elements of $b are not
 * examined.
 *
 * @param mixed $a First text; anything but a string yields false.
 * @param mixed $b Second text; anything but a string yields false.
 *
 * @return bool
 */
public static function acronymOrExpanded($a, $b)
{
    if (!is_string($a) || !is_string($b)) {
        return false;
    }
    $filter = function ($v) {
        return !(ctype_space($v) || ctype_punct($v));
    };

    self::filter($a, $b, $filter, true);
    foreach ($a as $index => $word) {
        // $b may have fewer elements than $a: a missing counterpart is a
        // mismatch, not an undefined-offset read.
        if (!isset($b[$index])) {
            return false;
        }
        if (!self::haveSameRoot($word, $b[$index]) || (isset($a[$index][2]) && isset($b[$index][2]))) {
            return false;
        }
    }
    return true;
}
||
| 143 | |||
| 144 | public static function wordsAddedOrRemoved($a, $b) |
||
| 156 | } |
||
| 157 | |||
| 158 | private static function filter(&$a,&$b,$filter,$insensitive=true){ |
||
| 165 | } |
||
| 166 | } |
||
| 167 | } |
||
| 168 | } |
||
| 169 |