@@ -17,7 +17,7 @@ discard block |
||
| 17 | 17 | private function __construct() |
| 18 | 18 | { |
| 19 | 19 | } |
| 20 | - public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition=false) |
|
| 20 | + public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition = false) |
|
| 21 | 21 | { |
| 22 | 22 | if (!is_string($a) || !is_string($b)) { |
| 23 | 23 | return false; |
@@ -43,14 +43,14 @@ discard block |
||
| 43 | 43 | return $stats['similar']; |
| 44 | 44 | } |
| 45 | 45 | |
| 46 | - protected static function _check($a, $b, $getParts, $round, $checkposition=false) |
|
| 46 | + protected static function _check($a, $b, $getParts, $round, $checkposition = false) |
|
| 47 | 47 | { |
| 48 | 48 | $diff = array(); |
| 49 | 49 | if ($getParts) { |
| 50 | 50 | $diff[] = array_diff($a, $b); |
| 51 | 51 | $diff[] = array_diff($b, $a); |
| 52 | 52 | } |
| 53 | - $diff[] = $checkposition?array_intersect_assoc($a, $b):array_intersect($a, $b); |
|
| 53 | + $diff[] = $checkposition ?array_intersect_assoc($a, $b) : array_intersect($a, $b); |
|
| 54 | 54 | $diff[] = round(count(array_intersect(self::getParts($a, $c), self::getParts($b))) / $c * 100, $round); |
| 55 | 55 | $diff[] = $a === $b; |
| 56 | 56 | return $diff; |
@@ -82,7 +82,7 @@ discard block |
||
| 82 | 82 | $tmp = ''; |
| 83 | 83 | $c = 0; |
| 84 | 84 | foreach ($b as $k=>$v) { |
| 85 | - if (ctype_space($v)||ctype_punct($v)) { |
|
| 85 | + if (ctype_space($v) || ctype_punct($v)) { |
|
| 86 | 86 | $parts[] = $tmp; |
| 87 | 87 | $parts[] = $v; |
| 88 | 88 | $c += 2; |
@@ -115,7 +115,7 @@ discard block |
||
| 115 | 115 | if (is_array($split)) { |
| 116 | 116 | return |
| 117 | 117 | array_map( |
| 118 | - function ($val) { |
|
| 118 | + function($val) { |
|
| 119 | 119 | if (self::is_ascii($val)) { |
| 120 | 120 | return strtolower($val); |
| 121 | 121 | } |
@@ -20,7 +20,7 @@ discard block |
||
| 20 | 20 | $insensitive = true, |
| 21 | 21 | &$stats = false, |
| 22 | 22 | $getParts = false, |
| 23 | - $checkposition=false |
|
| 23 | + $checkposition = false |
|
| 24 | 24 | ) { |
| 25 | 25 | return similar_text::similarText( |
| 26 | 26 | $firstString, |
@@ -52,12 +52,12 @@ discard block |
||
| 52 | 52 | return commonTextSimilarities::haveSameRoot($a, $b); |
| 53 | 53 | } |
| 54 | 54 | |
| 55 | - function wordsReorderOccured($a, $b, $considerPunctuation=true) |
|
| 55 | + function wordsReorderOccured($a, $b, $considerPunctuation = true) |
|
| 56 | 56 | { |
| 57 | 57 | return commonTextSimilarities::wordsReorderOccured($a, $b, $considerPunctuation); |
| 58 | 58 | } |
| 59 | 59 | |
| 60 | - function punctuactionChangesOccured($a, $b, $considerSpace=true) |
|
| 60 | + function punctuactionChangesOccured($a, $b, $considerSpace = true) |
|
| 61 | 61 | { |
| 62 | 62 | return commonTextSimilarities::punctuactionChangesOccured($a, $b, $considerSpace); |
| 63 | 63 | } |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | namespace Ezama\tests{ |
| 3 | - require($dir=dirname(__DIR__)).DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'similar_text.php'; |
|
| 3 | + require($dir = dirname(__DIR__)).DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'similar_text.php'; |
|
| 4 | 4 | require $dir.DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'commonTextSimilarities.php'; |
| 5 | 5 | require $dir.DIRECTORY_SEPARATOR.'similar_text.php'; |
| 6 | 6 | |
@@ -10,10 +10,10 @@ discard block |
||
| 10 | 10 | { |
| 11 | 11 | public function testSimilarText() |
| 12 | 12 | { |
| 13 | - $this->assertTrue(100.0===similarText('qwerty', 'ytrewq')); |
|
| 14 | - $this->assertTrue(similarText('qwerty', 'ytreq')>=80); |
|
| 13 | + $this->assertTrue(100.0 === similarText('qwerty', 'ytrewq')); |
|
| 14 | + $this->assertTrue(similarText('qwerty', 'ytreq') >= 80); |
|
| 15 | 15 | $this->assertTrue(areAnagrams('qwerty', 'ytrewq')); |
| 16 | - $this->assertTrue(0.0===similarText('qwerty', ';lkjhg')); |
|
| 16 | + $this->assertTrue(0.0 === similarText('qwerty', ';lkjhg')); |
|
| 17 | 17 | $this->assertTrue(haveSameRoot('qwerty', 'qwertyuiop')); |
| 18 | 18 | $this->assertTrue(wordsReorderOccured('joker is a cloon.', 'a cloon is joker')); |
| 19 | 19 | $this->assertTrue(similarButNotEqual('qwerty', 'ytrewq')); |
@@ -155,14 +155,14 @@ |
||
| 155 | 155 | return (bool)(($ca>$cb)?array_diff_assoc(array_values($a), array_values($b)):array_diff_assoc(array_values($b), array_values($a))); |
| 156 | 156 | } |
| 157 | 157 | |
| 158 | - private static function filter(&$a,&$b,$filter,$insensitive=true){ |
|
| 159 | - if ($insensitive) { |
|
| 158 | + private static function filter(&$a,&$b,$filter,$insensitive=true){ |
|
| 159 | + if ($insensitive) { |
|
| 160 | 160 | $a = array_filter(self::getParts(self::strtolower($a)), $filter); |
| 161 | 161 | $b = array_filter(self::getParts(self::strtolower($b)), $filter); |
| 162 | 162 | } else { |
| 163 | 163 | $a = array_filter(self::getParts(self::split($a)), $filter); |
| 164 | 164 | $b = array_filter(self::getParts(self::split($b)), $filter); |
| 165 | 165 | } |
| 166 | - } |
|
| 166 | + } |
|
| 167 | 167 | } |
| 168 | 168 | } |
@@ -15,40 +15,40 @@ discard block |
||
| 15 | 15 | |
| 16 | 16 | class commonTextSimilarities extends similar_text |
| 17 | 17 | { |
| 18 | - const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol |
|
| 19 | - '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username |
|
| 20 | - '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password |
|
| 21 | - '@)?(?#'. // auth requires @ |
|
| 22 | - ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND |
|
| 23 | - '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR |
|
| 18 | + const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'.// protocol |
|
| 19 | + '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'.// username |
|
| 20 | + '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'.// password |
|
| 21 | + '@)?(?#'.// auth requires @ |
|
| 22 | + ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.// domain segments AND |
|
| 23 | + '[a-z][a-z0-9-]*[a-z0-9]'.// top level domain OR |
|
| 24 | 24 | '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'. |
| 25 | - '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address |
|
| 26 | - ')(:\d+)?'. // port |
|
| 27 | - ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path |
|
| 28 | - '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string |
|
| 29 | - '?)?)?'. // path and query string optional |
|
| 30 | - '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment |
|
| 25 | + '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.// IP address |
|
| 26 | + ')(:\d+)?'.// port |
|
| 27 | + ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'.// path |
|
| 28 | + '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'.// query string |
|
| 29 | + '?)?)?'.// path and query string optional |
|
| 30 | + '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'.// fragment |
|
| 31 | 31 | '$/i'; |
| 32 | 32 | |
| 33 | 33 | |
| 34 | 34 | |
| 35 | 35 | |
| 36 | - const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
|
| 36 | + const URL_POSIX_FORMAT = '"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
|
| 37 | 37 | |
| 38 | - protected static function isUrl($url, &$getDomain='') |
|
| 38 | + protected static function isUrl($url, &$getDomain = '') |
|
| 39 | 39 | { |
| 40 | - $matches=array(); |
|
| 41 | - $bool= is_string($url)&&preg_match(self::URL_POSIX_FORMAT, $url)&&preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches)/*?true:false*/; |
|
| 42 | - $getDomain=rtrim($matches[9], '.'); |
|
| 40 | + $matches = array(); |
|
| 41 | + $bool = is_string($url) && preg_match(self::URL_POSIX_FORMAT, $url) && preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches)/*?true:false*/; |
|
| 42 | + $getDomain = rtrim($matches[9], '.'); |
|
| 43 | 43 | return $bool; |
| 44 | 44 | } |
| 45 | 45 | |
| 46 | 46 | public static function strippedUrl($a, $b) |
| 47 | 47 | { |
| 48 | - if (self::isUrl($a, $domain)&&is_string($b)) { |
|
| 49 | - return $domain===trim($b); |
|
| 50 | - } elseif (self::isUrl($b, $domain)&&is_string($a)) { |
|
| 51 | - return $domain===trim($a); |
|
| 48 | + if (self::isUrl($a, $domain) && is_string($b)) { |
|
| 49 | + return $domain === trim($b); |
|
| 50 | + } elseif (self::isUrl($b, $domain) && is_string($a)) { |
|
| 51 | + return $domain === trim($a); |
|
| 52 | 52 | } else { |
| 53 | 53 | return false; |
| 54 | 54 | } |
@@ -56,7 +56,7 @@ discard block |
||
| 56 | 56 | |
| 57 | 57 | public static function areAnagrams($a, $b) |
| 58 | 58 | { |
| 59 | - return self::similarText($a, $b, 2, true, $check)&&$check['similar'] === 100.0 && $check['contain'] === true; |
|
| 59 | + return self::similarText($a, $b, 2, true, $check) && $check['similar'] === 100.0 && $check['contain'] === true; |
|
| 60 | 60 | } |
| 61 | 61 | |
| 62 | 62 | public static function similarButNotEqual($a, $b) |
@@ -66,7 +66,7 @@ discard block |
||
| 66 | 66 | |
| 67 | 67 | public static function aIsSuperStringOfB($a, $b) |
| 68 | 68 | { |
| 69 | - if (strlen($a)>strlen($b)) { |
|
| 69 | + if (strlen($a) > strlen($b)) { |
|
| 70 | 70 | return self::similarText($a, $b, 2, true, $check) && is_array($check) && $check['substr'] === 100.0; |
| 71 | 71 | } else { |
| 72 | 72 | return false; |
@@ -75,7 +75,7 @@ discard block |
||
| 75 | 75 | |
| 76 | 76 | public static function haveSameRoot($a, $b) |
| 77 | 77 | { |
| 78 | - return self::similarText($a, $b, 2, true, $check, true, true) && is_array($check)&&range(0, count($check['a&b'])-1)===array_keys($check['a&b'])/*?true:false*/; |
|
| 78 | + return self::similarText($a, $b, 2, true, $check, true, true) && is_array($check) && range(0, count($check['a&b']) - 1) === array_keys($check['a&b'])/*?true:false*/; |
|
| 79 | 79 | } |
| 80 | 80 | |
| 81 | 81 | public static function areStems($a, $b) |
@@ -94,10 +94,10 @@ discard block |
||
| 94 | 94 | return true; |
| 95 | 95 | } |
| 96 | 96 | |
| 97 | - public static function wordsReorderOccured($a, $b, $considerPunctuation=true) |
|
| 97 | + public static function wordsReorderOccured($a, $b, $considerPunctuation = true) |
|
| 98 | 98 | { |
| 99 | - $filter=function ($v) use ($considerPunctuation) { |
|
| 100 | - return $considerPunctuation?!(ctype_space($v)||ctype_punct($v)):!ctype_space($v); |
|
| 99 | + $filter = function($v) use ($considerPunctuation) { |
|
| 100 | + return $considerPunctuation ? !(ctype_space($v) || ctype_punct($v)) : !ctype_space($v); |
|
| 101 | 101 | }; |
| 102 | 102 | return |
| 103 | 103 | self::similarText($a, $b, 2, true, $check, true) && |
@@ -110,15 +110,15 @@ discard block |
||
| 110 | 110 | :false; |
| 111 | 111 | } |
| 112 | 112 | |
| 113 | - public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true) |
|
| 113 | + public static function punctuactionChangesOccured($a, $b, $insensitive = true, $considerSpace = true) |
|
| 114 | 114 | { |
| 115 | - $filter=function ($v) use ($considerSpace) { |
|
| 116 | - return $considerSpace?!(ctype_space($v)||ctype_punct($v)):!ctype_punct($v); |
|
| 115 | + $filter = function($v) use ($considerSpace) { |
|
| 116 | + return $considerSpace ? !(ctype_space($v) || ctype_punct($v)) : !ctype_punct($v); |
|
| 117 | 117 | }; |
| 118 | 118 | if (!is_string($a) || !is_string($b)) { |
| 119 | 119 | return false; |
| 120 | 120 | } |
| 121 | - self::filter($a,$b,$filter,$insensitive); |
|
| 121 | + self::filter($a, $b, $filter, $insensitive); |
|
| 122 | 122 | return empty(array_diff($a, $b)); |
| 123 | 123 | } |
| 124 | 124 | |
@@ -128,13 +128,13 @@ discard block |
||
| 128 | 128 | if (!is_string($a) || !is_string($b)) { |
| 129 | 129 | return false; |
| 130 | 130 | } |
| 131 | - $filter=function ($v) { |
|
| 132 | - return !(ctype_space($v)||ctype_punct($v)); |
|
| 131 | + $filter = function($v) { |
|
| 132 | + return !(ctype_space($v) || ctype_punct($v)); |
|
| 133 | 133 | }; |
| 134 | 134 | |
| 135 | - self::filter($a,$b,$filter,true); |
|
| 135 | + self::filter($a, $b, $filter, true); |
|
| 136 | 136 | foreach ($a as $index=>$word) { |
| 137 | - if (!self::haveSameRoot($word, $b[$index])||(isset($a[$index][2])&&isset($b[$index][2]))) { |
|
| 137 | + if (!self::haveSameRoot($word, $b[$index]) || (isset($a[$index][2]) && isset($b[$index][2]))) { |
|
| 138 | 138 | return false; |
| 139 | 139 | } |
| 140 | 140 | } |
@@ -146,16 +146,16 @@ discard block |
||
| 146 | 146 | if (!is_string($a) || !is_string($b)) { |
| 147 | 147 | return false; |
| 148 | 148 | } |
| 149 | - $filter=function ($v) { |
|
| 149 | + $filter = function($v) { |
|
| 150 | 150 | return !(ctype_space($v)); |
| 151 | 151 | }; |
| 152 | - self::filter($a,$b,$filter,true); |
|
| 153 | - $ca=count($a); |
|
| 154 | - $cb=count($b); |
|
| 155 | - return (bool)(($ca>$cb)?array_diff_assoc(array_values($a), array_values($b)):array_diff_assoc(array_values($b), array_values($a))); |
|
| 152 | + self::filter($a, $b, $filter, true); |
|
| 153 | + $ca = count($a); |
|
| 154 | + $cb = count($b); |
|
| 155 | + return (bool) (($ca > $cb) ?array_diff_assoc(array_values($a), array_values($b)) : array_diff_assoc(array_values($b), array_values($a))); |
|
| 156 | 156 | } |
| 157 | 157 | |
| 158 | - private static function filter(&$a,&$b,$filter,$insensitive=true){ |
|
| 158 | + private static function filter(&$a, &$b, $filter, $insensitive = true) { |
|
| 159 | 159 | if ($insensitive) { |
| 160 | 160 | $a = array_filter(self::getParts(self::strtolower($a)), $filter); |
| 161 | 161 | $b = array_filter(self::getParts(self::strtolower($b)), $filter); |