@@ -17,7 +17,7 @@ discard block |
||
/**
 * Empty private constructor: code outside this class cannot create
 * instances of it with `new`.
 */
private function __construct()
{
}
| 20 | - public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition=false) |
|
| 20 | + public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition = false) |
|
| 21 | 21 | { |
| 22 | 22 | if (!is_string($a) || !is_string($b)) { |
| 23 | 23 | return false; |
@@ -43,14 +43,14 @@ discard block |
||
| 43 | 43 | return $stats['similar']; |
| 44 | 44 | } |
| 45 | 45 | |
| 46 | - protected static function _check($a, $b, $getParts, $round, $checkposition=false) |
|
| 46 | + protected static function _check($a, $b, $getParts, $round, $checkposition = false) |
|
| 47 | 47 | { |
| 48 | 48 | $diff = array(); |
| 49 | 49 | if ($getParts) { |
| 50 | 50 | $diff[] = array_diff($a, $b); |
| 51 | 51 | $diff[] = array_diff($b, $a); |
| 52 | 52 | } |
| 53 | - $diff[] = $checkposition?array_intersect_assoc($a, $b):array_intersect($a, $b); |
|
| 53 | + $diff[] = $checkposition ?array_intersect_assoc($a, $b) : array_intersect($a, $b); |
|
| 54 | 54 | $diff[] = round(count(array_intersect(self::getParts($a, $c), self::getParts($b))) / $c * 100, $round); |
| 55 | 55 | $diff[] = $a === $b; |
| 56 | 56 | return $diff; |
@@ -82,7 +82,7 @@ discard block |
||
| 82 | 82 | $tmp = ''; |
| 83 | 83 | $c = 0; |
| 84 | 84 | foreach ($b as $k=>$v) { |
| 85 | - if (ctype_space($v)||ctype_punct($v)) { |
|
| 85 | + if (ctype_space($v) || ctype_punct($v)) { |
|
| 86 | 86 | $parts[] = $tmp; |
| 87 | 87 | $parts[] = $v; |
| 88 | 88 | $c += 2; |
@@ -115,7 +115,7 @@ discard block |
||
| 115 | 115 | if (is_array($split)) { |
| 116 | 116 | return |
| 117 | 117 | array_map( |
| 118 | - function ($val) { |
|
| 118 | + function($val) { |
|
| 119 | 119 | if (self::is_ascii($val)) { |
| 120 | 120 | return strtolower($val); |
| 121 | 121 | } |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | namespace Ezama\tests{ |
| 3 | - require($dir=dirname(__DIR__)).DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'similar_text.php'; |
|
| 3 | + require($dir = dirname(__DIR__)).DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'similar_text.php'; |
|
| 4 | 4 | require $dir.DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'simpleCommonTextSimilarities.php'; |
| 5 | 5 | require $dir.DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR.'complexCommonTextSimilarities.php'; |
| 6 | 6 | require $dir.DIRECTORY_SEPARATOR.'similar_text.php'; |
@@ -11,10 +11,10 @@ discard block |
||
| 11 | 11 | { |
| 12 | 12 | public function testSimilarText() |
| 13 | 13 | { |
| 14 | - $this->assertTrue(100.0===similarText('qwerty', 'ytrewq')); |
|
| 15 | - $this->assertTrue(similarText('qwerty', 'ytreq')>=80); |
|
| 14 | + $this->assertTrue(100.0 === similarText('qwerty', 'ytrewq')); |
|
| 15 | + $this->assertTrue(similarText('qwerty', 'ytreq') >= 80); |
|
| 16 | 16 | $this->assertTrue(areAnagrams('qwerty', 'ytrewq')); |
| 17 | - $this->assertTrue(0.0===similarText('qwerty', ';lkjhg')); |
|
| 17 | + $this->assertTrue(0.0 === similarText('qwerty', ';lkjhg')); |
|
| 18 | 18 | $this->assertTrue(haveSameRoot('qwerty', 'qwertyuiop')); |
| 19 | 19 | $this->assertTrue(wordsReorderOccured('joker is a cloon.', 'a cloon is joker')); |
| 20 | 20 | $this->assertTrue(similarButNotEqual('qwerty', 'ytrewq')); |
@@ -14,40 +14,40 @@ discard block |
||
| 14 | 14 | |
| 15 | 15 | class complexCommonTextSimilarities extends simpleCommonTextSimilarities |
| 16 | 16 | { |
| 17 | - const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol |
|
| 18 | - '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username |
|
| 19 | - '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password |
|
| 20 | - '@)?(?#'. // auth requires @ |
|
| 21 | - ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND |
|
| 22 | - '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR |
|
| 17 | + const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'.// protocol |
|
| 18 | + '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'.// username |
|
| 19 | + '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'.// password |
|
| 20 | + '@)?(?#'.// auth requires @ |
|
| 21 | + ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.// domain segments AND |
|
| 22 | + '[a-z][a-z0-9-]*[a-z0-9]'.// top level domain OR |
|
| 23 | 23 | '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'. |
| 24 | - '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address |
|
| 25 | - ')(:\d+)?'. // port |
|
| 26 | - ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path |
|
| 27 | - '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string |
|
| 28 | - '?)?)?'. // path and query string optional |
|
| 29 | - '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment |
|
| 24 | + '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.// IP address |
|
| 25 | + ')(:\d+)?'.// port |
|
| 26 | + ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'.// path |
|
| 27 | + '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'.// query string |
|
| 28 | + '?)?)?'.// path and query string optional |
|
| 29 | + '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'.// fragment |
|
| 30 | 30 | '$/i'; |
| 31 | 31 | |
| 32 | 32 | |
| 33 | 33 | |
| 34 | 34 | |
| 35 | - const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
|
| 35 | + const URL_POSIX_FORMAT = '"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i'; |
|
| 36 | 36 | |
/**
 * Tells whether $url is a string accepted by both URL patterns declared on
 * this class (URL_POSIX_FORMAT and URL_FORMAT_EXTENDED_PATTERN).
 *
 * @param mixed  $url       Candidate value; anything that is not a string is rejected.
 * @param string $getDomain Output. Receives capture group 9 of
 *                          URL_FORMAT_EXTENDED_PATTERN with trailing dots removed,
 *                          or '' when that group is absent from the match.
 *
 * @return bool True when $url is a string matched by both patterns.
 */
protected static function isUrl($url, &$getDomain = '')
{
    $matches = array();
    $bool = is_string($url)
        && preg_match(self::URL_POSIX_FORMAT, $url)
        && preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches);

    // $matches stays empty when an earlier check short-circuits or the pattern
    // fails, and preg_match() omits trailing groups that did not participate,
    // so index 9 cannot be assumed to exist even after a successful match.
    $getDomain = isset($matches[9]) ? rtrim($matches[9], '.') : '';

    return $bool;
}
| 44 | 44 | |
| 45 | 45 | public static function strippedUrl($a, $b) |
| 46 | 46 | { |
| 47 | - if (self::isUrl($a, $domain)&&is_string($b)) { |
|
| 48 | - return $domain===trim($b); |
|
| 49 | - } elseif (self::isUrl($b, $domain)&&is_string($a)) { |
|
| 50 | - return $domain===trim($a); |
|
| 47 | + if (self::isUrl($a, $domain) && is_string($b)) { |
|
| 48 | + return $domain === trim($b); |
|
| 49 | + } elseif (self::isUrl($b, $domain) && is_string($a)) { |
|
| 50 | + return $domain === trim($a); |
|
| 51 | 51 | } else { |
| 52 | 52 | return false; |
| 53 | 53 | } |
@@ -73,14 +73,14 @@ discard block |
||
| 73 | 73 | if (!is_string($a) || !is_string($b)) { |
| 74 | 74 | return false; |
| 75 | 75 | } |
| 76 | - $filter=function ($v) { |
|
| 76 | + $filter = function($v) { |
|
| 77 | 77 | return !(ctype_space($v)); |
| 78 | 78 | }; |
| 79 | 79 | self::filter($a, $b, $filter, true); |
| 80 | 80 | return self::waorDiff($a, $b, count($a), count($b)); |
| 81 | 81 | } |
| 82 | 82 | |
| 83 | - private static function filter(&$a, &$b, $filter, $insensitive=true) |
|
| 83 | + private static function filter(&$a, &$b, $filter, $insensitive = true) |
|
| 84 | 84 | { |
| 85 | 85 | if ($insensitive) { |
| 86 | 86 | $a = array_filter(self::getParts(self::strtolower($a)), $filter); |
@@ -93,14 +93,14 @@ discard block |
||
| 93 | 93 | |
/**
 * Compares two lists position by position after re-indexing both from zero.
 *
 * When $ca is greater than $cb the entries of $a are checked against $b;
 * otherwise the entries of $b are checked against $a.
 *
 * @param array $a  First list.
 * @param array $b  Second list.
 * @param int   $ca Size used for $a when choosing the comparison direction.
 * @param int   $cb Size used for $b when choosing the comparison direction.
 *
 * @return bool True when at least one index/value pair of the chosen list
 *              has no identical counterpart in the other list.
 */
private static function waorDiff($a, $b, $ca, $cb)
{
    $listA = array_values($a);
    $listB = array_values($b);

    if ($ca > $cb) {
        $difference = array_diff_assoc($listA, $listB);
    } else {
        $difference = array_diff_assoc($listB, $listA);
    }

    return !empty($difference);
}
| 98 | 98 | |
| 99 | 99 | |
| 100 | - public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true) |
|
| 100 | + public static function punctuactionChangesOccured($a, $b, $insensitive = true, $considerSpace = true) |
|
| 101 | 101 | { |
| 102 | - $filter=function ($v) use ($considerSpace) { |
|
| 103 | - return $considerSpace?!(ctype_space($v)||ctype_punct($v)):!ctype_punct($v); |
|
| 102 | + $filter = function($v) use ($considerSpace) { |
|
| 103 | + return $considerSpace ? !(ctype_space($v) || ctype_punct($v)) : !ctype_punct($v); |
|
| 104 | 104 | }; |
| 105 | 105 | if (!is_string($a) || !is_string($b)) { |
| 106 | 106 | return false; |
@@ -115,8 +115,8 @@ discard block |
||
| 115 | 115 | if (!is_string($a) || !is_string($b)) { |
| 116 | 116 | return false; |
| 117 | 117 | } |
| 118 | - $filter=function ($v) { |
|
| 119 | - return !(ctype_space($v)||ctype_punct($v)); |
|
| 118 | + $filter = function($v) { |
|
| 119 | + return !(ctype_space($v) || ctype_punct($v)); |
|
| 120 | 120 | }; |
| 121 | 121 | |
| 122 | 122 | self::filter($a, $b, $filter, true); |
@@ -126,7 +126,7 @@ discard block |
||
| 126 | 126 | private static function aoeStemming($a, $b) |
| 127 | 127 | { |
| 128 | 128 | foreach ($a as $index=>$word) { |
| 129 | - if (!self::haveSameRoot($word, $b[$index])||(isset($a[$index][2])&&isset($b[$index][2]))) { |
|
| 129 | + if (!self::haveSameRoot($word, $b[$index]) || (isset($a[$index][2]) && isset($b[$index][2]))) { |
|
| 130 | 130 | return false; |
| 131 | 131 | } |
| 132 | 132 | } |
@@ -17,7 +17,7 @@ discard block |
||
| 17 | 17 | { |
/**
 * Checks the statistics produced by similarText() for an anagram match.
 *
 * @param mixed $a First value to compare.
 * @param mixed $b Second value to compare.
 *
 * @return bool True when similarText() returns a truthy value and the
 *              collected stats report 'similar' as exactly 100.0 and
 *              'contain' as exactly true.
 */
public static function areAnagrams($a, $b)
{
    // A falsy result ends the check before $check is inspected.
    if (!self::similarText($a, $b, 2, true, $check)) {
        return false;
    }

    return $check['similar'] === 100.0 && $check['contain'] === true;
}
| 22 | 22 | |
| 23 | 23 | public static function similarButNotEqual($a, $b) |
@@ -27,29 +27,29 @@ discard block |
||
| 27 | 27 | |
/**
 * Checks whether $a is strictly longer than $b and the similarText() stats
 * report a 'substr' score of exactly 100.0.
 *
 * @param string $a Candidate super-string.
 * @param string $b Candidate sub-string.
 *
 * @return bool False whenever $a is not longer (in bytes) than $b.
 */
public static function aIsSuperStringOfB($a, $b)
{
    // A super-string must be strictly longer than the string it contains.
    if (strlen($a) <= strlen($b)) {
        return false;
    }

    return self::similarText($a, $b, 2, true, $check) && is_array($check) && $check['substr'] === 100.0;
}
| 36 | 36 | |
/**
 * Runs similarText() with part collection enabled and hands the resulting
 * stats to wro_filter() together with a token filter.
 *
 * The filter always rejects whitespace tokens; punctuation tokens are
 * rejected as well when $considerPunctuation is truthy.
 *
 * @param mixed $a                   First value to compare.
 * @param mixed $b                   Second value to compare.
 * @param bool  $considerPunctuation Whether punctuation tokens are filtered out too.
 *
 * @return bool
 */
public static function wordsReorderOccured($a, $b, $considerPunctuation = true)
{
    $keepToken = function ($token) use ($considerPunctuation) {
        if (ctype_space($token)) {
            return false;
        }

        return !($considerPunctuation && ctype_punct($token));
    };

    if (!self::similarText($a, $b, 2, true, $check, true)) {
        return false;
    }

    return is_array($check) && self::wro_filter($check, $keepToken);
}
| 44 | 44 | |
/**
 * Evaluates a stats array for wordsReorderOccured().
 *
 * @param array    $check  Stats array; the keys 'a-b', 'b-a', 'substr' and 'equal' are read.
 * @param callable $filter Predicate applied to the 'a-b' and 'b-a' entries.
 *
 * @return bool True when nothing in 'a-b' or 'b-a' passes $filter,
 *              'substr' is truthy and 'equal' is falsy.
 */
private static function wro_filter($check, $filter)
{
    // Any token surviving the filter on either side rules the match out.
    foreach (array('a-b', 'b-a') as $side) {
        if (array_filter($check[$side], $filter)) {
            return false;
        }
    }

    return $check['substr'] && !$check['equal'];
}
| 49 | 49 | |
/**
 * Runs similarText() with part collection and position checking enabled,
 * then tests whether the keys of the 'a&b' stats entry are exactly the
 * consecutive integers starting at 0.
 *
 * @param mixed $a First value to compare.
 * @param mixed $b Second value to compare.
 *
 * @return bool False when similarText() returns a falsy value, when the
 *              stats are not an array, or when the 'a&b' keys are not
 *              0..n-1. An empty 'a&b' entry also yields false, because
 *              range(0, -1) is never an empty array.
 */
public static function haveSameRoot($a, $b)
{
    if (!self::similarText($a, $b, 2, true, $check, true, true) || !is_array($check)) {
        return false;
    }

    $shared = $check['a&b'];
    $expectedKeys = range(0, count($shared) - 1);

    return $expectedKeys === array_keys($shared);
}
| 54 | 54 | } |
| 55 | 55 | } |
@@ -21,7 +21,7 @@ discard block |
||
| 21 | 21 | $insensitive = true, |
| 22 | 22 | &$stats = false, |
| 23 | 23 | $getParts = false, |
| 24 | - $checkposition=false |
|
| 24 | + $checkposition = false |
|
| 25 | 25 | ) { |
| 26 | 26 | return similar_text::similarText( |
| 27 | 27 | $firstString, |
@@ -54,12 +54,12 @@ discard block |
||
| 54 | 54 | return simpleCommonTextSimilarities::haveSameRoot($a, $b); |
| 55 | 55 | } |
| 56 | 56 | |
/**
 * Function-style shortcut for simpleCommonTextSimilarities::wordsReorderOccured().
 *
 * @param mixed $a                   First value to compare.
 * @param mixed $b                   Second value to compare.
 * @param bool  $considerPunctuation Forwarded unchanged to the class method.
 *
 * @return bool Whatever the class method returns.
 */
function wordsReorderOccured($a, $b, $considerPunctuation = true)
{
    $reordered = simpleCommonTextSimilarities::wordsReorderOccured($a, $b, $considerPunctuation);

    return $reordered;
}
| 61 | 61 | |
/**
 * Function-style shortcut for complexCommonTextSimilarities::punctuactionChangesOccured().
 *
 * The class method takes ($a, $b, $insensitive = true, $considerSpace = true).
 * This wrapper exposes no $insensitive option, so that parameter's default
 * (true) is passed explicitly and $considerSpace goes in the fourth slot.
 *
 * @param mixed $a             First value to compare.
 * @param mixed $b             Second value to compare.
 * @param bool  $considerSpace Forwarded as the class method's $considerSpace argument.
 *
 * @return bool Whatever the class method returns.
 */
function punctuactionChangesOccured($a, $b, $considerSpace = true)
{
    return complexCommonTextSimilarities::punctuactionChangesOccured($a, $b, true, $considerSpace);
}