Completed
Pull Request — master (#10)
by Akpé Aurelle Emmanuel Moïse
02:35
created
tests/Similar_textTest.php 1 patch
Indentation   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -3,7 +3,7 @@
 block discarded – undo
3 3
     require($DIR = (($dir = dirname(__DIR__)).DIRECTORY_SEPARATOR.'src'.DIRECTORY_SEPARATOR)).'similar_text.php';
4 4
     require $DIR.'simpleCommonTextSimilarities.php';
5 5
     require $DIR.'complexCommonTextSimilarities.php';
6
-     require $DIR.'distance.php';
6
+        require $DIR.'distance.php';
7 7
     require $dir.DIRECTORY_SEPARATOR.'similar_text.php';
8 8
 
9 9
     use PHPUnit\Framework\TestCase;
Please login to merge, or discard this patch.
src/complexCommonTextSimilarities.php 1 patch
Spacing   +13 added lines, -13 removed lines patch added patch discarded remove patch
@@ -78,32 +78,32 @@  discard block
 block discarded – undo
78 78
             if (!is_string($a) || !is_string($b)) {
79 79
                 return false;
80 80
             }
81
-            $filter = function ($v) {
81
+            $filter = function($v) {
82 82
                 return !(ctype_space($v));
83 83
             };
84 84
             self::filter($a, $b, $filter, true);
85 85
             return self::waorDiff($a, $b, count($a), count($b));
86 86
         }
87 87
         
88
-        private static function filter(&$a, &$b, $filter, $insensitive = true, $captureLength=false)
88
+        private static function filter(&$a, &$b, $filter, $insensitive = true, $captureLength = false)
89 89
         {
90 90
             if ($insensitive) {
91 91
                 $a = array_filter(self::getParts(self::strtolower($a), $c, $captureLength), $filter);
92
-                if ($c===1) {
93
-                    $a=self::strtolower($a);
92
+                if ($c === 1) {
93
+                    $a = self::strtolower($a);
94 94
                 }
95 95
                 $b = array_filter(self::getParts(self::strtolower($b), $c, $captureLength), $filter);
96
-                if ($c===1) {
97
-                    $b=self::strtolower($b);
96
+                if ($c === 1) {
97
+                    $b = self::strtolower($b);
98 98
                 }
99 99
             } else {
100 100
                 $a = array_filter(self::getParts(self::split($a), $c, $captureLength), $filter);
101
-                if ($c===1) {
102
-                    $a=self::strtolower($a);
101
+                if ($c === 1) {
102
+                    $a = self::strtolower($a);
103 103
                 }
104 104
                 $b = array_filter(self::getParts(self::split($b), $c, $captureLength), $filter);
105
-                if ($c===1) {
106
-                    $b=self::strtolower($b);
105
+                if ($c === 1) {
106
+                    $b = self::strtolower($b);
107 107
                 }
108 108
             }
109 109
         }
@@ -116,7 +116,7 @@  discard block
 block discarded – undo
116 116
         
117 117
         public static function punctuationChangesOccured($a, $b, $insensitive = true, $considerSpace = true)
118 118
         {
119
-            $filter = function ($v) use ($considerSpace) {
119
+            $filter = function($v) use ($considerSpace) {
120 120
                 return $considerSpace ? !(ctype_space($v) || ctype_punct($v)) : !ctype_punct($v);
121 121
             };
122 122
             if (!is_string($a) || !is_string($b)) {
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
132 132
             if (!is_string($a) || !is_string($b)) {
133 133
                 return false;
134 134
             }
135
-            $filter = function ($v) {
135
+            $filter = function($v) {
136 136
                 return !(ctype_space($v[0]) || ctype_punct($v[0]));
137 137
             };
138 138
             
@@ -143,7 +143,7 @@  discard block
 block discarded – undo
143 143
         private static function aoeStemming($a, $b)
144 144
         {
145 145
             foreach ($a as $index=>$word) {
146
-                if (!self::haveSameRoot($word[0], $b[$index][0]) || ($a[$index][1]>2 && $b[$index][1]>2)) {
146
+                if (!self::haveSameRoot($word[0], $b[$index][0]) || ($a[$index][1] > 2 && $b[$index][1] > 2)) {
147 147
                     return false;
148 148
                 }
149 149
             }
Please login to merge, or discard this patch.
src/simpleCommonTextSimilarities.php 1 patch
Spacing   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -36,7 +36,7 @@
 block discarded – undo
36 36
         
37 37
         public static function wordsReorderOccured($a, $b, $considerPunctuation = true)
38 38
         {
39
-            $filter = function ($v) use ($considerPunctuation) {
39
+            $filter = function($v) use ($considerPunctuation) {
40 40
                 return $considerPunctuation ? !(ctype_space($v) || ctype_punct($v)) : !ctype_space($v);
41 41
             };
42 42
             return self::similarText($a, $b, 2, true, $check, true) && is_array($check) && self::wro_filter($check, $filter) ?true :false;
Please login to merge, or discard this patch.
src/distance.php 1 patch
Spacing   +73 added lines, -73 removed lines patch added patch discarded remove patch
@@ -14,17 +14,17 @@  discard block
 block discarded – undo
14 14
 namespace EZAMA{
15 15
     class distance extends complexCommonTextSimilarities
16 16
     {
17
-        public static function jaroWinkler($a, $b, $round=2)
17
+        public static function jaroWinkler($a, $b, $round = 2)
18 18
         {
19
-            if (!is_string($a)||!is_string($b)) {
19
+            if (!is_string($a) || !is_string($b)) {
20 20
                 return false;
21 21
             }
22
-            static $distance=array();
23
-            static $previous=array();
24
-            if (array($a,$b)===$previous) {
22
+            static $distance = array();
23
+            static $previous = array();
24
+            if (array($a, $b) === $previous) {
25 25
                 return $distance;
26 26
             }
27
-            $previous=array($a,$b);
27
+            $previous = array($a, $b);
28 28
             return self::getJWDistance($a, $b, $distance, $round);
29 29
         }
30 30
         
@@ -33,15 +33,15 @@  discard block
 block discarded – undo
33 33
         private static function getJWDistance(&$a, &$b, &$distance, $round)
34 34
         {
35 35
             extract(self::prepareJaroWinkler($a, $b));
36
-            for ($i=0,$min=min(count($a), count($b)),$t=0;$i<$min;$i++) {
37
-                if ($a[$i]!==$b[$i]) {
36
+            for ($i = 0, $min = min(count($a), count($b)), $t = 0; $i < $min; $i++) {
37
+                if ($a[$i] !== $b[$i]) {
38 38
                     $t++;
39 39
                 }
40 40
             }
41
-            $t/=2;
42
-            $distance['jaro']=1/3*($corresponding/$ca+$corresponding/$cb+($corresponding-$t)/$corresponding);
43
-            $distance['jaro-winkler']=$distance['jaro']+(min($longCommonSubstr, 4)*0.1*(1-$distance['jaro']));
44
-            $distance=array_map(function ($v) use ($round) {
41
+            $t /= 2;
42
+            $distance['jaro'] = 1 / 3 * ($corresponding / $ca + $corresponding / $cb + ($corresponding - $t) / $corresponding);
43
+            $distance['jaro-winkler'] = $distance['jaro'] + (min($longCommonSubstr, 4) * 0.1 * (1 - $distance['jaro']));
44
+            $distance = array_map(function($v) use ($round) {
45 45
                 return round($v, $round);
46 46
             }, $distance);
47 47
             
@@ -50,15 +50,15 @@  discard block
 block discarded – undo
50 50
         
51 51
         private static function prepareJaroWinkler(&$a, &$b)
52 52
         {
53
-            $a=self::split($a);
54
-            $b=self::split($b);
55
-            $transpositions=array('a'=>array(),'b'=>array(),'corresponding'=>0,'longCommonSubstr'=>0,'ca'=>count($a),'cb'=>count($b));
56
-            $Δ=max($transpositions['ca'], $transpositions['cb'])/2-1;
53
+            $a = self::split($a);
54
+            $b = self::split($b);
55
+            $transpositions = array('a'=>array(), 'b'=>array(), 'corresponding'=>0, 'longCommonSubstr'=>0, 'ca'=>count($a), 'cb'=>count($b));
56
+            $Δ = max($transpositions['ca'], $transpositions['cb']) / 2 - 1;
57 57
             self::jwMatches($a, $b, $transpositions, $Δ);
58 58
             ksort($transpositions['a']);
59 59
             ksort($transpositions['b']);
60
-            $transpositions['a']=array_values($transpositions['a']);
61
-            $transpositions['b']=array_values($transpositions['b']);
60
+            $transpositions['a'] = array_values($transpositions['a']);
61
+            $transpositions['b'] = array_values($transpositions['b']);
62 62
             return $transpositions;
63 63
         }
64 64
         
@@ -66,12 +66,12 @@  discard block
 block discarded – undo
66 66
         {
67 67
             foreach ($a as $ind=>$chr) {
68 68
                 foreach ($b as $index=>$char) {
69
-                    if ($chr===$char&&(abs($index-$ind)<=$Δ)) {
70
-                        if ($ind!==$index) {
71
-                            $transpositions['a'][$ind]=$chr;
72
-                            $transpositions['b'][$index]=$char;
69
+                    if ($chr === $char && (abs($index - $ind) <= $Δ)) {
70
+                        if ($ind !== $index) {
71
+                            $transpositions['a'][$ind] = $chr;
72
+                            $transpositions['b'][$index] = $char;
73 73
                         } else {
74
-                            if ($ind-1<=$transpositions['longCommonSubstr']) {
74
+                            if ($ind - 1 <= $transpositions['longCommonSubstr']) {
75 75
                                 $transpositions['longCommonSubstr']++;
76 76
                             }
77 77
                         }
@@ -84,55 +84,55 @@  discard block
 block discarded – undo
84 84
         
85 85
         public static function hamming($a, $b)
86 86
         {
87
-            if (!is_string($a)||!is_string($b)||(strlen($a)!==strlen($b))) {
87
+            if (!is_string($a) || !is_string($b) || (strlen($a) !== strlen($b))) {
88 88
                 return false;
89 89
             }
90
-            static $distance=0;
91
-            static $previous=array();
92
-            if (array($a,$b)===$previous) {
90
+            static $distance = 0;
91
+            static $previous = array();
92
+            if (array($a, $b) === $previous) {
93 93
                 return $distance;
94 94
             }
95
-            $previous=array($a,$b);
96
-            $a=self::split($a);
97
-            $b=self::split($b);
98
-            $distance=count(array_diff_assoc($a, $b));
95
+            $previous = array($a, $b);
96
+            $a = self::split($a);
97
+            $b = self::split($b);
98
+            $distance = count(array_diff_assoc($a, $b));
99 99
             return $distance;
100 100
         }
101 101
         
102
-        public static function dice($a, $b, $round=2)
102
+        public static function dice($a, $b, $round = 2)
103 103
         {
104
-            if (!is_string($a)||!is_string($b)) {
104
+            if (!is_string($a) || !is_string($b)) {
105 105
                 return false;
106 106
             }
107
-            if (empty($a)||empty($b)) {
107
+            if (empty($a) || empty($b)) {
108 108
                 return 0.0;
109 109
             }
110
-            if ($a===$b) {
110
+            if ($a === $b) {
111 111
                 return 1.0;
112 112
             }
113 113
             
114
-            static $distance=0;
115
-            static $previous=array();
116
-            if (array($a,$b)===$previous) {
114
+            static $distance = 0;
115
+            static $previous = array();
116
+            if (array($a, $b) === $previous) {
117 117
                 return $distance;
118 118
             }
119
-            $previous=array($a,$b);
120
-            $a=self::split($a, 2);
121
-            $b=self::split($b, 2);
122
-            $ca=($caGrams=count($a))*2-self::getEndStrLen($a);
123
-            $cb=($cbGrams=count($b))*2-self::getEndStrLen($b);
124
-            $distance=round(2*count($caGrams>$cbGrams?array_intersect($a, $b):array_intersect($b, $a))/($ca+$cb), $round);
119
+            $previous = array($a, $b);
120
+            $a = self::split($a, 2);
121
+            $b = self::split($b, 2);
122
+            $ca = ($caGrams = count($a)) * 2 - self::getEndStrLen($a);
123
+            $cb = ($cbGrams = count($b)) * 2 - self::getEndStrLen($b);
124
+            $distance = round(2 * count($caGrams > $cbGrams ?array_intersect($a, $b) : array_intersect($b, $a)) / ($ca + $cb), $round);
125 125
             return $distance;
126 126
         }
127 127
         
128 128
         private static function getEndStrLen($a)
129 129
         {
130 130
             if (function_exists('array_key_last')) {
131
-                $end=array_key_last($a);
132
-                $end=(isset($end[1]))?0:1;
131
+                $end = array_key_last($a);
132
+                $end = (isset($end[1])) ? 0 : 1;
133 133
             } else {
134
-                $end=end($a);
135
-                $end=(isset($end[1]))?0:1;
134
+                $end = end($a);
135
+                $end = (isset($end[1])) ? 0 : 1;
136 136
                 reset($a);
137 137
             }
138 138
             return $end;
@@ -140,61 +140,61 @@  discard block
 block discarded – undo
140 140
         
141 141
         public static function levenshtein($a, $b)
142 142
         {
143
-            if (!is_string($a)||!is_string($b)) {
143
+            if (!is_string($a) || !is_string($b)) {
144 144
                 return false;
145 145
             }
146 146
             
147 147
             
148
-            static $distance=0;
149
-            static $previous=array();
150
-            if (array($a,$b)===$previous) {
148
+            static $distance = 0;
149
+            static $previous = array();
150
+            if (array($a, $b) === $previous) {
151 151
                 return $distance;
152 152
             }
153
-            $previous=array($a,$b);
154
-            $a=self::split($a);
155
-            $b=self::split($b);
153
+            $previous = array($a, $b);
154
+            $a = self::split($a);
155
+            $b = self::split($b);
156 156
             $ca = count($a);
157 157
             $cb = count($b);
158 158
             $dis = range(0, $cb);
159 159
             self::BuildLevenshteinCostMatrix($a, $b, $ca, $cb, $dis);
160 160
 
161
-            return $distance=$dis[$cb];
161
+            return $distance = $dis[$cb];
162 162
         }
163 163
         
164 164
         
165 165
         public static function levenshteinDamerau($a, $b)
166 166
         {
167
-            if (!is_string($a)||!is_string($b)) {
167
+            if (!is_string($a) || !is_string($b)) {
168 168
                 return false;
169 169
             }
170 170
               
171
-            static $distance=0;
172
-            static $previous=array();
173
-            if (array($a,$b)===$previous) {
171
+            static $distance = 0;
172
+            static $previous = array();
173
+            if (array($a, $b) === $previous) {
174 174
                 return $distance;
175 175
             }
176
-            $previous=array($a,$b);
177
-            $a=self::split($a);
178
-            $b=self::split($b);
176
+            $previous = array($a, $b);
177
+            $a = self::split($a);
178
+            $b = self::split($b);
179 179
             $ca = count($a);
180 180
             $cb = count($b);
181 181
             $dis = range(0, $cb);
182 182
             self::BuildLevenshteinCostMatrix($a, $b, $ca, $cb, $dis, true);
183 183
         
184
-            return $distance=$dis[$cb];
184
+            return $distance = $dis[$cb];
185 185
         }
186 186
         
187
-        private static function BuildLevenshteinCostMatrix($a, $b, $ca, $cb, &$dis, $damerau=false)
187
+        private static function BuildLevenshteinCostMatrix($a, $b, $ca, $cb, &$dis, $damerau = false)
188 188
         {
189
-            $dis_new=array();
190
-            for ($x=1;$x<=$ca;$x++) {
191
-                $dis_new[0]=$x;
192
-                for ($y=1;$y<=$cb;$y++) {
193
-                    $c = ($a[$x-1] == $b[$y-1])?0:1;
194
-                    $dis_new[$y] = min($dis[$y]+1, $dis_new[$y-1]+1, $dis[$y-1]+$c);
189
+            $dis_new = array();
190
+            for ($x = 1; $x <= $ca; $x++) {
191
+                $dis_new[0] = $x;
192
+                for ($y = 1; $y <= $cb; $y++) {
193
+                    $c = ($a[$x - 1] == $b[$y - 1]) ? 0 : 1;
194
+                    $dis_new[$y] = min($dis[$y] + 1, $dis_new[$y - 1] + 1, $dis[$y - 1] + $c);
195 195
                     if ($damerau) {
196
-                        if ($x > 1 && $y > 1 && $a[$x-1] == $b[$y-2] && $a[$x-2] == $b[$y-1]) {
197
-                            $dis_new[$y]= min($dis_new[$y-1], $dis[$y-3] + $c) ;
196
+                        if ($x > 1 && $y > 1 && $a[$x - 1] == $b[$y - 2] && $a[$x - 2] == $b[$y - 1]) {
197
+                            $dis_new[$y] = min($dis_new[$y - 1], $dis[$y - 3] + $c);
198 198
                         }
199 199
                     }
200 200
                 }
Please login to merge, or discard this patch.
src/similar_text.php 1 patch
Spacing   +14 added lines, -14 removed lines patch added patch discarded remove patch
@@ -76,13 +76,13 @@  discard block
 block discarded – undo
76 76
             return $stats;
77 77
         }
78 78
 
79
-        protected static function getParts($b, &$c = 0, $lengthCapture=false)
79
+        protected static function getParts($b, &$c = 0, $lengthCapture = false)
80 80
         {
81 81
             $parts = array();
82 82
             $tmp = '';
83 83
             $c = 0;
84
-            $length=0;
85
-            $lengthCapture=(bool)$lengthCapture;
84
+            $length = 0;
85
+            $lengthCapture = (bool) $lengthCapture;
86 86
             if ($lengthCapture) {
87 87
                 self::capturePartsWithLength($b, $length, $tmp, $c, $parts);
88 88
             } else {
@@ -114,17 +114,17 @@  discard block
 block discarded – undo
114 114
             foreach ($b as $k=>$v) {
115 115
                 $length++;
116 116
                 if (ctype_space($v) || ctype_punct($v)) {
117
-                    $parts[] =array($tmp,$length-1);
118
-                    $parts[] = array($v,1);
117
+                    $parts[] = array($tmp, $length - 1);
118
+                    $parts[] = array($v, 1);
119 119
                     $c += 2;
120 120
                     $tmp = '';
121
-                    $length=0;
121
+                    $length = 0;
122 122
                     continue;
123 123
                 }
124 124
                 $tmp .= $v;
125 125
             }
126 126
             if (!empty($tmp)) {
127
-                $parts[] = array($tmp,$length);
127
+                $parts[] = array($tmp, $length);
128 128
                 $c++;
129 129
             }
130 130
         }
@@ -146,7 +146,7 @@  discard block
 block discarded – undo
146 146
             if (is_array($split)) {
147 147
                 return
148 148
                     array_map(
149
-                        function ($val) {
149
+                        function($val) {
150 150
                             if (self::is_ascii($val)) {
151 151
                                 return strtolower($val);
152 152
                             }
@@ -161,21 +161,21 @@  discard block
 block discarded – undo
161 161
             }
162 162
         }
163 163
         
164
-        protected static function split($str, $grams=false)
164
+        protected static function split($str, $grams = false)
165 165
         {
166 166
             if (!is_string($str)) {
167 167
                 return array();
168 168
             }
169 169
             static $split = [];
170 170
             static $old = '';
171
-            static $oldGrams=1;
172
-            $grams=is_int($grams) && $grams >=1 && $grams <= strlen($str) ? $grams : false;
173
-            if ($old === $str && $oldGrams===$grams) {
171
+            static $oldGrams = 1;
172
+            $grams = is_int($grams) && $grams >= 1 && $grams <= strlen($str) ? $grams : false;
173
+            if ($old === $str && $oldGrams === $grams) {
174 174
                 return $split;
175 175
             } else {
176 176
                 $old = $str;
177
-                $oldGrams=$grams;
178
-                $split = !$grams ? preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY):preg_split('/(.{'.$grams.'})/su', $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);
177
+                $oldGrams = $grams;
178
+                $split = !$grams ? preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY) : preg_split('/(.{'.$grams.'})/su', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
179 179
                 return $split;
180 180
             }
181 181
         }
Please login to merge, or discard this patch.
similar_text.php 1 patch
Spacing   +2 added lines, -2 removed lines patch added patch discarded remove patch
@@ -95,7 +95,7 @@  discard block
 block discarded – undo
95 95
     }
96 96
     
97 97
     
98
-    function dice($a, $b, $round=2)
98
+    function dice($a, $b, $round = 2)
99 99
     {
100 100
         return Distance::dice($a, $b, $round);
101 101
     }
@@ -107,7 +107,7 @@  discard block
 block discarded – undo
107 107
     }
108 108
     
109 109
     
110
-    function jaroWinkler($a, $b, $round=2)
110
+    function jaroWinkler($a, $b, $round = 2)
111 111
     {
112 112
         return Distance::jaroWinkler($a, $b, $round);
113 113
     }
Please login to merge, or discard this patch.