Passed
Pull Request — master (#10)
by Akpé Aurelle Emmanuel Moïse
01:49
created

distance::getJWDistance()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
dl 0
loc 16
rs 9.9
c 0
b 0
f 0
cc 3
nc 3
nop 4
1
<?php
2
3
/**
4
*
5
* @Name : similar-text
6
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
7
* @Date : 2019-04-01
8
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
9
* @Repository : https://github.com/manuwhat/similar
10
*
11
**/
12
13
14
namespace EZAMA{
15
    class distance extends complexCommonTextSimilarities
16
    {
17
        /**
         * Jaro and Jaro-Winkler similarity of two strings.
         *
         * The most recent result is memoised in function-static storage and is
         * returned again when the method is called with the same two strings and
         * the same precision.
         *
         * @param mixed $a     first string
         * @param mixed $b     second string
         * @param int   $round number of decimals handed to round()
         *
         * @return array|false array with the keys 'jaro' and 'jaro-winkler', or
         *                     false when either argument is not a string
         */
        public static function jaroWinkler($a, $b, $round=2)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }
            static $distance=array();
            static $previous=array();
            // The precision belongs to the cache key: the stored values are already
            // rounded, so a hit with another $round would return the wrong precision.
            $key=array($a,$b,$round);
            if ($key===$previous) {
                return $distance;
            }
            $previous=$key;
            return self::getJWDistance($a, $b, $distance, $round);
        }
30
        
31
        
32
        
33
        /**
         * Computes the rounded Jaro and Jaro-Winkler values from the match data
         * produced by prepareJaroWinkler().
         *
         * @param mixed $a        first input; overwritten by reference with the
         *                        out-of-place matches of the first string
         * @param mixed $b        second input; overwritten by reference with the
         *                        out-of-place matches of the second string
         * @param array $distance receives the result by reference
         * @param int   $round    number of decimals handed to round()
         *
         * @return array array with the keys 'jaro' and 'jaro-winkler'
         */
        private static function getJWDistance(&$a, &$b, &$distance, $round)
        {
            $prepared=self::prepareJaroWinkler($a, $b);
            // Keep the previous side effect: the by-reference arguments end up
            // holding the re-indexed lists of out-of-place matches.
            $a=$prepared['a'];
            $b=$prepared['b'];
            $matches=$prepared['corresponding'];
            if ($matches<=0) {
                // Without a single match every ratio below would divide by zero
                // (this also covers empty inputs, where 'ca' or 'cb' is 0).
                $distance=array('jaro'=>round(0, $round),'jaro-winkler'=>round(0, $round));
                return $distance;
            }
            // Count the positions at which the two out-of-place sequences disagree;
            // half of that count is used as the number of transpositions.
            $t=0;
            $limit=min(count($a), count($b));
            for ($i=0;$i<$limit;$i++) {
                if ($a[$i]!==$b[$i]) {
                    $t++;
                }
            }
            $t/=2;
            $jaro=1/3*($matches/$prepared['ca']+$matches/$prepared['cb']+($matches-$t)/$matches);
            // The bonus uses at most 4 units of 'longCommonSubstr' with a 0.1 scaling factor.
            $jaroWinkler=$jaro+(min($prepared['longCommonSubstr'], 4)*0.1*(1-$jaro));
            $distance=array('jaro'=>round($jaro, $round),'jaro-winkler'=>round($jaroWinkler, $round));

            return $distance;
        }
50
        
51
        /**
         * Splits both inputs and collects the match data used by getJWDistance().
         *
         * @param mixed $a first input; replaced by reference with its split form
         * @param mixed $b second input; replaced by reference with its split form
         *
         * @return array keys: 'a' and 'b' (re-indexed out-of-place matches, ordered
         *               by original position), 'corresponding' (match count),
         *               'longCommonSubstr', 'ca' and 'cb' (element counts)
         */
        private static function prepareJaroWinkler(&$a, &$b)
        {
            $a=self::split($a);
            $b=self::split($b);
            // Static analysis reports that split() may yield false, which count()
            // does not accept; treat that case as "no characters".
            if (!is_array($a)) {
                $a=array();
            }
            if (!is_array($b)) {
                $b=array();
            }
            $transpositions=array(
                'a'=>array(),
                'b'=>array(),
                'corresponding'=>0,
                'longCommonSubstr'=>0,
                'ca'=>count($a),
                'cb'=>count($b),
            );
            // Match window: floor(max/2)-1, never negative. Unclamped, one-character
            // inputs produced a window of -0.5 and identical characters never matched.
            // Flooring does not alter other results because index distances are integers.
            $window=max(0, floor(max($transpositions['ca'], $transpositions['cb'])/2)-1);
            self::jwMatches($a, $b, $transpositions, $window);
            // Order the out-of-place matches by original position, then re-index
            // them so both lists can be compared element by element.
            ksort($transpositions['a']);
            ksort($transpositions['b']);
            $transpositions['a']=array_values($transpositions['a']);
            $transpositions['b']=array_values($transpositions['b']);
            return $transpositions;
        }
64
        
65
        /**
         * Pairs the characters of $a with characters of $b and records the
         * outcome in $transpositions through _jwMatches().
         *
         * Every character takes part in at most one match: once a character of $a
         * has found a partner the search moves on to the next character of $a, and
         * a character of $b that is already paired is skipped. Without this rule
         * repeated characters were counted several times ('aaaa' against 'aaaa'
         * produced 10 matches and a Jaro value above 1).
         *
         * @param array     $a              characters of the first string
         * @param array     $b              characters of the second string
         * @param array     $transpositions match data, updated by reference
         * @param int|float $Δ              maximum allowed index distance
         *
         * @return void
         */
        private static function jwMatches(&$a, &$b, &$transpositions, $Δ)
        {
            $taken=array();
            foreach ($a as $ind=>$chr) {
                foreach ($b as $index=>$char) {
                    if (isset($taken[$index])) {
                        continue;
                    }
                    $before=$transpositions['corresponding'];
                    self::_jwMatches($chr, $char, $index, $ind, $transpositions, $Δ);
                    // _jwMatches() increments 'corresponding' exactly when it accepts the pair
                    if ($transpositions['corresponding']>$before) {
                        $taken[$index]=true;
                        break;
                    }
                }
            }
        }
73
        
74
        /**
         * Records one candidate pair in $transpositions when it qualifies as a match.
         *
         * A pair qualifies when both characters are identical and their index
         * distance does not exceed $Δ. A match at different indexes is stored under
         * 'a' and 'b' (keyed by its index in each string); a match at the same index
         * may advance 'longCommonSubstr'. Every match increments 'corresponding'.
         *
         * @param string    $chr            character taken from the first string
         * @param string    $char           character taken from the second string
         * @param int       $index          position of $char
         * @param int       $ind            position of $chr
         * @param array     $transpositions match data, updated by reference
         * @param int|float $Δ              maximum allowed index distance
         *
         * @return void
         */
        private static function _jwMatches($chr, $char, $index, $ind, &$transpositions, $Δ)
        {
            if ($chr!==$char||abs($index-$ind)>$Δ) {
                return;
            }
            $transpositions['corresponding']++;
            if ($ind!==$index) {
                $transpositions['a'][$ind]=$chr;
                $transpositions['b'][$index]=$char;
                return;
            }
            // NOTE(review): this also advances the counter when an earlier position
            // did not match (e.g. index 1 while the counter is still 0) - confirm
            // whether a strict common-prefix length was intended.
            if ($ind-1<=$transpositions['longCommonSubstr']) {
                $transpositions['longCommonSubstr']++;
            }
        }
88
        
89
        
90
        /**
         * Hamming distance: the number of positions at which the split forms of
         * the two strings differ.
         *
         * The most recent result is memoised and reused when the method is called
         * again with the same pair of strings.
         *
         * @param mixed $a first string
         * @param mixed $b second string
         *
         * @return int|false the distance, or false when an argument is not a
         *                   string or the two strings differ in strlen()
         */
        public static function hamming($a, $b)
        {
            if (!is_string($a)||!is_string($b)||(strlen($a)!==strlen($b))) {
                return false;
            }
            static $distance=0;
            static $previous=array();
            if (array($a,$b)===$previous) {
                return $distance;
            }
            $previous=array($a,$b);
            $charsA=self::split($a);
            $charsB=self::split($b);
            // Static analysis reports that split() may yield false, which
            // array_diff_assoc() does not accept; treat that case as "no characters".
            if (!is_array($charsA)) {
                $charsA=array();
            }
            if (!is_array($charsB)) {
                $charsB=array();
            }
            // array_diff_assoc() keeps the entries of $charsA whose key/value pair
            // is absent from $charsB, i.e. the positions that differ.
            $distance=count(array_diff_assoc($charsA, $charsB));
            return $distance;
        }
106
        
107
        /**
         * Dice coefficient of two strings, computed on chunks of two characters.
         *
         * Trivial inputs are answered directly by handleVeryCommonDiceCases().
         * Otherwise the most recent result is memoised and reused when the method
         * is called again with the same strings and precision.
         *
         * @param mixed $a     first string
         * @param mixed $b     second string
         * @param int   $round number of decimals handed to round()
         *
         * @return float|false the coefficient, or false when either argument is
         *                     not a string
         */
        public static function dice($a, $b, $round=2)
        {
            // Return the value chosen by the helper itself (false, 0.0 or 1.0);
            // the previous code returned the boolean result of in_array() instead.
            $common=self::handleVeryCommonDiceCases($a, $b);
            if (in_array($common, array(false,0.0,1.0), true)) {
                return $common;
            }
            static $distance=0;
            static $previous=array();
            // The precision belongs to the cache key because the stored value is rounded.
            $key=array($a,$b,$round);
            if ($key===$previous) {
                return $distance;
            }
            $previous=$key;
            $a=self::split($a, 2);
            $b=self::split($b, 2);
            // getDiceDistance() delivers its result through the by-reference
            // $distance argument; return that variable rather than the call's
            // return value, which static analysis reported as always null.
            self::getDiceDistance($distance, $a, $b, $round);
            return $distance;
        }
122
        
123
        /**
         * Answers the Dice inputs that need no computation.
         *
         * @param mixed $a first input
         * @param mixed $b second input
         *
         * @return float|false|null false when an argument is not a string, 0.0
         *                          when either string is empty, 1.0 when both
         *                          strings are identical, null when none of
         *                          these cases applies
         */
        private static function handleVeryCommonDiceCases(&$a, &$b)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }
            // Compare against '' explicitly: empty() also reports the string "0"
            // as empty, which made dice('0', '0') fall into this branch.
            if ($a===''||$b==='') {
                return 0.0;
            }
            if ($a===$b) {
                return 1.0;
            }
            return null;
        }
135
        
136
        /**
         * Computes the rounded Dice value for two lists of chunks.
         *
         * The numerator counts the chunks of the longer list that also occur in
         * the shorter one. The denominator is, for each list, twice its chunk
         * count minus getEndStrLen() - presumably the character length of the
         * original strings; verify against split().
         *
         * @param float|int $distance receives the result by reference
         * @param array     $a        chunks of the first string
         * @param array     $b        chunks of the second string
         * @param int       $round    number of decimals handed to round()
         *
         * @return float the same value that is stored in $distance
         */
        private static function getDiceDistance(&$distance, &$a, &$b, $round)
        {
            $caGrams=count($a);
            $cbGrams=count($b);
            $ca=$caGrams*2-self::getEndStrLen($a);
            $cb=$cbGrams*2-self::getEndStrLen($b);
            $shared=$caGrams>$cbGrams?array_intersect($a, $b):array_intersect($b, $a);
            $distance=round(2*count($shared)/($ca+$cb), $round);
            // dice() returns the value of this call, so the result has to be
            // returned as well as stored in the by-reference argument.
            return $distance;
        }
142
        
143
        /**
         * Tells whether the last chunk of a list is shorter than two bytes.
         *
         * The previous implementation tested the result of array_key_last(),
         * which is the last KEY and not the last chunk, so it always returned 1
         * wherever that function exists. Both code paths now inspect the last
         * element itself.
         *
         * @param array $a list of chunks
         *
         * @return int 0 when the last element has a byte at offset 1, otherwise 1
         *             (also for an empty list)
         */
        private static function getEndStrLen($a)
        {
            // $a is received by value, so moving its internal pointer with end()
            // has no effect on the caller's array and no reset() is required.
            $last=end($a);
            return isset($last[1])?0:1;
        }
155
        
156
        /**
         * Levenshtein distance between the split forms of two strings.
         *
         * The most recent result is memoised and reused when the method is called
         * again with the same pair of strings.
         *
         * @param mixed $a first string
         * @param mixed $b second string
         *
         * @return int|false the distance, or false when either argument is not a string
         */
        public static function levenshtein($a, $b)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }

            static $distance=0;
            static $previous=array();
            if (array($a,$b)===$previous) {
                return $distance;
            }
            $previous=array($a,$b);
            $a=self::split($a);
            $b=self::split($b);
            // Static analysis reports that split() may yield false, which count()
            // does not accept; treat that case as "no characters".
            if (!is_array($a)) {
                $a=array();
            }
            if (!is_array($b)) {
                $b=array();
            }
            $ca=count($a);
            $cb=count($b);
            // Row 0 of the cost matrix: turning an empty prefix into the first
            // $y characters of $b costs $y.
            $dis=range(0, $cb);
            self::BuildLevenshteinCostMatrix($a, $b, $ca, $cb, $dis);

            return $distance=$dis[$cb];
        }
178
        
179
        
180
        /**
         * Levenshtein distance with the Damerau flag enabled, i.e. the variant
         * of BuildLevenshteinCostMatrix() that also looks at swapped neighbours.
         *
         * The most recent result is memoised and reused when the method is called
         * again with the same pair of strings.
         *
         * @param mixed $a first string
         * @param mixed $b second string
         *
         * @return int|false the distance, or false when either argument is not a string
         */
        public static function levenshteinDamerau($a, $b)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }

            static $distance=0;
            static $previous=array();
            if (array($a,$b)===$previous) {
                return $distance;
            }
            $previous=array($a,$b);
            $a=self::split($a);
            $b=self::split($b);
            // Static analysis reports that split() may yield false, which count()
            // does not accept; treat that case as "no characters".
            if (!is_array($a)) {
                $a=array();
            }
            if (!is_array($b)) {
                $b=array();
            }
            $ca=count($a);
            $cb=count($b);
            // Row 0 of the cost matrix: turning an empty prefix into the first
            // $y characters of $b costs $y.
            $dis=range(0, $cb);
            self::BuildLevenshteinCostMatrix($a, $b, $ca, $cb, $dis, true);

            return $distance=$dis[$cb];
        }
201
        
202
        /**
         * Fills the edit-distance matrix row by row, keeping only the rows that
         * are still needed; on return $dis holds the final row.
         *
         * The insert/delete/substitute costs come from costMatrix(). The
         * adjacent-transposition step is applied here because it needs the row
         * two steps back, which costMatrix() never receives; the former
         * in-helper step produced, for example, a distance of 1 for 'ab' against
         * 'bba' where 2 is correct.
         *
         * @param array $a       characters of the first string
         * @param array $b       characters of the second string
         * @param int   $ca      number of elements in $a
         * @param int   $cb      number of elements in $b
         * @param array $dis     in: row 0 (0..$cb); out: last row, by reference
         * @param bool  $damerau whether swapped neighbours count as one edit
         *
         * @return void
         */
        private static function BuildLevenshteinCostMatrix($a, $b, $ca, $cb, &$dis, $damerau=false)
        {
            $twoBack=array();
            for ($x=1;$x<=$ca;$x++) {
                $row=array($x);
                for ($y=1;$y<=$cb;$y++) {
                    // plain Levenshtein cell; the helper's own Damerau branch stays disabled
                    self::costMatrix($a, $b, $row, $dis, false, $x, $y);
                    if ($damerau&&$x>1&&$y>1&&$a[$x-1]===$b[$y-2]&&$a[$x-2]===$b[$y-1]) {
                        // swapping two neighbours: cost of the cell two rows and
                        // two columns back, plus one
                        $row[$y]=min($row[$y], $twoBack[$y-2]+1);
                    }
                }
                $twoBack=$dis;
                $dis=$row;
            }
        }
213
        
214
        /**
         * Computes one cell of the edit-distance matrix and stores it in $dis_new[$y].
         *
         * @param array $a       characters of the first string
         * @param array $b       characters of the second string
         * @param array $dis_new row being filled, updated by reference
         * @param array $dis     previously completed row
         * @param bool  $damerau whether the swapped-neighbour step is applied
         * @param int   $x       1-based position in $a
         * @param int   $y       1-based position in $b
         *
         * @return void
         */
        private static function costMatrix(&$a, &$b, &$dis_new, &$dis, $damerau, $x, $y)
        {
            $i=$x-1;
            $j=$y-1;
            // substitution cost: 0 for equal characters, otherwise 1
            $c=1;
            if ($a[$i] == $b[$j]) {
                $c=0;
            }
            // cheapest of: coming from the previous row, coming from the left
            // neighbour, or substituting on the diagonal
            $dis_new[$y]=min($dis[$y]+1, $dis_new[$j]+1, $dis[$j]+$c);
            if (!$damerau||$x<=1||$y<=1) {
                return;
            }
            if ($a[$i] == $b[$y-2] && $a[$x-2] == $b[$j]) {
                // NOTE(review): this overwrites the cell using the left neighbour and
                // $dis[$y-3] (an undefined offset when $y is 2); the usual recurrence
                // takes the cell two rows and two columns back plus one - confirm.
                $dis_new[$y]=min($dis_new[$j], $dis[$y-3]+$c);
            }
        }
224
    }
225
    
226
    
227
}
228