Completed
Pull Request — master (#10)
by Akpé Aurelle Emmanuel Moïse
02:35
created

distance::prepareJaroWinkler()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 12
rs 9.9332
c 0
b 0
f 0
cc 1
nc 1
nop 2
1
<?php
2
3
/**
4
*
5
* @Name : similar-text
6
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
7
* @Date : 2019-04-01
8
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
9
* @Repository : https://github.com/manuwhat/similar-text
10
*
11
**/
12
13
14
namespace EZAMA{
15
    class distance extends complexCommonTextSimilarities
16
    {
17
        /**
         * Public entry point for the Jaro / Jaro-Winkler computation.
         *
         * The last result is memoised in static locals so that an immediately
         * repeated call with identical arguments skips the computation.
         *
         * @param mixed $a     first value to compare; must be a string
         * @param mixed $b     second value to compare; must be a string
         * @param int   $round precision forwarded to getJWDistance()
         *
         * @return array|false false when either argument is not a string,
         *                     otherwise the value returned by getJWDistance()
         */
        public static function jaroWinkler($a, $b, $round=2)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            static $distance = array();
            static $previous = array();

            // The precision is part of the memo key: the cached values are
            // already rounded, so a hit for a different $round would be stale.
            $key = array($a, $b, $round);
            if ($key === $previous) {
                return $distance;
            }
            $previous = $key;

            // $distance is handed over by reference and filled by the helper.
            return self::getJWDistance($a, $b, $distance, $round);
        }
30
        
31
        
32
        
33
        /**
         * Turns the match data produced by prepareJaroWinkler() into the
         * rounded 'jaro' and 'jaro-winkler' scores.
         *
         * @param mixed $a        first input, forwarded by reference to prepareJaroWinkler()
         * @param mixed $b        second input, forwarded by reference to prepareJaroWinkler()
         * @param array $distance receives the keys 'jaro' and 'jaro-winkler' (by reference)
         * @param int   $round    precision passed to round()
         *
         * @return array the same array that is stored in $distance
         */
        private static function getJWDistance(&$a, &$b, &$distance, $round)
        {
            // Read the prepared values by key instead of extract()-ing them:
            // extract() would overwrite the by-reference $a and $b.
            $prepared = self::prepareJaroWinkler($a, $b);
            $matchedA = $prepared['a'];
            $matchedB = $prepared['b'];
            $corresponding = $prepared['corresponding'];

            // Positions at which the two matched sequences disagree; every
            // transposition is seen twice, hence the halving.
            $t = 0;
            $min = min(count($matchedA), count($matchedB));
            for ($i = 0; $i < $min; $i++) {
                if ($matchedA[$i] !== $matchedB[$i]) {
                    $t++;
                }
            }
            $t /= 2;

            // Without any match (which includes empty inputs) every term of
            // the formula would divide by zero; the score is 0 in that case.
            if ($corresponding > 0 && $prepared['ca'] > 0 && $prepared['cb'] > 0) {
                $jaro = 1/3*(
                    $corresponding/$prepared['ca']
                    + $corresponding/$prepared['cb']
                    + ($corresponding-$t)/$corresponding
                );
            } else {
                $jaro = 0;
            }

            $distance['jaro'] = $jaro;
            // Prefix bonus: at most 4 leading characters, scaling factor 0.1.
            $distance['jaro-winkler'] = $jaro + (min($prepared['longCommonSubstr'], 4)*0.1*(1-$jaro));

            $distance = array_map(function ($v) use ($round) {
                return round($v, $round);
            }, $distance);

            return $distance;
        }
50
        
51
        /**
         * Splits both inputs and collects the match data needed for the
         * Jaro / Jaro-Winkler formula.
         *
         * @param mixed $a first input; replaced by reference with its split form
         * @param mixed $b second input; replaced by reference with its split form
         *
         * @return array keys: 'a' and 'b' (matched elements, re-indexed in
         *               position order), 'corresponding', 'longCommonSubstr',
         *               'ca' and 'cb' (element counts of the split inputs)
         */
        private static function prepareJaroWinkler(&$a, &$b)
        {
            $a = self::split($a);
            $b = self::split($b);

            // split() is defined outside this view; static analysis reports it
            // may return false, which count() must not receive.
            if (!is_array($a)) {
                $a = array();
            }
            if (!is_array($b)) {
                $b = array();
            }

            $transpositions = array(
                'a' => array(),
                'b' => array(),
                'corresponding' => 0,
                'longCommonSubstr' => 0,
                'ca' => count($a),
                'cb' => count($b),
            );

            // Match window. Clamped at 0 so that one-character inputs can
            // still match at the same position (the raw value would be -0.5).
            $window = max(0, max($transpositions['ca'], $transpositions['cb'])/2-1);
            self::jwMatches($a, $b, $transpositions, $window);

            // Order the matched elements by original position, then drop the keys.
            ksort($transpositions['a']);
            ksort($transpositions['b']);
            $transpositions['a'] = array_values($transpositions['a']);
            $transpositions['b'] = array_values($transpositions['b']);

            return $transpositions;
        }
64
        
65
        /**
         * Finds the matching elements of $a and $b and records them in
         * $transpositions.
         *
         * An element of $a matches the first not-yet-matched, identical element
         * of $b whose position differs by at most $Δ. Each element takes part in
         * at most one match, so 'corresponding' can never exceed the length of
         * the shorter input.
         *
         * @param array     $a              elements of the first input
         * @param array     $b              elements of the second input
         * @param array     $transpositions by reference; 'a' and 'b' receive the
         *                                  matched elements keyed by position,
         *                                  'corresponding' the match count,
         *                                  'longCommonSubstr' the common prefix length
         * @param int|float $Δ              maximum position difference for a match
         */
        private static function jwMatches(&$a, &$b, &$transpositions, $Δ)
        {
            $usedB = array();
            foreach ($a as $ind => $chr) {
                foreach ($b as $index => $char) {
                    if (isset($usedB[$index]) || $chr !== $char || abs($index-$ind) > $Δ) {
                        continue;
                    }
                    // Consume this element of $b so it cannot be matched again.
                    $usedB[$index] = true;
                    $transpositions['a'][$ind] = $chr;
                    $transpositions['b'][$index] = $char;
                    $transpositions['corresponding']++;
                    break;
                }
            }

            // The prefix bonus counts only the uninterrupted run of identical
            // elements at the very start; it stops at the first mismatch.
            // Assumes 0-based consecutive integer keys — TODO confirm against split().
            $prefix = 0;
            while (isset($a[$prefix], $b[$prefix]) && $a[$prefix] === $b[$prefix]) {
                $prefix++;
            }
            $transpositions['longCommonSubstr'] = $prefix;
        }
83
        
84
        
85
        /**
         * Counts the positions at which two equally long strings differ.
         *
         * The last result is memoised in static locals for an immediately
         * repeated call with the same arguments.
         *
         * @param mixed $a first value; must be a string
         * @param mixed $b second value; must be a string
         *
         * @return int|false number of differing positions, or false when the
         *                   arguments are not strings, differ in length, or
         *                   cannot be split
         */
        public static function hamming($a, $b)
        {
            if (!is_string($a) || !is_string($b) || (strlen($a) !== strlen($b))) {
                return false;
            }
            // Equal byte length was checked above, so both are empty here.
            if ($a === '') {
                return 0;
            }

            static $distance = 0;
            static $previous = array();
            $key = array($a, $b);
            if ($key === $previous) {
                return $distance;
            }

            $charsA = self::split($a);
            $charsB = self::split($b);
            // split() is defined outside this view; static analysis reports it
            // may return false. Equal byte length also does not guarantee equal
            // element counts if split() is multibyte-aware, so re-check here.
            if (!is_array($charsA) || !is_array($charsB) || count($charsA) !== count($charsB)) {
                return false;
            }

            $distance = count(array_diff_assoc($charsA, $charsB));
            // Prime the cache only after a successful computation so a failed
            // call can never be answered with an older result.
            $previous = $key;

            return $distance;
        }
101
        
102
        /**
         * Dice-style similarity computed on the 2-element chunks returned by
         * split($string, 2).
         *
         * The last result is memoised in static locals for an immediately
         * repeated call with the same arguments.
         *
         * @param mixed $a     first value; must be a string
         * @param mixed $b     second value; must be a string
         * @param int   $round precision passed to round()
         *
         * @return float|false false when an argument is not a string or cannot
         *                     be split; 0.0 when either string is empty; 1.0
         *                     when both are identical; otherwise the rounded score
         */
        public static function dice($a, $b, $round=2)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }
            // Compare against '' explicitly: empty("0") is true, which would
            // wrongly report the one-character string "0" as empty.
            if ($a === '' || $b === '') {
                return 0.0;
            }
            if ($a === $b) {
                return 1.0;
            }

            static $distance = 0;
            static $previous = array();
            // The cached value is already rounded, so $round belongs in the key.
            $key = array($a, $b, $round);
            if ($key === $previous) {
                return $distance;
            }

            $gramsA = self::split($a, 2);
            $gramsB = self::split($b, 2);
            // split() is defined outside this view; static analysis reports it
            // may return false.
            if (!is_array($gramsA) || !is_array($gramsB)) {
                return false;
            }

            $caGrams = count($gramsA);
            $cbGrams = count($gramsB);
            // Two per chunk, minus one when the final chunk is incomplete.
            $ca = $caGrams*2-self::getEndStrLen($gramsA);
            $cb = $cbGrams*2-self::getEndStrLen($gramsB);

            // array_intersect() keeps the elements of its first argument, so
            // the input with more chunks is placed first.
            $common = $caGrams > $cbGrams
                ? array_intersect($gramsA, $gramsB)
                : array_intersect($gramsB, $gramsA);

            $distance = round(2*count($common)/($ca+$cb), $round);
            $previous = $key;

            return $distance;
        }
127
        
128
        /**
         * Tells whether the last element of $a is shorter than two bytes.
         *
         * @param array $a list of string chunks
         *
         * @return int 0 when the last element is a string with a second byte,
         *             1 otherwise (including an empty or non-array input)
         */
        private static function getEndStrLen($a)
        {
            // Inspect the last VALUE. array_key_last() yields the last key (an
            // integer), on which isset($key[1]) is always false. $a is a
            // by-value copy, so moving its internal pointer with end() has no
            // effect on the caller.
            $last = is_array($a) ? end($a) : false;

            return (is_string($last) && isset($last[1])) ? 0 : 1;
        }
140
        
141
        /**
         * Levenshtein edit distance between two strings, computed on the
         * elements returned by split().
         *
         * The last result is memoised in static locals for an immediately
         * repeated call with the same arguments.
         *
         * @param mixed $a first value; must be a string
         * @param mixed $b second value; must be a string
         *
         * @return int|false the distance, or false when an argument is not a
         *                   string or cannot be split
         */
        public static function levenshtein($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            static $distance = 0;
            static $previous = array();
            $key = array($a, $b);
            if ($key === $previous) {
                return $distance;
            }

            // An empty string has no elements; it is not handed to split(),
            // whose result for '' cannot be seen from here.
            $charsA = ($a === '') ? array() : self::split($a);
            $charsB = ($b === '') ? array() : self::split($b);
            // Static analysis reports that split() may return false.
            if (!is_array($charsA) || !is_array($charsB)) {
                return false;
            }

            $ca = count($charsA);
            $cb = count($charsB);
            // Row 0 of the cost table: distance from the empty prefix of $a.
            $dis = range(0, $cb);
            self::BuildLevenshteinCostMatrix($charsA, $charsB, $ca, $cb, $dis);

            // Prime the cache only once the result is known.
            $previous = $key;

            return $distance = $dis[$cb];
        }
163
        
164
        
165
        /**
         * Edit distance that additionally allows swapping two adjacent
         * elements; same as levenshtein() but with the transposition flag of
         * BuildLevenshteinCostMatrix() enabled.
         *
         * The last result is memoised in static locals for an immediately
         * repeated call with the same arguments.
         *
         * @param mixed $a first value; must be a string
         * @param mixed $b second value; must be a string
         *
         * @return int|false the distance, or false when an argument is not a
         *                   string or cannot be split
         */
        public static function levenshteinDamerau($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            static $distance = 0;
            static $previous = array();
            $key = array($a, $b);
            if ($key === $previous) {
                return $distance;
            }

            // An empty string has no elements; it is not handed to split(),
            // whose result for '' cannot be seen from here.
            $charsA = ($a === '') ? array() : self::split($a);
            $charsB = ($b === '') ? array() : self::split($b);
            // Static analysis reports that split() may return false.
            if (!is_array($charsA) || !is_array($charsB)) {
                return false;
            }

            $ca = count($charsA);
            $cb = count($charsB);
            // Row 0 of the cost table: distance from the empty prefix of $a.
            $dis = range(0, $cb);
            self::BuildLevenshteinCostMatrix($charsA, $charsB, $ca, $cb, $dis, true);

            // Prime the cache only once the result is known.
            $previous = $key;

            return $distance = $dis[$cb];
        }
186
        
187
        /**
         * Runs the row-by-row edit-distance recurrence and leaves the final
         * row in $dis.
         *
         * @param array $a       elements of the first input
         * @param array $b       elements of the second input
         * @param int   $ca      number of elements in $a
         * @param int   $cb      number of elements in $b
         * @param array $dis     by reference; on entry row 0 (0..$cb), on exit
         *                       the last row, so $dis[$cb] is the distance
         * @param bool  $damerau when true, swapping two adjacent elements
         *                       counts as a single edit
         */
        private static function BuildLevenshteinCostMatrix($a, $b, $ca, $cb, &$dis, $damerau=false)
        {
            // Row x-2. The transposition step needs the cell two rows up and
            // two columns left, which the previous row alone cannot provide.
            $twoBack = array();

            for ($x = 1; $x <= $ca; $x++) {
                $current = array($x);
                for ($y = 1; $y <= $cb; $y++) {
                    $cost = ($a[$x-1] === $b[$y-1]) ? 0 : 1;
                    // deletion, insertion, substitution
                    $best = min($dis[$y]+1, $current[$y-1]+1, $dis[$y-1]+$cost);

                    if ($damerau
                        && $x > 1 && $y > 1
                        && $a[$x-1] === $b[$y-2]
                        && $a[$x-2] === $b[$y-1]
                    ) {
                        // A transposition competes with the cell computed
                        // above; it must not replace it unconditionally.
                        $best = min($best, $twoBack[$y-2]+1);
                    }
                    $current[$y] = $best;
                }
                $twoBack = $dis;
                $dis = $current;
            }
        }
204
    }
205
    
206
    
207
}
208