Passed
Pull Request — master (#10)
by Akpé Aurelle Emmanuel Moïse
01:39
created

distance::dice()   B

Complexity

Conditions 7
Paths 5

Size

Total Lines 21
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 14
nc 5
nop 3
dl 0
loc 21
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
/**
4
*
5
* @Name : similar-text
6
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
7
* @Date : 2019-04-01
8
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
9
* @Repository : https://github.com/manuwhat/similar
10
*
11
**/
12
13
14
namespace EZAMA{
15
    class distance extends complexCommonTextSimilarities
16
    {
17
        /**
         * Computes the Jaro and Jaro-Winkler scores of two strings.
         *
         * The most recent result is memoised in static locals. The memo key includes
         * $round, so asking for another precision on the same pair recomputes instead
         * of returning the previously rounded values.
         *
         * @param mixed $a     First string.
         * @param mixed $b     Second string.
         * @param int   $round Number of decimals handed to round().
         *
         * @return array|false Array with the keys 'jaro' and 'jaro-winkler', or false
         *                     when either argument is not a string.
         */
        public static function jaroWinkler($a, $b, $round=2)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }
            static $distance=array();
            static $previous=array();
            // Build the key before the helper runs: it receives $a and $b by reference and overwrites them.
            $key=array($a,$b,$round);
            if ($key===$previous) {
                return $distance;
            }
            $result=self::getJWDistance($a, $b, $distance, $round);
            // Remember the key only after a successful computation so a failure cannot poison the memo.
            $previous=$key;
            return $result;
        }
30
        
31
        
32
        
33
        /**
         * Fills $distance with the rounded 'jaro' and 'jaro-winkler' scores.
         *
         * @param mixed $a        First input; overwritten (by reference) with its matched characters.
         * @param mixed $b        Second input; overwritten (by reference) with its matched characters.
         * @param array $distance Receives the two scores (by reference).
         * @param int   $round    Number of decimals handed to round().
         *
         * @return array The same array that was written to $distance.
         */
        private static function getJWDistance(&$a, &$b, &$distance, $round)
        {
            $prepared=self::prepareJaroWinkler($a, $b);
            // Explicit reads instead of extract(): same variables, but visible to readers and static analysis.
            $a=$prepared['a'];
            $b=$prepared['b'];
            $corresponding=$prepared['corresponding'];

            // No matching character at all: the formula below would divide by zero, so both scores are 0.
            if (!$corresponding) {
                $distance=array('jaro'=>round(0.0, $round),'jaro-winkler'=>round(0.0, $round));
                return $distance;
            }

            // Count positions where the two matched-character lists disagree; half of that is $t.
            $t=0;
            $limit=min(count($a), count($b));
            for ($i=0;$i<$limit;$i++) {
                if ($a[$i]!==$b[$i]) {
                    $t++;
                }
            }
            $t/=2;

            $jaro=1/3*($corresponding/$prepared['ca']+$corresponding/$prepared['cb']+($corresponding-$t)/$corresponding);
            // The prefix bonus is capped at 4 and weighted by 0.1.
            $jaroWinkler=$jaro+(min($prepared['longCommonSubstr'], 4)*0.1*(1-$jaro));

            $distance=array('jaro'=>round($jaro, $round),'jaro-winkler'=>round($jaroWinkler, $round));

            return $distance;
        }
50
        
51
        /**
         * Splits both inputs and collects the matching statistics used by getJWDistance().
         *
         * @param mixed $a First input; replaced (by reference) with the result of self::split().
         * @param mixed $b Second input; replaced (by reference) with the result of self::split().
         *
         * @return array Keys: 'a' and 'b' (matched characters, re-indexed in position order),
         *               'corresponding', 'longCommonSubstr', 'ca' and 'cb' (element counts).
         */
        private static function prepareJaroWinkler(&$a, &$b)
        {
            $a=self::split($a);
            $b=self::split($b);
            $sizeA=count($a);
            $sizeB=count($b);
            $stats=array(
                'a'=>array(),
                'b'=>array(),
                'corresponding'=>0,
                'longCommonSubstr'=>0,
                'ca'=>$sizeA,
                'cb'=>$sizeB
            );
            // Maximum index gap at which two equal characters still count as a match.
            $window=max($sizeA, $sizeB)/2-1;
            self::jwMatches($a, $b, $stats, $window);
            // Matches were recorded keyed by position; order them and drop the keys.
            foreach (array('a','b') as $side) {
                ksort($stats[$side]);
                $stats[$side]=array_values($stats[$side]);
            }
            return $stats;
        }
64
        
65
        /**
         * Feeds every (element of $a, element of $b) pair to _jwMatches().
         *
         * @param array $a              Elements of the first input.
         * @param array $b              Elements of the second input.
         * @param array $transpositions Statistics array updated by reference.
         * @param float $Δ              Maximum index gap allowed for a match.
         *
         * @return void
         */
        private static function jwMatches(&$a, &$b, &$transpositions, $Δ)
        {
            foreach ($a as $posA=>$symbolA) {
                foreach ($b as $posB=>$symbolB) {
                    self::_jwMatches($symbolA, $symbolB, $posB, $posA, $transpositions, $Δ);
                }
            }
        }
73
        
74
        /**
         * Records one candidate match between an element of the first and of the second input.
         *
         * @param mixed $chr            Element of the first input.
         * @param mixed $char           Element of the second input.
         * @param int   $index          Position of $char.
         * @param int   $ind            Position of $chr.
         * @param array $transpositions Statistics array updated by reference.
         * @param float $Δ              Maximum index gap allowed for a match.
         *
         * @return void
         */
        private static function _jwMatches($chr, $char, $index, $ind, &$transpositions, $Δ)
        {
            // Not a match: different elements, or too far apart.
            if ($chr!==$char||abs($index-$ind)>$Δ) {
                return;
            }
            $transpositions['corresponding']++;
            if ($ind!==$index) {
                // Same element at different positions: remember it on both sides.
                $transpositions['a'][$ind]=$chr;
                $transpositions['b'][$index]=$char;
                return;
            }
            // Same element at the same position: possibly extends the common-run counter.
            if ($ind-1<=$transpositions['longCommonSubstr']) {
                $transpositions['longCommonSubstr']++;
            }
        }
88
        
89
        
90
        /**
         * Counts the positions at which two strings of equal byte length differ.
         *
         * The most recent result is memoised in static locals.
         *
         * @param mixed $a First string.
         * @param mixed $b Second string.
         *
         * @return int|false Number of differing positions, or false when either argument is
         *                   not a string or their strlen() values differ.
         */
        public static function hamming($a, $b)
        {
            $comparable=is_string($a)&&is_string($b)&&strlen($a)===strlen($b);
            if (!$comparable) {
                return false;
            }
            static $distance=0;
            static $previous=array();
            $pair=array($a,$b);
            if ($pair===$previous) {
                return $distance;
            }
            $previous=$pair;
            $left=self::split($a);
            $right=self::split($b);
            // Entries of $left whose key/value pair is absent from $right are the mismatches.
            $mismatches=array_diff_assoc($left, $right);
            $distance=count($mismatches);
            return $distance;
        }
106
        
107
        /**
         * Computes a Dice-style similarity of two strings from their two-character chunks.
         *
         * The most recent result is memoised in static locals; the memo key includes $round.
         *
         * @param mixed $a     First string.
         * @param mixed $b     Second string.
         * @param int   $round Number of decimals handed to round().
         *
         * @return float|false 0.0 when either string is empty, 1.0 when both are identical,
         *                     the rounded coefficient otherwise, or false when either
         *                     argument is not a string.
         */
        public static function dice($a, $b, $round=2)
        {
            if (!is_string($a)||!is_string($b)) {
                return false;
            }
            // Compare against '' explicitly: empty() would also reject the valid string "0".
            if ($a===''||$b==='') {
                return 0.0;
            }
            if ($a===$b) {
                return 1.0;
            }

            static $distance=0;
            static $previous=array();
            $key=array($a,$b,$round);
            if ($key===$previous) {
                return $distance;
            }
            $a=self::split($a, 2);
            $b=self::split($b, 2);
            // The helper writes its result into $distance by reference; return that variable
            // rather than the helper's return value so a fresh computation never yields null.
            self::getDiceDistance($distance, $a, $b, $round);
            $previous=$key;
            return $distance;
        }
129
        
130
        /**
         * Computes the rounded coefficient for two chunk lists and stores it in $distance.
         *
         * @param float $distance Receives the result (by reference).
         * @param array $a        Chunks of the first string.
         * @param array $b        Chunks of the second string.
         * @param int   $round    Number of decimals handed to round().
         *
         * @return float The value that was written to $distance.
         */
        private static function getDiceDistance(&$distance, &$a, &$b, $round)
        {
            // Chunk count times two, minus the getEndStrLen() correction for the last chunk.
            $ca=($caGrams=count($a))*2-self::getEndStrLen($a);
            $cb=($cbGrams=count($b))*2-self::getEndStrLen($b);
            // Intersect starting from the list with more chunks, as before.
            $common=$caGrams>$cbGrams?array_intersect($a, $b):array_intersect($b, $a);
            $distance=round(2*count($common)/($ca+$cb), $round);
            // Return the value as well: dice() uses this method's return value.
            return $distance;
        }
136
        
137
        /**
         * Tells whether the last chunk of a list is shorter than two bytes.
         *
         * The earlier array_key_last() branch inspected the integer key instead of the
         * chunk itself, so it always answered 1; the last value is inspected on every
         * PHP version now.
         *
         * @param array $a List of chunks.
         *
         * @return int 0 when the last chunk has at least two bytes, 1 otherwise
         *             (including for an empty list).
         */
        private static function getEndStrLen($a)
        {
            // $a is a by-value copy, so moving its internal pointer does not affect the caller.
            $last=end($a);
            // NOTE(review): isset($last[1]) is a byte test; confirm this is intended if split() yields multibyte chunks.
            return isset($last[1])?0:1;
        }
149
        
150
        /**
         * Returns the edit distance between two strings as computed by
         * BuildLevenshteinCostMatrix() without the transposition step.
         *
         * The most recent result is memoised in static locals.
         *
         * @param mixed $a First string.
         * @param mixed $b Second string.
         *
         * @return int|false The distance, or false when either argument is not a string.
         */
        public static function levenshtein($a, $b)
        {
            if (!(is_string($a)&&is_string($b))) {
                return false;
            }

            static $distance=0;
            static $previous=array();
            $pair=array($a,$b);
            if ($pair===$previous) {
                return $distance;
            }
            $previous=$pair;

            $left=self::split($a);
            $right=self::split($b);
            $leftSize=count($left);
            $rightSize=count($right);
            // First matrix row: 0..$rightSize.
            $row=range(0, $rightSize);
            self::BuildLevenshteinCostMatrix($left, $right, $leftSize, $rightSize, $row);
            $distance=$row[$rightSize];

            return $distance;
        }
172
        
173
        
174
        /**
         * Returns the edit distance between two strings as computed by
         * BuildLevenshteinCostMatrix() with the transposition step enabled.
         *
         * The most recent result is memoised in static locals.
         *
         * @param mixed $a First string.
         * @param mixed $b Second string.
         *
         * @return int|false The distance, or false when either argument is not a string.
         */
        public static function levenshteinDamerau($a, $b)
        {
            if (!(is_string($a)&&is_string($b))) {
                return false;
            }

            static $distance=0;
            static $previous=array();
            $pair=array($a,$b);
            if ($pair===$previous) {
                return $distance;
            }
            $previous=$pair;

            $left=self::split($a);
            $right=self::split($b);
            $leftSize=count($left);
            $rightSize=count($right);
            // First matrix row: 0..$rightSize.
            $row=range(0, $rightSize);
            self::BuildLevenshteinCostMatrix($left, $right, $leftSize, $rightSize, $row, true);
            $distance=$row[$rightSize];

            return $distance;
        }
195
        
196
        /**
         * Runs the row-by-row edit-distance recurrence and leaves the final row in $dis.
         *
         * Only the rows needed by the recurrence are kept: the current one, the previous
         * one and, for the transposition step, the one before that.
         *
         * @param array $a       Elements of the first input.
         * @param array $b       Elements of the second input.
         * @param int   $ca      Number of elements in $a.
         * @param int   $cb      Number of elements in $b.
         * @param array $dis     On entry the first row (0..$cb); on exit the last row (by reference).
         * @param bool  $damerau Whether adjacent transpositions count as a single edit.
         *
         * @return void
         */
        private static function BuildLevenshteinCostMatrix($a, $b, $ca, $cb, &$dis, $damerau=false)
        {
            // Row x-2; empty until two rows have been completed.
            $dis_old=array();
            for ($x=1;$x<=$ca;$x++) {
                $dis_new=array($x);
                for ($y=1;$y<=$cb;$y++) {
                    self::costMatrix($a, $b, $dis_new, $dis, $damerau, $x, $y, $dis_old);
                }
                $dis_old = $dis;
                $dis = $dis_new;
            }
        }

        /**
         * Computes one cell of the cost matrix.
         *
         * @param array $a       Elements of the first input.
         * @param array $b       Elements of the second input.
         * @param array $dis_new Row x being filled (by reference).
         * @param array $dis     Row x-1.
         * @param bool  $damerau Whether to apply the transposition step.
         * @param int   $x       1-based position in $a.
         * @param int   $y       1-based position in $b.
         * @param array $dis_old Row x-2; the transposition step is skipped when the needed cell is absent.
         *
         * @return void
         */
        private static function costMatrix(&$a, &$b, &$dis_new, &$dis, $damerau, $x, $y, $dis_old=array())
        {
            $c = ($a[$x-1] == $b[$y-1])?0:1;
            // Deletion, insertion, substitution.
            $dis_new[$y] = min($dis[$y]+1, $dis_new[$y-1]+1, $dis[$y-1]+$c);
            if (!$damerau || $x < 2 || $y < 2 || !isset($dis_old[$y-2])) {
                return;
            }
            if ($a[$x-1] == $b[$y-2] && $a[$x-2] == $b[$y-1]) {
                // Adjacent transposition: one edit on top of cell (x-2, y-2), kept only if it is cheaper.
                $dis_new[$y] = min($dis_new[$y], $dis_old[$y-2]+1);
            }
        }
218
    }
219
    
220
    
221
}
222