Passed
Push — master ( 2433d5...5c4556 )
by Akpé Aurelle Emmanuel Moïse
47s queued 10s
created

similar_text::capturePartsWithLength()   A

Complexity

Conditions 5
Paths 6

Size

Total Lines 17
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 17
rs 9.5222
c 0
b 0
f 0
cc 5
nc 6
nop 5
1
<?php
2
/**
3
*
4
* @Name : similar-text
5
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
6
* @Date : 2019-04-01
7
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
8
* @Repository : https://github.com/manuwhat/similar
9
*
10
**/
11
12
13
namespace EZAMA{
14
        
15
    /**
     * Character-based similarity scoring for two strings.
     *
     * Both strings are split into UTF-8 characters and compared with PHP's
     * array set functions. The class only exposes static methods and cannot
     * be instantiated (its constructor is private).
     */
    class similar_text
    {
        /**
         * Private on purpose: the class is a static-only utility.
         */
        private function __construct()
        {
        }

        /**
         * Computes the similarity percentage between two strings.
         *
         * The score is the number of characters of the shorter string that also
         * occur in the longer one, divided by the length of the longer string.
         *
         * @param string $a             First string.
         * @param string $b             Second string.
         * @param int    $round         Precision handed to round().
         * @param bool   $insensitive   When truthy, ASCII characters are lower-cased before comparing.
         * @param mixed  $stats         Receives the statistics array with the keys 'similar',
         *                              'substr', 'contain' and 'equal' (plus 'a-b', 'b-a' and 'a&b'
         *                              when $getParts is truthy). Left untouched when false is returned.
         * @param bool   $getParts      When truthy, the difference/intersection arrays are added to $stats.
         * @param bool   $checkposition When truthy, a character only counts as common if it sits at
         *                              the same index in both strings (array_intersect_assoc()).
         *
         * @return float|false The 'similar' percentage, or false when $a or $b is not a string.
         */
        public static function similarText($a, $b, $round = 2, $insensitive = true, &$stats = false, $getParts = false, $checkposition = false)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            if ($insensitive) {
                $a = self::strtolower($a);
                $b = self::strtolower($b);
            } else {
                $a = self::split($a);
                $b = self::split($b);
            }

            /* normalise the flag so that e.g. an array passed by mistake is not carried around */
            $getParts = (bool) $getParts;

            $ca = count($a);
            $cb = count($b);

            /* the sequence with fewer elements always goes first into _check(),
               the larger count is the denominator of the score */
            if ($ca < $cb) {
                $stats = self::getStats($cb, $a, self::_check($a, $b, $getParts, $round, $checkposition), $getParts, $round);
            } else {
                $stats = self::getStats($ca, $b, self::_check($b, $a, $getParts, $round, $checkposition), $getParts, $round);
            }

            return $stats['similar'];
        }

        /**
         * Builds the raw comparison data consumed by getStats().
         *
         * Returned list layout (the first two entries exist only when $getParts is truthy):
         *   - array_diff($a, $b)
         *   - array_diff($b, $a)
         *   - common elements (array_intersect_assoc() when $checkposition, else array_intersect())
         *   - percentage of the word/delimiter tokens of $a that also appear among the tokens of $b
         *   - whether $a and $b are identical arrays
         *
         * @param array $a             Character list (similarText() passes the shorter one here).
         * @param array $b             Character list to compare against.
         * @param bool  $getParts      Whether to include the two difference arrays.
         * @param int   $round         Precision handed to round().
         * @param bool  $checkposition Whether common characters must share the same index.
         *
         * @return array
         */
        protected static function _check($a, $b, $getParts, $round, $checkposition = false)
        {
            $diff = array();
            if ($getParts) {
                $diff[] = array_diff($a, $b);
                $diff[] = array_diff($b, $a);
            }
            $diff[] = $checkposition ? array_intersect_assoc($a, $b) : array_intersect($a, $b);

            $tokensA = self::getParts($a, $c);
            $tokensB = self::getParts($b);
            /* $c is 0 when $a yields no token (e.g. empty string): report 0 instead of dividing by zero */
            $diff[] = $c > 0 ? round(count(array_intersect($tokensA, $tokensB)) / $c * 100, $round) : 0.0;

            $diff[] = $a === $b;

            return $diff;
        }

        /**
         * Turns the list produced by _check() into the associative statistics array.
         *
         * Note that 'a-b', 'b-a' and 'a&b' follow the argument order given to _check(),
         * i.e. "a" is the sequence similarText() passed first (the one with fewer, or as
         * many, elements), not necessarily the caller's first string.
         *
         * @param int   $ca       Element count used as denominator of the 'similar' score.
         * @param array $b        Character list that was passed first to _check().
         * @param array $diff     Result of _check().
         * @param bool  $getParts Must match the flag used for _check(); selects the list layout.
         * @param int   $round    Precision handed to round().
         *
         * @return array
         */
        protected static function getStats($ca, $b, $diff, $getParts, $round)
        {
            /* with $getParts the two difference arrays occupy indexes 0 and 1 */
            $offset = $getParts ? 2 : 0;
            $common = $diff[$offset];

            $stats = array();
            /* $ca is 0 when both inputs are empty: report 0 instead of dividing by zero
               (the native similar_text() also yields 0 percent for empty input) */
            $stats['similar'] = $ca > 0 ? round(count($common) * 100 / $ca, $round) : 0.0;
            $stats['substr'] = $diff[$offset + 1];
            $stats['contain'] = $common === $b;
            $stats['equal'] = $diff[$offset + 2];

            if ($getParts) {
                $stats['a-b'] = $diff[0];
                $stats['b-a'] = $diff[1];
                $stats['a&b'] = $common;
            }

            return $stats;
        }

        /**
         * Groups a character list into word tokens and delimiter tokens.
         *
         * @param array $b             Character list to tokenise.
         * @param int   $c             Receives the number of tokens produced (always reset to 0 first).
         * @param bool  $lengthCapture When truthy, every token is returned as array(token, length)
         *                             instead of a plain string.
         *
         * @return array
         */
        protected static function getParts($b, &$c = 0, $lengthCapture = false)
        {
            $parts = array();
            $c = 0;

            if ((bool) $lengthCapture) {
                self::capturePartsWithLength($b, 0, '', $c, $parts);
            } else {
                self::capturePartsWithoutLength($b, '', $c, $parts);
            }

            return $parts;
        }

        /**
         * Tokenises $b into plain strings.
         *
         * Each whitespace or punctuation element (ctype_space()/ctype_punct()) closes the
         * pending word: the word collected so far (possibly the empty string) and the
         * delimiter itself are both appended to $parts.
         *
         * @param array  $b     Character list (by reference, not modified).
         * @param string $tmp   Initial content of the pending word.
         * @param int    $c     Token counter, incremented for every appended token.
         * @param array  $parts Token list being filled.
         *
         * @return void
         */
        private static function capturePartsWithoutLength(&$b, $tmp, &$c, &$parts)
        {
            foreach ($b as $v) {
                if (ctype_space($v) || ctype_punct($v)) {
                    $parts[] = $tmp;
                    $parts[] = $v;
                    $c += 2;
                    $tmp = '';
                    continue;
                }
                $tmp .= $v;
            }
            /* strict comparison: empty() would wrongly discard a trailing "0" word */
            if ('' !== $tmp) {
                $parts[] = $tmp;
                $c++;
            }
        }

        /**
         * Tokenises $b into array(token, length) pairs.
         *
         * Same splitting rule as capturePartsWithoutLength(); the length is the number of
         * elements of $b that make up the word, and 1 for a delimiter.
         *
         * @param array  $b      Character list (by reference, not modified).
         * @param int    $length Initial length of the pending word.
         * @param string $tmp    Initial content of the pending word.
         * @param int    $c      Token counter, incremented for every appended token.
         * @param array  $parts  Token list being filled.
         *
         * @return void
         */
        private static function capturePartsWithLength(&$b, $length, $tmp, &$c, &$parts)
        {
            foreach ($b as $v) {
                $length++;
                if (ctype_space($v) || ctype_punct($v)) {
                    /* the delimiter was counted above, so the word is one element shorter */
                    $parts[] = array($tmp, $length - 1);
                    $parts[] = array($v, 1);
                    $c += 2;
                    $tmp = '';
                    $length = 0;
                    continue;
                }
                $tmp .= $v;
            }
            /* strict comparison: empty() would wrongly discard a trailing "0" word */
            if ('' !== $tmp) {
                $parts[] = array($tmp, $length);
                $c++;
            }
        }

        /**
         * Tells whether a string only contains the byte values accepted by the pattern below.
         *
         * NOTE(review): \x10 and \x13 look like decimal 10/13 (LF/CR) written as hex; LF and CR
         * are already listed as \x0A and \x0D. Kept unchanged to preserve behaviour.
         *
         * @param string $str
         *
         * @return bool True for the empty string or when no other byte is found.
         */
        protected static function is_ascii($str)
        {
            if ('' === $str) {
                return true;
            }

            return !preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
        }

        /**
         * Splits a string into characters and lower-cases the ASCII ones.
         *
         * Characters rejected by is_ascii() are returned unchanged.
         *
         * @param string $str
         *
         * @return array Character list; empty when the string could not be split.
         */
        protected static function strtolower($str)
        {
            $split = self::split($str);
            if (!is_array($split)) {
                return array();
            }

            return array_map(
                function ($val) {
                    return self::is_ascii($val) ? strtolower($val) : $val;
                },
                $split
            );
        }

        /**
         * Splits a string into UTF-8 characters, or into chunks of $grams characters.
         *
         * The last result is memoised in static variables and reused when the same
         * string and chunk size are requested again.
         *
         * @param string    $str   String to split.
         * @param int|false $grams Chunk size; anything that is not an integer between 1 and
         *                         strlen($str) means "one character per element".
         *
         * @return array Empty when $str is not a string or cannot be split.
         */
        protected static function split($str, $grams = false)
        {
            if (!is_string($str)) {
                return array();
            }
            static $split = [];
            static $old = '';
            static $oldGrams = 1;
            $grams = is_int($grams) && $grams >= 1 && $grams <= strlen($str) ? $grams : false;

            return self::getSplit($str, $split, $old, $oldGrams, $grams);
        }

        /**
         * Performs the actual split and refreshes the memoised values.
         *
         * @param string    $str      String to split.
         * @param array     $split    Cache slot receiving the result.
         * @param string    $old      Cache slot receiving $str.
         * @param int|false $oldGrams Cache slot receiving $grams.
         * @param int|false $grams    Chunk size, or false for single characters.
         *
         * @return array
         */
        private static function _split(&$str, &$split, &$old, &$oldGrams, $grams)
        {
            $old = $str;
            $oldGrams = $grams;

            if ($grams) {
                $pieces = preg_split('/(.{'.$grams.'})/su', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
            } else {
                $pieces = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
            }

            /* preg_split() returns false on failure (e.g. malformed UTF-8 with the /u modifier);
               never cache or return that, callers count() the result */
            $split = false === $pieces ? array() : $pieces;

            return $split;
        }

        /**
         * Returns the memoised split when it matches the request, otherwise recomputes it.
         *
         * @param string    $str      String to split.
         * @param array     $split    Memoised result.
         * @param string    $old      String the memoised result belongs to.
         * @param int|false $oldGrams Chunk size the memoised result belongs to.
         * @param int|false $grams    Requested chunk size.
         *
         * @return array
         */
        private static function getSplit(&$str, &$split, &$old, &$oldGrams, $grams)
        {
            if ($old === $str && $oldGrams === $grams) {
                return $split;
            }

            return self::_split($str, $split, $old, $oldGrams, $grams);
        }
    }
193
    
194
}
195