Passed
Pull Request — master (#3)
by Akpé Aurelle Emmanuel Moïse
01:34
created

commonTextSimilarities::strippedUrl()   A

Complexity

Conditions 5
Paths 3

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 8
rs 9.6111
c 0
b 0
f 0
cc 5
nc 3
nop 2
1
<?php
2
/**
3
*
4
* @Name : similar-text
5
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
6
* @Date : 2019-04-01
7
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
8
* @Repository : https://github.com/manuwhat/similar-text
9
*
10
**/
11
12
13
namespace EZAMA{
14
    
15
    
16
    class commonTextSimilarities extends similar_text
17
    {
18
        /**
         * Case-insensitive, fully anchored PCRE pattern describing a URL:
         * optional scheme, optional "user[:password]@" part, host (domain name or
         * dotted IPv4 address) with optional port, optional path and query string,
         * optional fragment.
         *
         * Capture groups of interest: 2 is the scheme, 8 is the host, 9 is the
         * repeated "domain segment" group (it keeps the last segment matched before
         * the top level domain, trailing dot included) and 13 is the port.
         */
        const URL_FORMAT_EXTENDED_PATTERN =
            '/^((https?|ftps?|file):\/\/){0,1}' // protocol
            . '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+' // username
            . '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?' // password
            . '@)?(?#' // auth requires @ (the "(?#" opens an empty regex comment closed on the next piece)
            . ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*' // domain segments AND
            . '[a-z][a-z0-9-]*[a-z0-9]' // top level domain OR
            . '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'
            . '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])' // IP address
            . ')(:\d+)?' // port
            . ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*' // path
            . '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)' // query string
            . '?)?)?' // path and query string optional
            . '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?' // fragment
            . '$/i';
32
33
34
35
36
        /**
         * Loose, case-insensitive URL shape check (the pattern delimiter is the
         * double quote): an optional http(s)/ftp(s)/file scheme followed by one or
         * more characters from a permissive URL character set, ending with a
         * character from a narrower set that excludes "?", "!", ":", ",", "." and ";".
         */
        const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';
37
        
38
        /**
         * Tells whether $url looks like a URL and reports one of its domain segments.
         *
         * The value must be a string accepted by both URL_POSIX_FORMAT and
         * URL_FORMAT_EXTENDED_PATTERN. Capture group 9 of the extended pattern is the
         * repeated "domain segment" group, so it holds the last segment matched
         * before the top level domain (e.g. "example." for "www.example.com").
         *
         * @param mixed  $url       Candidate value; anything but a string is rejected.
         * @param string $getDomain Output parameter: the captured segment without its
         *                          trailing dot, or '' when the group captured nothing.
         *
         * @return bool True when $url matches both patterns.
         */
        protected static function isUrl($url, &$getDomain='')
        {
            $matches = array();
            $isUrl = is_string($url)
                && preg_match(self::URL_POSIX_FORMAT, $url)
                && preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches);

            // Group 9 is missing whenever the input was rejected or the segment group
            // did not take part in the match; reading it unguarded raised an
            // undefined-offset notice and passed null to rtrim().
            $getDomain = isset($matches[9]) ? rtrim($matches[9], '.') : '';

            return $isUrl;
        }
45
        
46
        /**
         * Checks whether one argument is a URL and the other one is exactly the
         * domain segment that isUrl() extracts from that URL.
         *
         * $a is tried as the URL first; $b is only tried as the URL when the first
         * combination does not apply. The non-URL argument is trimmed before the
         * strict comparison.
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool True when the extracted segment equals the trimmed other value.
         */
        public static function strippedUrl($a, $b)
        {
            $segment = '';

            if (self::isUrl($a, $segment) && is_string($b)) {
                return $segment === trim($b);
            }

            if (self::isUrl($b, $segment) && is_string($a)) {
                return $segment === trim($a);
            }

            return false;
        }
56
        
57
        /**
         * Reports whether similarText() describes $a and $b as fully similar and
         * contained in one another.
         *
         * The result is true only when similarText() returns a truthy value and the
         * report it writes into $check is an array with 'similar' strictly equal to
         * 100.0 and 'contain' strictly equal to true. The meaning of the extra
         * arguments (2, true) is defined by similarText(), which is not visible here.
         *
         * @param mixed $a First value, forwarded to similarText().
         * @param mixed $b Second value, forwarded to similarText().
         *
         * @return bool
         */
        public static function areAnagrams($a, $b)
        {
            // is_array() guard added to match the sibling checks of this class: the
            // report is never indexed unless it really is an array.
            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['similar'] === 100.0
                && $check['contain'] === true;
        }
61
        
62
        /**
         * Reports whether similarText() succeeds for $a and $b while flagging them
         * as not equal.
         *
         * @param mixed $a First value, forwarded to similarText().
         * @param mixed $b Second value, forwarded to similarText().
         *
         * @return bool True when similarText() is truthy, its report is an array and
         *              the report's 'equal' entry is strictly false.
         */
        public static function similarButNotEqual($a, $b)
        {
            if (!self::similarText($a, $b, 2, true, $check)) {
                return false;
            }

            return is_array($check) && $check['equal'] === false;
        }
66
        
67
        /**
         * Reports whether $a is strictly longer than $b and similarText() rates the
         * 'substr' entry of its report at exactly 100.0.
         *
         * @param mixed $a Candidate super string.
         * @param mixed $b Candidate sub string.
         *
         * @return bool False straight away when $a is not longer (in bytes) than $b.
         */
        public static function aIsSuperStringOfB($a, $b)
        {
            // A value that is not longer than the other cannot be its super string.
            if (strlen($a) <= strlen($b)) {
                return false;
            }

            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['substr'] === 100.0;
        }
75
        
76
        /**
         * Reports whether the 'a&b' entry of the similarText() report is keyed
         * 0, 1, ..., n-1 with no gap.
         *
         * NOTE(review): presumably 'a&b' holds the shared pieces keyed by position,
         * so gap-free keys starting at 0 would mean a common prefix — confirm
         * against similarText().
         *
         * An empty 'a&b' entry yields false, because range(0, -1) is array(0, -1)
         * and never equals an empty key list.
         *
         * @param mixed $a First value, forwarded to similarText().
         * @param mixed $b Second value, forwarded to similarText().
         *
         * @return bool
         */
        public static function haveSameRoot($a, $b)
        {
            if (!self::similarText($a, $b, 2, true, $check, true, true) || !is_array($check)) {
                return false;
            }

            $expectedKeys = range(0, count($check['a&b']) - 1);

            return $expectedKeys === array_keys($check['a&b']);
        }
80
        
81
        /**
         * Reports whether every part of $a shares its root with the part of $b
         * found at the same index.
         *
         * Both strings are lower-cased with self::strtolower() and broken up with
         * self::getParts() (inherited helpers, not visible here) before the
         * pairwise haveSameRoot() comparison. Parts of $b beyond those of $a are
         * not inspected.
         *
         * @param mixed $a First text; must be a string.
         * @param mixed $b Second text; must be a string.
         *
         * @return bool False when either argument is not a string, when $b has no
         *              part at an index used by $a, or when a pair has no common root.
         */
        public static function areStems($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $a = self::getParts(self::strtolower($a));
            $b = self::getParts(self::strtolower($b));

            foreach ($a as $index => $word) {
                // $b may hold fewer parts than $a: treat the missing counterpart as a
                // mismatch instead of reading an undefined offset.
                if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
                    return false;
                }
            }

            return true;
        }
96
        
97
        /**
         * Reports whether $a and $b differ only by the order of their pieces,
         * judging from the similarText() report.
         *
         * True requires, in this order: a truthy similarText() result, an array
         * report, no significant piece left in the 'a-b' and 'b-a' entries, a
         * truthy 'substr' entry and a falsy 'equal' entry.
         *
         * @param mixed $a                   First value, forwarded to similarText().
         * @param mixed $b                   Second value, forwarded to similarText().
         * @param bool  $considerPunctuation When true, punctuation-only pieces are
         *                                   ignored along with whitespace-only ones;
         *                                   when false only whitespace is ignored.
         *
         * @return bool
         */
        public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
        {
            // Keeps the pieces that count as a real difference.
            $isSignificant = function ($piece) use ($considerPunctuation) {
                if (ctype_space($piece)) {
                    return false;
                }

                return !($considerPunctuation && ctype_punct($piece));
            };

            if (!self::similarText($a, $b, 2, true, $check, true) || !is_array($check)) {
                return false;
            }

            if (array_filter($check['a-b'], $isSignificant) || array_filter($check['b-a'], $isSignificant)) {
                return false;
            }

            return $check['substr'] && !$check['equal'];
        }
112
        
113
        /**
         * Reports whether every part of $a that survives the punctuation filter is
         * also present among the surviving parts of $b.
         *
         * Both texts are split and filtered by self::filter(); the comparison is a
         * one-directional array_diff($a, $b).
         *
         * @param mixed $a             First text; must be a string.
         * @param mixed $b             Second text; must be a string.
         * @param bool  $insensitive   Forwarded to self::filter().
         * @param bool  $considerSpace When true, whitespace-only parts are dropped
         *                             together with punctuation-only parts; when
         *                             false only punctuation-only parts are dropped.
         *
         * @return bool False when either argument is not a string.
         */
        public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $isKept = function ($piece) use ($considerSpace) {
                if (ctype_punct($piece)) {
                    return false;
                }

                return !($considerSpace && ctype_space($piece));
            };

            self::filter($a, $b, $isKept, $insensitive);

            return !array_diff($a, $b);
        }
124
        
125
        
126
        /**
         * Reports whether $a and $b pass the aoeStemming() comparison once
         * whitespace-only and punctuation-only parts have been removed by a
         * case-insensitive self::filter() call.
         *
         * @param mixed $a First text; must be a string.
         * @param mixed $b Second text; must be a string.
         *
         * @return bool False when either argument is not a string.
         */
        public static function acronymOrExpanded($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $isWord = function ($piece) {
                return !ctype_space($piece) && !ctype_punct($piece);
            };
            self::filter($a, $b, $isWord, true);

            return self::aoeStemming($a, $b);
        }
138
        
139
        /**
         * Pairwise check used by acronymOrExpanded().
         *
         * Every entry of $a must share its root (haveSameRoot()) with the entry of
         * $b stored under the same key, and at least one entry of each pair must
         * lack an offset 2 (for strings: be shorter than three bytes).
         *
         * @param array $a Filtered parts of the first text (keys may be sparse).
         * @param array $b Filtered parts of the second text (keys may be sparse).
         *
         * @return bool False as soon as a pair fails, or when $b has no entry under
         *              a key used by $a.
         */
        private static function aoeStemming($a, $b)
        {
            foreach ($a as $index => $word) {
                // Keys come from array_filter() and may not line up: a missing
                // counterpart is a mismatch, not an undefined-offset read.
                if (!isset($b[$index])) {
                    return false;
                }

                if (!self::haveSameRoot($word, $b[$index]) || (isset($a[$index][2]) && isset($b[$index][2]))) {
                    return false;
                }
            }

            return true;
        }
148
        
149
        /**
         * Reports whether the non-whitespace parts of $a and $b differ when they
         * are compared position by position (see waorDiff()).
         *
         * The parts are produced by a case-insensitive self::filter() call that
         * drops whitespace-only parts.
         *
         * @param mixed $a First text; must be a string.
         * @param mixed $b Second text; must be a string.
         *
         * @return bool False when either argument is not a string.
         */
        public static function wordsAddedOrRemoved($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $isNotBlank = function ($piece) {
                return !ctype_space($piece);
            };
            self::filter($a, $b, $isNotBlank, true);

            $countA = count($a);
            $countB = count($b);

            return self::waorDiff($a, $b, $countA, $countB);
        }
160
        
161
        /**
         * Replaces $a and $b, in place, with their filtered lists of parts.
         *
         * Each value is first passed through self::strtolower() (insensitive mode)
         * or self::split() (sensitive mode), then through self::getParts(), and the
         * result is reduced with array_filter(), which preserves the original keys.
         * The three helpers are inherited and not visible here.
         *
         * @param mixed    $a           First text; overwritten with its kept parts.
         * @param mixed    $b           Second text; overwritten with its kept parts.
         * @param callable $filter      Predicate returning true for parts to keep.
         * @param bool     $insensitive Selects the strtolower() preparation when true.
         *
         * @return void
         */
        private static function filter(&$a, &$b, $filter, $insensitive=true)
        {
            $partsOfA = $insensitive
                ? self::getParts(self::strtolower($a))
                : self::getParts(self::split($a));
            $a = array_filter($partsOfA, $filter);

            $partsOfB = $insensitive
                ? self::getParts(self::strtolower($b))
                : self::getParts(self::split($b));
            $b = array_filter($partsOfB, $filter);
        }
171
        
172
        /**
         * Tells whether two lists differ when compared position by position.
         *
         * Both lists are reindexed from 0 first. The difference is taken from the
         * list announced as longer: $a against $b when $ca > $cb, otherwise $b
         * against $a (this includes the case of equal counts).
         *
         * @param array $a  First list.
         * @param array $b  Second list.
         * @param int   $ca Number of entries announced for $a.
         * @param int   $cb Number of entries announced for $b.
         *
         * @return bool True when array_diff_assoc() finds at least one difference.
         */
        private static function waorDiff($a, $b, $ca, $cb)
        {
            $listA = array_values($a);
            $listB = array_values($b);

            if ($ca > $cb) {
                $difference = array_diff_assoc($listA, $listB);
            } else {
                $difference = array_diff_assoc($listB, $listA);
            }

            return !empty($difference);
        }
176
    }
177
}
178