Completed
Pull Request — master (#3)
by Akpé Aurelle Emmanuel Moïse
01:44
created

commonTextSimilarities::strippedUrl()   A

Complexity

Conditions 5
Paths 3

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 8
rs 9.6111
c 0
b 0
f 0
cc 5
nc 3
nop 2
1
<?php
2
/**
3
*
4
* @Name : similar-text
5
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
6
* @Date : 2019-04-01
7
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
8
* @Repository : https://github.com/manuwhat/similar-text
9
*
10
**/
11
12
13
namespace EZAMA{
14
    
15
    
16
    class commonTextSimilarities extends similar_text
17
    {
18
        /**
         * Detailed, case-insensitive URL pattern, assembled from commented fragments.
         *
         * The whole subject must match (anchored with ^ and $). In order, the
         * fragments describe: an optional protocol, optional "user[:password]@"
         * credentials, a host that is either a domain name or a dotted IPv4
         * address, an optional port, an optional path with optional query string,
         * and an optional fragment.
         *
         * Capture group 9 is the repeated "domain label followed by a dot" group;
         * after a match it holds the last such label, which isUrl() reads.
         */
        const URL_FORMAT_EXTENDED_PATTERN =
            // protocol (optional)
            '/^((https?|ftps?|file):\/\/){0,1}'
            // username
            . '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'
            // password (optional, introduced by ":")
            . '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'
            // credentials must end with "@"; an empty inline regex comment "(?#"
            // is opened here and closed by the first ")" of the next fragment
            . '@)?(?#'
            // zero or more dot-terminated domain labels, followed by ...
            . ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'
            // ... the top level domain, OR
            . '[a-z][a-z0-9-]*[a-z0-9]'
            // ... the first three octets of an IPv4 address, each followed by a dot
            . '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'
            // ... and its fourth octet
            . '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'
            // port (optional)
            . ')(:\d+)?'
            // path
            . ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'
            // query string
            . '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'
            // both path and query string are optional
            . '?)?)?'
            // fragment (optional)
            . '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'
            // end of subject, case-insensitive flag
            . '$/i';
32
33
34
35
36
        /**
         * Coarse URL pre-check that isUrl() runs before the extended pattern.
         *
         * The pattern is delimited by double quotes and carries the "i"
         * (case-insensitive) flag. It accepts an optional http(s)/ftp(s)/file
         * scheme, then one or more characters from the first bracketed set, and
         * requires the last character to belong to the narrower second set.
         */
        const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';
37
        
38
        /**
         * Tells whether $url is a string accepted by both URL patterns and
         * extracts the last domain label of its host.
         *
         * @param mixed  $url       Candidate value; anything that is not a string yields false.
         * @param string $getDomain Output parameter. Receives capture group 9 of
         *                          URL_FORMAT_EXTENDED_PATTERN with trailing dots
         *                          removed, or an empty string when $url is rejected.
         *
         * @return bool True when $url matches URL_POSIX_FORMAT and URL_FORMAT_EXTENDED_PATTERN.
         */
        protected static function isUrl($url, &$getDomain = '')
        {
            // Always leave the output parameter in a defined state.
            $getDomain = '';

            if (!is_string($url) || !preg_match(self::URL_POSIX_FORMAT, $url)) {
                return false;
            }

            $matches = array();
            if (!preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches)) {
                return false;
            }

            // Only read group 9 after a successful match: on failure $matches is
            // empty and indexing it would raise an "undefined offset" notice.
            if (isset($matches[9])) {
                $getDomain = rtrim($matches[9], '.');
            }

            return true;
        }
45
        
46
        /**
         * Tells whether one argument is a URL and the other is exactly the
         * domain label that isUrl() extracts from it.
         *
         * $a is tried as the URL first; if that fails, the roles are swapped.
         * The non-URL argument is trimmed before the strict comparison.
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool True when the extracted label is identical to the trimmed other value.
         */
        public static function strippedUrl($a, $b)
        {
            // $a as the URL, $b as the bare label.
            if (self::isUrl($a, $domain) && is_string($b)) {
                return trim($b) === $domain;
            }

            // Roles swapped: $b as the URL, $a as the bare label.
            if (self::isUrl($b, $domain) && is_string($a)) {
                return trim($a) === $domain;
            }

            return false;
        }
56
        
57
        /**
         * Tells whether $a and $b are anagrams according to similarText().
         *
         * Delegates to similarText() and then requires the report it writes into
         * $check to carry 'similar' === 100.0 and 'contain' === true.
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool True when similarText() succeeds and both report conditions hold.
         */
        public static function areAnagrams($a, $b)
        {
            // Same guard as the sibling helpers: never index $check unless
            // similarText() actually produced an array report.
            if (!self::similarText($a, $b, 2, true, $check) || !is_array($check)) {
                return false;
            }

            return $check['similar'] === 100.0 && $check['contain'] === true;
        }
61
        
62
        /**
         * Tells whether similarText() considers $a and $b similar while its
         * report flags them as not equal.
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool True when similarText() succeeds, yields an array report,
         *              and that report's 'equal' entry is exactly false.
         */
        public static function similarButNotEqual($a, $b)
        {
            $succeeded = self::similarText($a, $b, 2, true, $check);
            if (!$succeeded || !is_array($check)) {
                return false;
            }

            return $check['equal'] === false;
        }
66
        
67
        /**
         * Tells whether $a is strictly longer than $b and similarText() reports
         * a 'substr' score of exactly 100.0 for the pair.
         *
         * Lengths are compared with strlen(), i.e. in bytes.
         *
         * @param mixed $a Candidate super-string.
         * @param mixed $b Candidate sub-string.
         *
         * @return bool
         */
        public static function aIsSuperStringOfB($a, $b)
        {
            // A super-string must be strictly longer than what it contains.
            if (strlen($a) <= strlen($b)) {
                return false;
            }

            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['substr'] === 100.0;
        }
75
        
76
        /**
         * Tells whether the 'a&b' entry of the similarText() report is keyed
         * 0, 1, 2, ... without gaps.
         *
         * An empty 'a&b' entry yields false, because range(0, -1) is never
         * identical to an empty key list.
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool
         */
        public static function haveSameRoot($a, $b)
        {
            if (!self::similarText($a, $b, 2, true, $check, true, true) || !is_array($check)) {
                return false;
            }

            $shared = $check['a&b'];
            $expectedKeys = range(0, count($shared) - 1);

            return $expectedKeys === array_keys($shared);
        }
80
        
81
        /**
         * Tells whether every part of $a shares a root with the part of $b found
         * at the same index.
         *
         * Both strings are lower-cased and split with getParts(); the parts of
         * $a are then compared one by one with haveSameRoot(). Parts of $b
         * beyond those of $a are not inspected.
         *
         * @param mixed $a First value; non-strings yield false.
         * @param mixed $b Second value; non-strings yield false.
         *
         * @return bool False as soon as a part of $a has no counterpart in $b
         *              or does not share its root; true otherwise.
         */
        public static function areStems($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $a = self::getParts(self::strtolower($a));
            $b = self::getParts(self::strtolower($b));
            foreach ($a as $index => $word) {
                // $b may have fewer parts than $a: a missing counterpart is a
                // mismatch, not an undefined-offset read.
                if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
                    return false;
                }
            }

            return true;
        }
96
        
97
        /**
         * Tells whether similarText() succeeds for $a and $b and wro_filter()
         * accepts the resulting report.
         *
         * The callback handed to wro_filter() keeps a part only if it is not
         * whitespace and, when $considerPunctuation is truthy, not punctuation
         * either.
         *
         * @param mixed $a                   First value.
         * @param mixed $b                   Second value.
         * @param bool  $considerPunctuation When truthy, punctuation-only parts are
         *                                   dropped together with whitespace parts.
         *
         * @return bool
         */
        public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
        {
            $keep = function ($part) use ($considerPunctuation) {
                if (ctype_space($part)) {
                    return false;
                }
                return !($considerPunctuation && ctype_punct($part));
            };

            if (!self::similarText($a, $b, 2, true, $check, true) || !is_array($check)) {
                return false;
            }

            return (bool) self::wro_filter($check, $keep);
        }
104
        
105
        /**
         * Evaluates a similarText() report for wordsReorderOccured().
         *
         * @param array    $check  Report exposing 'a-b', 'b-a', 'substr' and 'equal'.
         * @param callable $filter Callback deciding which entries of 'a-b' / 'b-a' count.
         *
         * @return bool True when nothing survives the filter in either 'a-b' or
         *              'b-a', 'substr' is truthy and 'equal' is falsy.
         */
        private static function wro_filter($check, $filter)
        {
            $leftOnly = array_filter($check['a-b'], $filter);
            if (!empty($leftOnly)) {
                return false;
            }

            $rightOnly = array_filter($check['b-a'], $filter);
            if (!empty($rightOnly)) {
                return false;
            }

            return $check['substr'] && !$check['equal'];
        }
109
        
110
        /**
         * Tells whether, once punctuation parts (and optionally whitespace
         * parts) are removed, every remaining part of $a also appears in $b.
         *
         * The comparison is one-directional: it is array_diff($a, $b) that must
         * be empty.
         *
         * @param mixed $a             First value; non-strings yield false.
         * @param mixed $b             Second value; non-strings yield false.
         * @param bool  $insensitive   Forwarded to filter(); selects the lower-casing path.
         * @param bool  $considerSpace When truthy, whitespace parts are removed too.
         *
         * @return bool
         */
        public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $keep = function ($part) use ($considerSpace) {
                if (ctype_punct($part)) {
                    return false;
                }
                return !($considerSpace && ctype_space($part));
            };

            self::filter($a, $b, $keep, $insensitive);
            $missingFromB = array_diff($a, $b);

            return count($missingFromB) === 0;
        }
121
        
122
        
123
        /**
         * Strips whitespace and punctuation parts from both strings
         * (case-insensitively) and hands the remaining parts to aoeStemming().
         *
         * @param mixed $a First value; non-strings yield false.
         * @param mixed $b Second value; non-strings yield false.
         *
         * @return bool Result of aoeStemming() on the filtered parts.
         */
        public static function acronymOrExpanded($a, $b)
        {
            $bothStrings = is_string($a) && is_string($b);
            if (!$bothStrings) {
                return false;
            }

            $keep = function ($part) {
                return !ctype_space($part) && !ctype_punct($part);
            };
            self::filter($a, $b, $keep, true);

            return self::aoeStemming($a, $b);
        }
135
        
136
        /**
         * Index-wise check used by acronymOrExpanded().
         *
         * Every part of $a must have a counterpart at the same key in $b that
         * shares its root (haveSameRoot()), and the two parts must not both have
         * an element at offset 2 (for string parts: must not both be three or
         * more bytes long).
         *
         * @param array $a Parts of the first text, keys as produced by filter().
         * @param array $b Parts of the second text, keys as produced by filter().
         *
         * @return bool False on the first pair violating the rules, true otherwise.
         */
        private static function aoeStemming($a, $b)
        {
            foreach ($a as $index => $word) {
                // Keys survive array_filter(), so $b may lack this index; treat
                // that as a mismatch instead of reading an undefined offset.
                if (!isset($b[$index])) {
                    return false;
                }

                $bothLong = isset($a[$index][2]) && isset($b[$index][2]);
                if (!self::haveSameRoot($word, $b[$index]) || $bothLong) {
                    return false;
                }
            }

            return true;
        }
145
        
146
        /**
         * Removes whitespace parts from both strings (case-insensitively) and
         * reports whether waorDiff() finds a positional difference between them.
         *
         * @param mixed $a First value; non-strings yield false.
         * @param mixed $b Second value; non-strings yield false.
         *
         * @return bool Result of waorDiff() on the filtered parts and their counts.
         */
        public static function wordsAddedOrRemoved($a, $b)
        {
            if (!(is_string($a) && is_string($b))) {
                return false;
            }

            $notWhitespace = function ($part) {
                return !ctype_space($part);
            };
            self::filter($a, $b, $notWhitespace, true);

            $countA = count($a);
            $countB = count($b);

            return self::waorDiff($a, $b, $countA, $countB);
        }
157
        
158
        /**
         * Replaces $a and $b, in place, by their filtered parts.
         *
         * Each text is first passed through self::strtolower() when $insensitive
         * is truthy, or self::split() otherwise, then through getParts(), and the
         * result is reduced with array_filter() (original keys are kept).
         *
         * @param mixed    $a           Text in, filtered parts out (by reference).
         * @param mixed    $b           Text in, filtered parts out (by reference).
         * @param callable $filter      Callback for array_filter().
         * @param bool     $insensitive Selects the strtolower() path over split().
         *
         * @return void
         */
        private static function filter(&$a, &$b, $filter, $insensitive=true)
        {
            $a = array_filter(
                self::getParts($insensitive ? self::strtolower($a) : self::split($a)),
                $filter
            );
            $b = array_filter(
                self::getParts($insensitive ? self::strtolower($b) : self::split($b)),
                $filter
            );
        }
168
        
169
        /**
         * Tells whether the re-indexed lists differ position by position.
         *
         * The list announced as longer ($ca > $cb means $a) is diffed against
         * the other with array_diff_assoc(); on equal counts $b is diffed
         * against $a.
         *
         * @param array $a  First list of parts.
         * @param array $b  Second list of parts.
         * @param int   $ca Number of elements in $a.
         * @param int   $cb Number of elements in $b.
         *
         * @return bool True when the diff is non-empty.
         */
        private static function waorDiff($a, $b, $ca, $cb)
        {
            $listA = array_values($a);
            $listB = array_values($b);

            if ($ca > $cb) {
                $difference = array_diff_assoc($listA, $listB);
            } else {
                $difference = array_diff_assoc($listB, $listA);
            }

            return (bool) $difference;
        }
173
    }
174
}
175