Passed
Push — master ( 8fb280...5e82f4 )
by Akpé Aurelle Emmanuel Moïse
31s
created

complexCommonTextSimilarities   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 119
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 63
dl 0
loc 119
rs 9.68
c 0
b 0
f 0
wmc 34

9 Methods

Rating   Name   Duplication   Size   Complexity  
A areStems() 0 14 5
A aoeStemming() 0 8 5
A filter() 0 8 2
A wordsAddedOrRemoved() 0 10 3
A strippedUrl() 0 8 5
A waorDiff() 0 3 2
A acronymOrExpanded() 0 11 4
A punctuactionChangesOccured() 0 10 5
A isUrl() 0 6 3
1
<?php
2
/**
3
*
4
* @Name : similar-text
5
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
6
* @Date : 2019-04-01
7
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
8
* @Repository : https://github.com/manuwhat/similar-text
9
*
10
**/
11
12
13
namespace EZAMA{
14
    
15
    /**
     * Heuristics for recognising common "complex" relationships between two
     * texts: a URL versus its bare domain label, stemmed variants, words added
     * or removed, punctuation-only differences and acronym/expanded pairs.
     *
     * The helpers getParts(), strtolower(), split() and haveSameRoot() are not
     * defined in this class; they are reached through self:: and are presumably
     * inherited from simpleCommonTextSimilarities (confirm against the parent).
     */
    class complexCommonTextSimilarities extends simpleCommonTextSimilarities
    {
        /**
         * Strict, case-insensitive URL pattern.
         *
         * Capture group 9 is the repeated "domain segment" group, so after a
         * successful match it holds the last dotted label before the top level
         * domain, including its trailing dot (e.g. "example." for
         * "http://www.example.com"). It is absent for single-label hosts and
         * for IPv4 hosts.
         */
        const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'.// optional protocol
                                            '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'.// username
                                            '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'.// optional password
                                            '@)?(?#'.// the auth part, when present, must end with @
                                            ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.// dotted domain segments, followed by
                                            '[a-z][a-z0-9-]*[a-z0-9]'.// the top level domain, OR
                                            '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
                                            '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.// an IPv4 address
                                            ')(:\d+)?'.// optional port
                                            ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'.// path
                                            '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'.// query string
                                            '?)?)?'.// path and query string are optional
                                            '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'.// optional fragment
                                            '$/i';

        /**
         * Looser, case-insensitive pre-check (delimited by double quotes): an
         * optional protocol followed by at least two characters drawn from a
         * URL-ish character set. isUrl() applies it before the strict pattern.
         */
        const URL_POSIX_FORMAT = '"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';

        /**
         * Tells whether $url is a string accepted by both URL patterns.
         *
         * @param mixed  $url       Value to test.
         * @param string $getDomain Out-parameter. Receives capture group 9 of
         *                          the strict pattern with trailing dots
         *                          removed, or '' when $url is not a URL or
         *                          the host has no dotted label (single-label
         *                          host, IPv4 address).
         *
         * @return bool True when both patterns match.
         */
        protected static function isUrl($url, &$getDomain = '')
        {
            // Always leave the out-parameter in a defined state.
            $getDomain = '';

            if (!is_string($url) || !preg_match(self::URL_POSIX_FORMAT, $url)) {
                return false;
            }

            $matches = array();
            if (!preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches)) {
                return false;
            }

            // preg_match() drops trailing groups that did not take part in the
            // match, so index 9 only exists when the host had a dotted label.
            if (isset($matches[9])) {
                $getDomain = rtrim($matches[9], '.');
            }

            return true;
        }

        /**
         * Tells whether one argument is a URL and the other one is exactly the
         * domain label extracted from it by isUrl() (compared against the
         * trimmed other argument, case-sensitively).
         *
         * $a is tried as the URL first; $b is only tried as the URL when $a is
         * not a URL (or $b is not a string).
         *
         * @param mixed $a First value.
         * @param mixed $b Second value.
         *
         * @return bool False when neither argument is a URL, when the other
         *              argument is not a string, or when no domain label could
         *              be extracted.
         */
        public static function strippedUrl($a, $b)
        {
            $domain = '';

            if (self::isUrl($a, $domain) && is_string($b)) {
                // An empty label (e.g. IPv4 host) must not match an empty/blank string.
                return $domain !== '' && $domain === trim($b);
            }

            if (self::isUrl($b, $domain) && is_string($a)) {
                return $domain !== '' && $domain === trim($a);
            }

            return false;
        }

        /**
         * Tells whether every part of $a shares its root with the part of $b
         * found at the same index, as decided by self::haveSameRoot().
         *
         * Both inputs go through self::strtolower() and self::getParts() first.
         * Parts of $b beyond those indexed by $a are not inspected.
         *
         * @param mixed $a First text.
         * @param mixed $b Second text.
         *
         * @return bool False when either argument is not a string, when $b has
         *              no part at one of $a's indexes, or when a pair of parts
         *              does not share a root.
         */
        public static function areStems($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $a = self::getParts(self::strtolower($a));
            $b = self::getParts(self::strtolower($b));
            foreach ($a as $index => $word) {
                // A part without a counterpart cannot share a root with it.
                if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Tells whether the two texts differ once whitespace-only parts are
         * removed: the longer list of parts is compared position by position
         * against the shorter one (see waorDiff()).
         *
         * @param mixed $a First text.
         * @param mixed $b Second text.
         *
         * @return bool False when either argument is not a string or when no
         *              positional difference is found.
         */
        public static function wordsAddedOrRemoved($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }
            // Keep every part that is not made of whitespace only.
            $filter = function($v) {
                return !(ctype_space($v));
            };
            self::filter($a, $b, $filter, true);
            return self::waorDiff($a, $b, count($a), count($b));
        }

        /**
         * Replaces $a and $b, in place, by their filtered lists of parts.
         *
         * array_filter() preserves the original keys, so the resulting arrays
         * may have gaps in their indexes.
         *
         * @param string   $a           Text, replaced by its filtered parts.
         * @param string   $b           Text, replaced by its filtered parts.
         * @param callable $filter      Predicate; parts for which it returns a
         *                              falsy value are dropped.
         * @param bool     $insensitive When true the text is passed through
         *                              self::strtolower() before getParts(),
         *                              otherwise through self::split().
         *
         * @return void
         */
        private static function filter(&$a, &$b, $filter, $insensitive = true)
        {
            if ($insensitive) {
                $a = array_filter(self::getParts(self::strtolower($a)), $filter);
                $b = array_filter(self::getParts(self::strtolower($b)), $filter);
            } else {
                $a = array_filter(self::getParts(self::split($a)), $filter);
                $b = array_filter(self::getParts(self::split($b)), $filter);
            }
        }

        /**
         * Positional difference between two lists of parts.
         *
         * Both lists are re-indexed from 0, then the list with the larger
         * count is diffed (index and value) against the other one; when the
         * counts are equal $b is diffed against $a.
         *
         * @param array $a  First list of parts.
         * @param array $b  Second list of parts.
         * @param int   $ca Number of parts in $a.
         * @param int   $cb Number of parts in $b.
         *
         * @return bool True when at least one index/value pair differs.
         */
        private static function waorDiff($a, $b, $ca, $cb)
        {
            $a = array_values($a);
            $b = array_values($b);

            if ($ca > $cb) {
                return (bool) array_diff_assoc($a, $b);
            }

            return (bool) array_diff_assoc($b, $a);
        }

        /**
         * Tells whether every part of $a that survives the removal of
         * punctuation (and, optionally, whitespace) parts also occurs in $b.
         *
         * NOTE(review): the check is one-directional (array_diff($a, $b)), so
         * extra parts in $b are ignored and identical texts also yield true;
         * confirm this is the intended contract.
         *
         * @param mixed $a             First text.
         * @param mixed $b             Second text.
         * @param bool  $insensitive   Forwarded to filter().
         * @param bool  $considerSpace When true, whitespace-only parts are
         *                             dropped in addition to punctuation-only
         *                             parts.
         *
         * @return bool False when either argument is not a string.
         */
        public static function punctuactionChangesOccured($a, $b, $insensitive = true, $considerSpace = true)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }
            $filter = function($v) use ($considerSpace) {
                return $considerSpace ? !(ctype_space($v) || ctype_punct($v)) : !ctype_punct($v);
            };
            self::filter($a, $b, $filter, $insensitive);
            return empty(array_diff($a, $b));
        }

        /**
         * Tells whether one text looks like the acronym of the other one,
         * as decided by aoeStemming() on the parts left after removing
         * whitespace-only and punctuation-only parts.
         *
         * @param mixed $a First text.
         * @param mixed $b Second text.
         *
         * @return bool False when either argument is not a string.
         */
        public static function acronymOrExpanded($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }
            // Keep parts that are neither whitespace-only nor punctuation-only.
            $filter = function($v) {
                return !(ctype_space($v) || ctype_punct($v));
            };

            self::filter($a, $b, $filter, true);
            return self::aoeStemming($a, $b);
        }

        /**
         * Pairwise check used by acronymOrExpanded().
         *
         * For every index of $a the part of $b at the same index must share
         * its root with it (self::haveSameRoot()), and at least one of the
         * two parts must be shorter than three bytes.
         *
         * @param array $a Filtered parts of the first text (keys preserved).
         * @param array $b Filtered parts of the second text (keys preserved).
         *
         * @return bool False when $b has no part at one of $a's indexes or
         *              when a pair fails the check; true otherwise.
         */
        private static function aoeStemming($a, $b)
        {
            foreach ($a as $index => $word) {
                // A part without a counterpart cannot be its acronym or expansion.
                if (!isset($b[$index])) {
                    return false;
                }
                // Offset 2 exists only in strings of three bytes or more.
                if (!self::haveSameRoot($word, $b[$index]) || (isset($word[2]) && isset($b[$index][2]))) {
                    return false;
                }
            }
            return true;
        }
    }
136
}
137