commonTextSimilarities - Code Metrics - Inspection of "common text similarities implemented" - manuwhat/similar-text - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#2)

by Akpé Aurelle Emmanuel Moïse

created 2019-04-21 11:56 UTC

commonTextSimilarities C

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	149
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	53
eloc	81
dl	0
loc	149
rs	6.96
c	0
b	0
f	0

11 Methods

Rating	Name	Size	Complexity
A	isUrl()	5	3
A	similarButNotEqual()	3	3
A	wordsAddedOrRemoved()	14	4
B	acronymOrExpanded()	17	8
A	areStems()	14	5
A	haveSameRoot()	3	3
A	strippedUrl()	8	5
B	wordsReorderOccured()	14	9
A	punctuactionChangesOccured()	17	6
A	aIsSuperStringOfB()	6	4
A	areAnagrams()	3	3

How to fix Complexity

<?php
/**
*
* @Name : similar-text
* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
* @Date : 2019-04-01
* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
* @Repository : https://github.com/manuwhat/similar
*
**/


namespace EZAMA{
    
    
    /**
     * Boolean "is this kind of similarity present?" helpers layered on top of
     * the inherited similar_text::similarText() comparison.
     *
     * Every public method is static, receives the two values to compare and
     * returns a boolean. Most of them call similarText() with a by-reference
     * `$check` argument and then inspect individual keys of the statistics
     * array it fills in ('similar', 'contain', 'substr', 'equal', 'a&b',
     * 'a-b', 'b-a').
     *
     * NOTE(review): similarText(), getParts(), strtolower() and split() are
     * inherited and not visible here; the exact meaning of each statistics key
     * and of the positional arguments passed to similarText() should be
     * confirmed against the parent class.
     */
    class commonTextSimilarities extends similar_text
    {
        /**
         * Detailed URL pattern. Capture group 9 holds the last dot-terminated
         * host label that precedes the top-level domain (for example
         * "example." in "www.example.com"); isUrl() relies on that index.
         */
        const URL_FORMAT_EXTENDED_PATTERN = '/^((https?|ftps?|file):\/\/){0,1}'. // protocol
                                            '(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'. // username
                                            '(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'. // password
                                            '@)?(?#'. // auth requires @
                                            ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'. // domain segments AND
                                            '[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR
                                            '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
                                            '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'. // IP address
                                            ')(:\d+)?'. // port
                                            ')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'. // path
                                            '(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'. // query string
                                            '?)?)?'. // path and query string optional
                                            '(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'. // fragment
                                            '$/i';

        /**
         * Coarse character-whitelist pattern applied before the detailed
         * pattern above.
         */
        const URL_POSIX_FORMAT='"^(\b(https?|ftps?|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]$"i';

        /**
         * Tell whether a value is a string matching both URL patterns.
         *
         * @param mixed  $url       Value to test.
         * @param string $getDomain Out-parameter. Receives capture group 9 of
         *                          the extended pattern with trailing dots
         *                          removed, or '' when the value is not a URL
         *                          or that group is absent.
         *
         * @return bool True when both patterns match.
         */
        protected static function isUrl($url, &$getDomain='')
        {
            // Pre-seed $matches: the && chain may short-circuit before the
            // second preg_match() ever gets a chance to assign it.
            $matches = array();
            $bool = is_string($url)
                && preg_match(self::URL_POSIX_FORMAT, $url)
                && preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches);

            // Only read group 9 when a match actually produced it; otherwise
            // report an empty domain instead of touching an undefined offset.
            $getDomain = ($bool && isset($matches[9])) ? rtrim($matches[9], '.') : '';

            return $bool;
        }

        /**
         * Tell whether one argument is a URL whose extracted domain label is
         * identical to the other (trimmed) argument.
         *
         * $a is tried as the URL first; $b is tried as the URL only when the
         * first combination does not apply.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool
         */
        public static function strippedUrl($a, $b)
        {
            if (self::isUrl($a, $domain) && is_string($b)) {
                return $domain === trim($b);
            }
            if (self::isUrl($b, $domain) && is_string($a)) {
                return $domain === trim($a);
            }

            return false;
        }

        /**
         * True when similarText() succeeds and reports 'similar' === 100.0
         * together with 'contain' === true.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool
         */
        public static function areAnagrams($a, $b)
        {
            // is_array() guard added for parity with the sibling methods, so a
            // non-array $check can never be indexed.
            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['similar'] === 100.0
                && $check['contain'] === true;
        }

        /**
         * True when similarText() succeeds and reports 'equal' === false.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool
         */
        public static function similarButNotEqual($a, $b)
        {
            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['equal'] === false;
        }

        /**
         * True when $a is longer (in bytes) than $b and similarText() reports
         * 'substr' === 100.0.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool
         */
        public static function aIsSuperStringOfB($a, $b)
        {
            if (strlen($a) <= strlen($b)) {
                return false;
            }

            return self::similarText($a, $b, 2, true, $check)
                && is_array($check)
                && $check['substr'] === 100.0;
        }

        /**
         * True when the keys of the 'a&b' statistics entry are exactly
         * 0..n-1, i.e. a gap-free run starting at index 0.
         *
         * An empty 'a&b' entry yields false, because range(0, -1) is [0, -1]
         * and never equals an empty key list.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool
         */
        public static function haveSameRoot($a, $b)
        {
            return self::similarText($a, $b, 2, true, $check, true, true)
                && is_array($check)
                && range(0, count($check['a&b']) - 1) === array_keys($check['a&b']);
        }

        /**
         * True when every part of $a shares a root (see haveSameRoot()) with
         * the part of $b stored under the same index.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool False for non-string input or when $b has no part at
         *              an index present in $a.
         */
        public static function areStems($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $a = self::getParts(self::strtolower($a));
            $b = self::getParts(self::strtolower($b));
            foreach ($a as $index => $word) {
                // $b may be shorter than $a: a missing counterpart means the
                // texts cannot be stems of one another.
                if (!isset($b[$index]) || !self::haveSameRoot($word, $b[$index])) {
                    return false;
                }
            }

            return true;
        }

        /**
         * True when, ignoring whitespace (and punctuation when
         * $considerPunctuation is true), the 'a-b' and 'b-a' statistics
         * entries are both empty, 'substr' is truthy and 'equal' is falsy.
         *
         * @param mixed $a
         * @param mixed $b
         * @param bool  $considerPunctuation When true, punctuation-only
         *                                   entries are discarded as well.
         *
         * @return bool
         */
        public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
        {
            $filter = function ($v) use ($considerPunctuation) {
                return $considerPunctuation
                    ? !(ctype_space($v) || ctype_punct($v))
                    : !ctype_space($v);
            };

            return self::similarText($a, $b, 2, true, $check, true)
                && is_array($check)
                && empty(array_filter($check['a-b'], $filter))
                && empty(array_filter($check['b-a'], $filter))
                && $check['substr']
                && !$check['equal'];
        }

        /**
         * True when every part of $a that survives the punctuation filter
         * also occurs among the filtered parts of $b.
         *
         * NOTE(review): the comparison is one-directional (array_diff($a, $b)),
         * so extra parts that only exist in $b do not make it fail — confirm
         * whether that is intended.
         *
         * @param mixed $a
         * @param mixed $b
         * @param bool  $insensitive   When true the parts are built from
         *                             self::strtolower(), otherwise from
         *                             self::split().
         * @param bool  $considerSpace When true whitespace-only parts are
         *                             dropped in addition to punctuation.
         *
         * @return bool False for non-string input.
         */
        public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $filter = function ($v) use ($considerSpace) {
                return $considerSpace
                    ? !(ctype_space($v) || ctype_punct($v))
                    : !ctype_punct($v);
            };

            if ($insensitive) {
                $a = array_filter(self::getParts(self::strtolower($a)), $filter);
                $b = array_filter(self::getParts(self::strtolower($b)), $filter);
            } else {
                $a = array_filter(self::getParts(self::split($a)), $filter);
                $b = array_filter(self::getParts(self::split($b)), $filter);
            }

            return empty(array_diff($a, $b));
        }

        /**
         * True when every non-space, non-punctuation part of $a shares a root
         * with the part of $b under the same index, and at each index at
         * least one of the two parts is shorter than three bytes.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool False for non-string input or when $b has no part at
         *              an index present in $a.
         */
        public static function acronymOrExpanded($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $filter = function ($v) {
                return !(ctype_space($v) || ctype_punct($v));
            };

            $a = array_filter(self::getParts(self::strtolower($a)), $filter);
            $b = array_filter(self::getParts(self::strtolower($b)), $filter);
            foreach ($a as $index => $word) {
                // array_filter() keeps the original keys, so an index present
                // in $a may have been filtered out of (or never existed in) $b.
                if (!isset($b[$index])) {
                    return false;
                }
                // Offset [2] exists only on parts of three or more bytes.
                $bothLong = isset($a[$index][2]) && isset($b[$index][2]);
                if ($bothLong || !self::haveSameRoot($word, $b[$index])) {
                    return false;
                }
            }

            return true;
        }

        /**
         * True when the non-whitespace parts of the two strings differ
         * position by position. The side with more parts is diffed against
         * the other; on a tie $b is diffed against $a.
         *
         * @param mixed $a
         * @param mixed $b
         *
         * @return bool False for non-string input.
         */
        public static function wordsAddedOrRemoved($a, $b)
        {
            if (!is_string($a) || !is_string($b)) {
                return false;
            }

            $filter = function ($v) {
                return !ctype_space($v);
            };

            // Re-index after filtering so array_diff_assoc() compares by position.
            $a = array_values(array_filter(self::getParts(self::strtolower($a)), $filter));
            $b = array_values(array_filter(self::getParts(self::strtolower($b)), $filter));

            $diff = (count($a) > count($b))
                ? array_diff_assoc($a, $b)
                : array_diff_assoc($b, $a);

            return (bool) $diff;
        }
    }
}


1			<?php
2			/**
3			*
4			* @Name : similar-text
5			* @Programmer : Akpé Aurelle Emmanuel Moïse Zinsou
6			* @Date : 2019-04-01
7			* @Released under : https://github.com/manuwhat/similar-text/blob/master/LICENSE
8			* @Repository : https://github.com/manuwhat/similar
9			*
10			**/
11
12
13			namespace EZAMA{
14
15
16			class commonTextSimilarities extends similar_text
17			{
18			const URL_FORMAT_EXTENDED_PATTERN = '/^((https?\|ftps?\|file):\/\/){0,1}'. // protocol
19			'(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]\|%[0-9a-f]{2})+'. // username
20			'(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]\|%[0-9a-f]{2})+)?'. // password
21			'@)?(?#'. // auth requires @
22			')((([a-z0-9]\.\|[a-z0-9][a-z0-9-][a-z0-9]\.)'. // domain segments AND
23			'[a-z][a-z0-9-]*[a-z0-9]'. // top level domain OR
24			'\|((\d\|[1-9]\d\|1\d{2}\|2[0-4][0-9]\|25[0-5])\.){3}'.
25			'(\d\|[1-9]\d\|1\d{2}\|2[0-4][0-9]\|25[0-5])'. // IP address
26			')(:\d+)?'. // port
27			')(((\/+([a-z0-9$_\.\+!\\'\(\),;:@&=-]\|%[0-9a-f]{2}))*'. // path
28			'(\?([a-z0-9$_\.\+!\\'\(\),;:@&=-]\|%[0-9a-f]{2}))'. // query string
29			'?)?)?'. // path and query string optional
30			'(#([a-z0-9$_\.\+!\\'\(\),;:@&=-]\|%[0-9a-f]{2}))?'. // fragment
31			'$/i';
32
33
34
35
36			const URL_POSIX_FORMAT='"^(\b(https?\|ftps?\|file):\/\/)?[-A-Za-z0-9+&@#/%?=~_\|!:,.;]+[-A-Za-z0-9+&@#\/%=~_\|]$"i';
37
38			protected static function isUrl($url, &$getDomain='')
39			{
40			$bool= is_string($url)&&preg_match(self::URL_POSIX_FORMAT, $url)&&preg_match(self::URL_FORMAT_EXTENDED_PATTERN, $url, $matches)/?true:false/;
41			$getDomain=rtrim($matches[9], '.');
			Issue (Comprehensibility, Best Practice; introduced 2019-04-21 11:58 UTC; 0 ignored issues): The variable `$matches` does not seem to be defined for all execution paths leading up to this point.
42			return $bool;
43			}
44
45			public static function strippedUrl($a, $b)
46			{
47			if (self::isUrl($a, $domain)&&is_string($b)) {
48			return $domain===trim($b);
49			} elseif (self::isUrl($b, $domain)&&is_string($a)) {
50			return $domain===trim($a);
51			} else {
52			return false;
53			}
54			}
55
56			public static function areAnagrams($a, $b)
57			{
58			return self::similarText($a, $b, 2, true, $check)&&$check['similar'] === 100.0 && $check['contain'] === true;
59			}
60
61			public static function similarButNotEqual($a, $b)
62			{
63			return self::similarText($a, $b, 2, true, $check) && is_array($check) && $check['equal'] === false;
64			}
65
66			public static function aIsSuperStringOfB($a, $b)
67			{
68			if (strlen($a)>strlen($b)) {
69			return self::similarText($a, $b, 2, true, $check) && is_array($check) && $check['substr'] === 100.0;
70			} else {
71			return false;
72			}
73			}
74
75			public static function haveSameRoot($a, $b)
76			{
77			return self::similarText($a, $b, 2, true, $check, true, true) && is_array($check)&&range(0, count($check['a&b'])-1)===array_keys($check['a&b'])/?true:false/;
78			}
79
80			public static function areStems($a, $b)
81			{
82			if (!is_string($a) \|\| !is_string($b)) {
83			return false;
84			}
85
86			$a = self::getParts(self::strtolower($a));
87			$b = self::getParts(self::strtolower($b));
88			foreach ($a as $index=>$word) {
89			if (!self::haveSameRoot($word, $b[$index])) {
90			return false;
91			}
92			}
93			return true;
94			}
95
96			public static function wordsReorderOccured($a, $b, $considerPunctuation=true)
97			{
98			$filter=function ($v) use ($considerPunctuation) {
99			return $considerPunctuation?!(ctype_space($v)\|\|ctype_punct($v)):!ctype_space($v);
100			};
101			return
102			self::similarText($a, $b, 2, true, $check, true) &&
103			is_array($check) &&
104			empty(array_filter($check['a-b'], $filter)) &&
105			empty(array_filter($check['b-a'], $filter)) &&
106			$check['substr'] &&
107			!$check['equal']
108			?true
109			:false;
110			}
111
112			public static function punctuactionChangesOccured($a, $b, $insensitive=true, $considerSpace=true)
113			{
114			$filter=function ($v) use ($considerSpace) {
115			return $considerSpace?!(ctype_space($v)\|\|ctype_punct($v)):!ctype_punct($v);
116			};
117			if (!is_string($a) \|\| !is_string($b)) {
118			return false;
119			}
120			if ($insensitive) {
121			$a = array_filter(self::getParts(self::strtolower($a)), $filter);
122			$b = array_filter(self::getParts(self::strtolower($b)), $filter);
123			} else {
124			$a = array_filter(self::getParts(self::split($a)), $filter);
125			$b = array_filter(self::getParts(self::split($b)), $filter);
126			}
127
128			return empty(array_diff($a, $b));
129			}
130
131
132			public static function acronymOrExpanded($a, $b)
133			{
134			if (!is_string($a) \|\| !is_string($b)) {
135			return false;
136			}
137			$filter=function ($v) {
138			return !(ctype_space($v)\|\|ctype_punct($v));
139			};
140
141			$a = array_filter(self::getParts(self::strtolower($a)), $filter);
142			$b = array_filter(self::getParts(self::strtolower($b)), $filter);
143			foreach ($a as $index=>$word) {
144			if (!self::haveSameRoot($word, $b[$index])\|\|(isset($a[$index][2])&&isset($b[$index][2]))) {
145			return false;
146			}
147			}
148			return true;
149			}
150
151			public static function wordsAddedOrRemoved($a, $b)
152			{
153			if (!is_string($a) \|\| !is_string($b)) {
154			return false;
155			}
156			$filter=function ($v) {
157			return !(ctype_space($v));
158			};
159
160			$a = array_filter(self::getParts(self::strtolower($a)), $filter);
161			$b = array_filter(self::getParts(self::strtolower($b)), $filter);
162			$ca=count($a);
163			$cb=count($b);
164			return (bool)(($ca>$cb)?array_diff_assoc(array_values($a), array_values($b)):array_diff_assoc(array_values($b), array_values($a)));
165			}
166			}
167			}
168

manuwhat / similar-text

Pull Request — master (#2)

commonTextSimilarities C

Complexity

Size/Duplication

Importance

11 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like