Total Complexity | 54 |
Complexity/F | 4.15 |
Lines of Code | 259 |
Function Count | 13 |
Duplicated Lines | 0 |
Ratio | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like src/ub.strings.similarity.js often do many different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | /** global: UB */ |
||
/**
 * String helpers for equality and similarity testing.
 *
 * Every function reads the subject string from `this`; the object is handed
 * to `UB.registerFuncs(String.prototype, ...)` below, so the functions are
 * meant to be called as methods on a string (`"abc".isEqual("ABC", false)`).
 * Sibling helpers are therefore invoked as methods on the string as well.
 */
var stringFuncs = {

	// equality testing

	/**
	 * Case-sensitive equality of this string and `str2`.
	 * @param {*} str2 value to compare against
	 * @returns {boolean}
	 */
	equals: function(str2){
		return this.isEqual(str2);
	},

	/**
	 * Case-insensitive equality of this string and `str2`.
	 * @param {*} str2 value to compare against
	 * @returns {boolean}
	 */
	equalsCI: function(str2){
		return this.isEqual(str2, false);
	},

	/**
	 * Compares this string with `str2`.
	 *
	 * @param {*} str2 value to compare against
	 * @param {boolean} [caseSensitive=true] when false, characters are folded
	 *        through `UB.UTF_upperToLower` before being compared
	 * @param {boolean} [str1IsLower=false] skip folding for this string
	 *        (caller asserts it is already lower-case)
	 * @param {boolean} [str2IsLower=false] skip folding for `str2`
	 * @returns {boolean}
	 */
	isEqual: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
		const str1 = this;

		if (caseSensitive) {
			// Loose equality is kept on purpose: `this` may be a String wrapper
			// object in sloppy-mode callers, and callers may rely on coercion.
			return (str1 == str2);
		}

		// quick checks
		if (str1 == null || str2 == null) {
			return (str1 == str2);
		}
		if (str1.length != str2.length) {
			return false;
		}

		// init casing tables
		if (UB.UTF_lowerToUpper == null){
			UB.initCasing();
		}

		// Per-char-code comparison; avoids allocating upper/lower-cased copies
		// of both strings on every call.
		const toLower = UB.UTF_upperToLower;
		const tableMax = UB.UTF_casingTablesMax;
		for (let c = 0, cl = str1.length; c < cl; c++){
			let c1 = str1.charCodeAt(c);
			let c2 = str2.charCodeAt(c);

			// only codes covered by the casing table are folded
			if (!str1IsLower && c1 <= tableMax) {
				c1 = toLower[c1];
			}
			if (!str2IsLower && c2 <= tableMax) {
				c2 = toLower[c2];
			}

			if (c1 != c2) {
				return false;
			}
		}
		return true;
	},

	/**
	 * Tests whether this string equals any non-null entry of `str2`.
	 *
	 * @param {Array} str2 list of candidates; null/undefined entries are skipped
	 * @param {boolean} [caseSensitive=true]
	 * @param {boolean} [str1IsLower=false] forwarded to `isEqual`
	 * @param {boolean} [str2IsLower=false] forwarded to `isEqual`
	 * @returns {boolean} true on the first match, false if nothing matches
	 */
	isEqualAny: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
		const str1 = this;

		for (let s = 0, sl = str2.length; s < sl; s++){
			const candidate = str2[s];
			if (candidate == null) {
				continue;
			}
			if (caseSensitive) {
				if (str1 == candidate) {
					return true;
				}
			} else if (str1.isEqual(candidate, false, str1IsLower, str2IsLower)) {
				// the "already lower-case" hints used to be dropped here
				return true;
			}
		}

		return false;
	},

	/**
	 * Negation of `isEqual` for the given case sensitivity.
	 * @param {*} str2 value to compare against
	 * @param {boolean} [caseSensitive=true]
	 * @returns {boolean}
	 */
	isNotEqual: function(str2, caseSensitive = true){
		const str1 = this;

		if (!caseSensitive) {
			if (str1 == null || str2 == null) {
				return (str1 != str2);
			}
			return !str1.isEqual(str2, false);
		}

		return (str1 != str2);
	},

	// similarity testing

	/**
	 * Similarity as a percentage derived from the Levenshtein distance:
	 * `(1 - distance / longerLength) * 100`.
	 * @param {string} target
	 * @returns {number} 0..100; 100 when both strings are empty
	 */
	similarityLevenshtein: function(target){
		const source = this;
		const distance = source.levenshteinDistance(target);
		const maxLen = Math.max(source.length, target.length);
		if (maxLen == 0) {
			return 100;
		}
		return (1 - distance / maxLen) * 100;
	},

	/**
	 * Levenshtein distance: the number of deletions, insertions or
	 * substitutions required to transform this string into `target`.
	 * @param {string} target
	 * @returns {number}
	 */
	levenshteinDistance: function(target){
		const source = this;
		const n = source.length;
		const m = target.length;

		if (n == 0) { return m; }
		if (m == 0) { return n; }

		// Only the previous and the current row of the distance table are
		// needed at any time.
		let previous = [];
		for (let j = 0; j <= m; j++) {
			previous[j] = j;
		}

		for (let i = 1; i <= n; i++) {
			const sourceChar = source.charAt(i - 1);
			const current = [i];

			for (let j = 1; j <= m; j++) {
				const cost = (sourceChar == target.charAt(j - 1)) ? 0 : 1;
				current[j] = Math.min(
					previous[j] + 1,        // deletion
					current[j - 1] + 1,     // insertion
					previous[j - 1] + cost  // substitution
				);
			}
			previous = current;
		}
		return previous[m];
	},

	/**
	 * Similarity score based on character pairs; better for strings of
	 * unequal length.
	 *
	 * returns ~0.9 for "jhonny" and "jonny"
	 * returns 0.3 for "ABCD" and "DBCD"
	 *
	 * The score itself is computed by the external `N1D.MatchingScore` over
	 * `S.SplitCharPairs` of both strings.
	 *
	 * @param {string} str2
	 * @param {boolean} [caseSensitive=false] when false both strings are upper-cased first
	 * @param {boolean} [spaceSensitive=true] when false both strings go through `S.RemoveSpaces` first
	 * @returns {number} 1 when the normalised strings are equal
	 */
	similarityScore: function(str2, caseSensitive = false, spaceSensitive = true){
		let str1 = this;

		if (!caseSensitive){
			str1 = str1.toUpperCase();
			str2 = str2.toUpperCase();
		}

		if (!spaceSensitive){
			str1 = S.RemoveSpaces(str1);
			str2 = S.RemoveSpaces(str2);
		}

		if (str1 == str2) {
			return 1;
		}

		return N1D.MatchingScore(S.SplitCharPairs(str1), S.SplitCharPairs(str2));
	},

	/**
	 * Position-by-position similarity; better for equal-length strings where
	 * char by char matching is wanted.
	 *
	 * returns 0.75 for "ABCD" and "DBCD"
	 *
	 * @param {string} str2
	 * @param {boolean} [caseSensitive=false]
	 * @returns {number} matching positions / length; strings of unequal
	 *          length are delegated to `similarityScore`
	 */
	similarityScoreChars: function(str2, caseSensitive = false){
		let str1 = this;

		// don't process unequal len strings
		if (str1.length != str2.length) {
			return str1.similarityScore(str2, caseSensitive);
		}

		if (!caseSensitive){
			str1 = str1.toUpperCase();
			str2 = str2.toUpperCase();
		}

		// just counts matching chars
		return str1.countMatchingChars(str2) / str1.length;
	},

	/**
	 * Counts matching character pairs via the external `N1D.MatchingSlots`
	 * over `S.SplitCharPairs` of both strings.
	 * @param {string} str2
	 * @param {boolean} [caseSensitive=false] when false both strings are upper-cased first
	 * @returns {*} whatever `N1D.MatchingSlots` returns (presumably a count — confirm)
	 */
	countMatchingCharPairs: function(str2, caseSensitive = false){
		let str1 = this;

		if (!caseSensitive){
			str1 = str1.toUpperCase();
			str2 = str2.toUpperCase();
		}

		return N1D.MatchingSlots(S.SplitCharPairs(str1), S.SplitCharPairs(str2));
	},

	/**
	 * Counts the positions at which this string and `str2` hold the same
	 * character (case-sensitive). Only positions of this string are visited.
	 * @param {string} str2
	 * @returns {number}
	 */
	countMatchingChars: function(str2){
		const str1 = this;

		let count = 0;
		for (let c = 0, cl = str1.length; c < cl; c++){
			if (str1.charAt(c) == str2.charAt(c)) {
				count++;
			}
		}
		return count;
	},

	/**
	 * Finds the starting point of the 2nd instance of any repeated substring,
	 * e.g. "zjohn ajohn" will return 7.
	 *
	 * @param {boolean} caseSensitive
	 * @param {number} [similarChars=4] window size compared at each position
	 * @param {number} [minSimilarity=0.7] lower bound handed to `N1D.MaxIndexInRange`
	 * @returns {*} index into this string; the exact result when nothing
	 *          qualifies depends on the external `N1D` helpers
	 */
	indexOfBestMatchRepeated: function(caseSensitive, similarChars = 4, minSimilarity = 0.7){
		const text = this;

		// number of window start positions
		const windowCount = text.length - (similarChars - 1);

		// per char set
		const sims = [];
		for (let c = 0; c < windowCount; c++){
			const chars = text.slice(c, c + similarChars);

			// per every other char set ahead of this
			const sims2 = NewArray.Repeat(windowCount, 0);
			for (let c2 = c + similarChars; c2 < windowCount; c2++){
				const chars2 = text.slice(c2, c2 + similarChars);

				// calc similarity
				sims2[c2] = chars.similarityScoreChars(chars2, caseSensitive);
			}

			// store best similarity match if above wanted similarity
			sims[c] = N1D.MaxIndexInRange(sims2, minSimilarity, 1);
		}

		// find char nearest to left
		const leftCharIndex = N1D.MinInRange(sims, 0, windowCount, true);
		const leftCharIndex2 = sims[leftCharIndex];

		// now find first exactly matching char after it
		for (let c = leftCharIndex, c2 = leftCharIndex2, cl = text.length; c2 < cl; c++, c2++) {
			let leftChar = text.charAt(c);
			let leftChar2 = text.charAt(c2);
			if (!caseSensitive) {
				leftChar = leftChar.toUpperCase();
				leftChar2 = leftChar2.toUpperCase();
			}
			if (leftChar == leftChar2) {
				return c2;
			}
		}
		return leftCharIndex2;
	},

	/**
	 * Tests whether `similarityScore` against `str2` reaches `threshold`.
	 * @param {string} str2
	 * @param {boolean} [caseSensitive=false]
	 * @param {number} [threshold=0.8]
	 * @returns {boolean}
	 */
	isSimilar: function(str2, caseSensitive = false, threshold = 0.8){
		const str1 = this;
		return (str1.similarityScore(str2, caseSensitive) >= threshold);
	},

	none:null
};
||
259 | |||
260 | // register funcs: hand the stringFuncs object to UB.registerFuncs with String.prototype as the target (presumably installs each entry as a String method - confirm against UB.registerFuncs) |
||
261 | UB.registerFuncs(String.prototype, stringFuncs); |
||
262 |