1 | /** global: UB */ |
||
2 | |||
/**
 * String helper methods meant to be installed on String.prototype (see the
 * registration call at the end of the file). Inside every method `this` is the
 * string the method was invoked on.
 *
 * External globals used by these helpers (none is defined in this file):
 *   - UB  : casing tables (UTF_lowerToUpper, UTF_upperToLower, UTF_casingTablesMax),
 *           initCasing(), newArray()
 *   - N1D : MatchingScore(), MatchingSlots(), MaxIndexInRange(), MinInRange()
 * Some methods also call string helpers that are not defined here
 * (removeSpaces, splitCharPairs) - presumably registered by another module.
 *
 * NOTE: comparisons against `this` intentionally use loose equality (== / !=).
 * In non-strict code `this` is a String wrapper object, and === against a
 * primitive string would always be false.
 */
var stringFuncs = {

    // equality testing

    /**
     * Case-sensitive equality test.
     * @param {*} str2 - value to compare with this string.
     * @returns {boolean} result of `this.isEqual(str2)`.
     */
    equals: function(str2){
        return this.isEqual(str2);
    },

    /**
     * Case-insensitive equality test.
     * @param {*} str2 - value to compare with this string.
     * @returns {boolean} result of `this.isEqual(str2, false)`.
     */
    equalsCI: function(str2){
        return this.isEqual(str2, false);
    },

    /**
     * Compares this string with `str2`.
     *
     * Case-sensitive mode is a plain loose `==` comparison. Case-insensitive
     * mode compares the strings one UTF-16 code unit at a time, mapping each
     * code unit through `UB.UTF_upperToLower` when it is not above
     * `UB.UTF_casingTablesMax`; code units above that limit are compared as-is.
     *
     * @param {*} str2 - value to compare with this string.
     * @param {boolean} [caseSensitive=true] - false for case-insensitive comparison.
     * @param {boolean} [str1IsLower=false] - when true, this string is not mapped
     *        through the casing table (caller asserts it is already lower case).
     * @param {boolean} [str2IsLower=false] - same, for `str2`.
     * @returns {boolean} true when the strings are considered equal.
     */
    isEqual: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
        var str1 = this;

        if (!caseSensitive) {

            // quick checks
            if (str2 == null) {
                return (str1 == str2);
            }
            if (str1.length != str2.length) {
                return false;
            }

            // init casing tables (presumably initCasing() fills the UB.UTF_* tables)
            if (UB.UTF_lowerToUpper == null){
                UB.initCasing();
            }

            // Per-character table lookup is used instead of calling toUpperCase()
            // on both strings; the original author noted that the toUpperCase()
            // approach "causes thousands of HOC in a big loop".
            for (var c = 0, cl = str1.length; c < cl; c++){
                var c1 = str1.charCodeAt(c);
                var c2 = str2.charCodeAt(c);

                // fold to lower case unless the caller says it already is
                if (!str1IsLower && c1 <= UB.UTF_casingTablesMax) {
                    c1 = UB.UTF_upperToLower[c1];
                }
                if (!str2IsLower && c2 <= UB.UTF_casingTablesMax) {
                    c2 = UB.UTF_upperToLower[c2];
                }

                if (c1 != c2) {
                    return false;
                }
            }
            return true;
        }

        return (str1 == str2);
    },

    /**
     * Tests whether this string equals any entry of `str2`.
     * Entries that are null/undefined are skipped.
     *
     * @param {ArrayLike<*>} str2 - candidates, read by index from 0 to length - 1.
     * @param {boolean} [caseSensitive=true] - false for case-insensitive comparison.
     * @param {boolean} [str1IsLower=false] - forwarded to isEqual() in
     *        case-insensitive mode.
     * @param {boolean} [str2IsLower=false] - forwarded to isEqual() in
     *        case-insensitive mode (applies to every candidate).
     * @returns {boolean} true on the first matching candidate, otherwise false.
     */
    isEqualAny: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
        var str1 = this;

        for (var s = 0, sl = str2.length; s < sl; s++){
            var s2 = str2[s];
            if (s2 == null) {
                continue;
            }

            if (caseSensitive) {
                if (str1 == s2) {
                    return true;
                }
            } else if (str1.isEqual(s2, false, str1IsLower, str2IsLower)) {
                // the lower-case hints are passed through instead of being dropped
                return true;
            }
        }

        return false;
    },

    /**
     * Negation of the equality test.
     *
     * @param {*} str2 - value to compare with this string.
     * @param {boolean} [caseSensitive=true] - false for case-insensitive comparison.
     * @returns {boolean} true when the strings are considered different.
     */
    isNotEqual: function(str2, caseSensitive = true){
        var str1 = this;

        if (!caseSensitive) {
            if (str1 == null || str2 == null) {
                return (str1 != str2);
            }

            // very fast CI comparison
            return !str1.isEqual(str2, false);
        }

        return (str1 != str2);
    },

    // similarity testing

    /**
     * Similarity based on the Levenshtein distance, as a percentage.
     *
     * @param {string} target - string to compare with this string.
     * @returns {number} (1 - distance / longerLength) * 100; 100 when both
     *          strings are empty.
     */
    similarityLevenshtein: function(target){
        var source = this;
        var ed = source.levenshteinDistance(target);
        var maxLen = Math.max(source.length, target.length);
        if (maxLen === 0) {
            return 100;
        }
        return (1 - ed/maxLen) * 100;
    },

    /**
     * Levenshtein distance: the number of deletions, insertions, or
     * substitutions required to transform this string into `target`.
     * Characters are compared per UTF-16 code unit (charAt).
     *
     * @param {string} target - string to compare with this string.
     * @returns {number} the edit distance.
     */
    levenshteinDistance: function(target){
        var source = this;

        var n = source.length;
        var m = target.length;
        var i;
        var j;

        if (n === 0) { return m; }
        if (m === 0) { return n; }

        // d[i][j] = distance between the first i chars of source and the
        // first j chars of target
        var d = [];
        for (i = 0; i <= n; i++) {
            d[i] = [];
            d[i][0] = i;
        }
        for (j = 0; j <= m; j++) {
            d[0][j] = j;
        }

        for (i = 1; i <= n; i++) {
            var s_i = source.charAt(i - 1);

            for (j = 1; j <= m; j++) {
                var t_j = target.charAt(j - 1);
                var cost = (s_i === t_j) ? 0 : 1;

                // Math.min replaces the former call to the undeclared global
                // helper S._minimum.
                d[i][j] = Math.min(d[i-1][j] + 1, d[i][j-1] + 1, d[i-1][j-1] + cost);
            }
        }
        return d[n][m];
    },

    /**
     * Similarity score computed from the character pairs of both strings via
     * N1D.MatchingScore (external; exact scoring formula not visible here).
     *
     * Original author's notes: better for unequal length strings;
     * returns ~0.9 for "jhonny" and "jonny", returns 0.3 for "ABCD" and "DBCD".
     *
     * @param {string} str2 - string to compare with this string.
     * @param {boolean} [caseSensitive=false] - when false both strings are upper-cased first.
     * @param {boolean} [spaceSensitive=true] - when false removeSpaces() is applied to both.
     * @returns {number} 1 when the (normalised) strings are equal, otherwise
     *          the value returned by N1D.MatchingScore.
     */
    similarityScore: function(str2, caseSensitive = false, spaceSensitive = true){
        var str1 = this;

        if (!caseSensitive){
            str1 = str1.toUpperCase();
            str2 = str2.toUpperCase();
        }

        if (!spaceSensitive){
            str1 = str1.removeSpaces();
            str2 = str2.removeSpaces();
        }

        if (str1 == str2) {
            return 1;
        }

        return N1D.MatchingScore(str1.splitCharPairs(), str2.splitCharPairs());
    },

    /**
     * Position-by-position similarity: the fraction of indexes at which both
     * strings hold the same character.
     *
     * Original author's notes: better for equal length strings where char by
     * char matching is wanted; returns 0.75 for "ABCD" and "DBCD".
     *
     * @param {string} str2 - string to compare with this string.
     * @param {boolean} [caseSensitive=false] - when false both strings are upper-cased first.
     * @returns {number} matching chars / length for equal-length strings,
     *          1 for two empty strings, and similarityScore() when the
     *          lengths differ.
     */
    similarityScoreChars: function(str2, caseSensitive = false){
        var str1 = this;

        // don't process unequal len strings
        if (str1.length != str2.length) {
            return str1.similarityScore(str2, caseSensitive);
        }

        // two empty strings are identical; without this guard the division
        // below would be 0 / 0 = NaN
        if (str1.length === 0) {
            return 1;
        }

        if (!caseSensitive){
            str1 = str1.toUpperCase();
            str2 = str2.toUpperCase();
        }

        // just counts matching chars
        return str1.countMatchingChars(str2) / str1.length;
    },

    /**
     * Counts matching character pairs between both strings via
     * N1D.MatchingSlots (external; exact semantics not visible here).
     *
     * @param {string} str2 - string to compare with this string.
     * @param {boolean} [caseSensitive=false] - when false both strings are upper-cased first.
     * @returns {*} the value returned by N1D.MatchingSlots.
     */
    countMatchingCharPairs: function(str2, caseSensitive = false){
        var str1 = this;

        if (!caseSensitive){
            str1 = str1.toUpperCase();
            str2 = str2.toUpperCase();
        }

        return N1D.MatchingSlots(str1.splitCharPairs(), str2.splitCharPairs());
    },

    /**
     * Counts the indexes (0 .. this.length - 1) at which this string and
     * `str2` hold the same character. Case-sensitive.
     *
     * @param {string} str2 - string to compare with this string.
     * @returns {number} number of matching positions.
     */
    countMatchingChars: function(str2){
        var str1 = this;

        var count = 0;
        for (var c = 0, cl = str1.length; c < cl; c++){
            if (str1.charAt(c) == str2.charAt(c)) {
                count++;
            }
        }
        return count;
    },

    /**
     * Original author's note: finds the starting point of the 2nd instance of
     * any repeated substring .. "zjohn ajohn" will return 7.
     *
     * Works on windows of `similarChars` characters: every window is scored
     * against every later, non-overlapping window with similarityScoreChars().
     * The selection of the best pair is delegated to N1D.MaxIndexInRange and
     * N1D.MinInRange (external; their exact semantics are not visible here).
     *
     * @param {boolean} [caseSensitive] - when omitted/falsy the comparison is
     *        case-insensitive.
     * @param {number} [similarChars=4] - window length in characters.
     * @param {number} [minSimilarity=0.7] - lower bound passed to N1D.MaxIndexInRange.
     * @returns {*} index of the first exactly matching character of the second
     *          window, or the second window's index itself (whatever
     *          N1D.MaxIndexInRange produced) when no exact character match is found.
     */
    indexOfBestMatchRepeated: function(caseSensitive, similarChars = 4, minSimilarity = 0.7){
        var text = this;

        // per char set
        var sims = [];
        for (var c = 0, cl = text.length - (similarChars - 1); c < cl; c++){
            var chars = text.substr(c, similarChars);

            // per every other char set ahead of this
            var sims2 = UB.newArray(0, cl);
            for (var c2 = c + similarChars; c2 < cl; c2++){
                var chars2 = text.substr(c2, similarChars);

                // calc similarity
                sims2[c2] = chars.similarityScoreChars(chars2, caseSensitive);
            }

            // store best similarity match if above wanted similarity
            sims[c] = N1D.MaxIndexInRange(sims2, minSimilarity, 1);
        }

        // find char nearest to left
        var leftCharIndex = N1D.MinInRange(sims, 0, cl, true);
        var leftCharIndex2 = sims[leftCharIndex];

        // now find first exactly matching char after it
        // (walks both windows in lock-step until the end of the text)
        for (c = leftCharIndex, c2 = leftCharIndex2, cl = text.length; c2 < cl; c++, c2++) {
            var leftChar = text.charAt(c);
            var leftChar2 = text.charAt(c2);
            if (!caseSensitive) {
                leftChar = leftChar.toUpperCase();
                leftChar2 = leftChar2.toUpperCase();
            }
            if (leftChar == leftChar2) {
                return c2;
            }
        }
        return leftCharIndex2;
    },

    /**
     * Threshold test on similarityScore().
     *
     * @param {string} str2 - string to compare with this string.
     * @param {boolean} [caseSensitive=false] - forwarded to similarityScore().
     * @param {number} [threshold=0.8] - minimum score to count as similar.
     * @returns {boolean} true when similarityScore(str2, caseSensitive) >= threshold.
     */
    isSimilar: function(str2, caseSensitive = false, threshold = 0.8){
        var str1 = this;
        return (str1.similarityScore(str2, caseSensitive) >= threshold);
    },

    // non-function placeholder entry (presumably only terminates the list)
    none:null
};
||
261 | |||
// Register funcs: hand the stringFuncs table to UB.registerFuncs (defined
// elsewhere) with String.prototype as the target. The helpers call each other
// on string values (e.g. this.isEqual, str1.similarityScore), so they
// presumably end up installed as String.prototype methods - confirm against
// UB.registerFuncs.
UB.registerFuncs(String.prototype, stringFuncs);
||
264 |
This check looks for references to variables that have not been declared. This is most likely a typographical error or a variable that has been renamed.
To learn more about declaring variables in Javascript, see the MDN.