Passed
Push — master ( ebc067...02f199 )
by Night
49s
created

stringFuncs.levenshteinDistance   C

Complexity

Conditions 9
Paths 34

Size

Total Lines 38
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 21
nc 34
nop 1
dl 0
loc 38
rs 6.6666
c 0
b 0
f 0
1
/** global: UB */
/** global: S */
/** global: N1D */
/** global: NewArray */

/**
 * String helper functions: equality testing and similarity scoring.
 *
 * Every function reads the string it operates on from `this`, and several of
 * them call their siblings as methods of a string (e.g. `this.isEqual(...)`),
 * so they are meant to be used after being registered on `String.prototype`.
 *
 * `UB`, `S`, `N1D` and `NewArray` are not defined in this file; they are
 * expected to be provided as globals by the rest of the library.
 *
 * Kept as `var` so that, in a classic script, the object stays reachable as a
 * global exactly as before.
 */
var stringFuncs = {

	// equality testing

	/**
	 * Case-sensitive equality test.
	 *
	 * @param {string} str2 - String to compare against.
	 * @returns {boolean} Result of `isEqual(str2)`.
	 */
	equals: function(str2){
		return this.isEqual(str2);
	},

	/**
	 * Case-insensitive equality test.
	 *
	 * @param {string} str2 - String to compare against.
	 * @returns {boolean} Result of `isEqual(str2, false)`.
	 */
	equalsCI: function(str2){
		return this.isEqual(str2, false);
	},

	/**
	 * Equality test with optional case-insensitivity.
	 *
	 * The case-insensitive path compares char codes one by one, mapping each
	 * code through `UB.UTF_upperToLower` when it is within
	 * `UB.UTF_casingTablesMax`. The original author chose this over calling
	 * toUpperCase() on both strings, noting that the latter "causes thousands
	 * of HOC in a big loop".
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=true] - When true, plain `==` comparison.
	 * @param {boolean} [str1IsLower=false] - When true, the chars of `this` are
	 *   not mapped through the casing table.
	 * @param {boolean} [str2IsLower=false] - When true, the chars of `str2` are
	 *   not mapped through the casing table.
	 * @returns {boolean} True when the strings are considered equal.
	 */
	isEqual: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
		const str1 = this;

		if (caseSensitive) {
			// Loose equality on purpose: in sloppy mode `this` is a String
			// wrapper object, which `===` would never match to a primitive.
			return (str1 == str2);
		}

		// quick checks
		if (str1 == null || str2 == null) {
			return (str1 == str2);
		}
		if (str1.length !== str2.length) {
			return false;
		}

		// init casing tables
		if (UB.UTF_lowerToUpper == null){
			UB.initCasing();
		}

		// read the tables once, after they are guaranteed to be initialised
		const tableMax = UB.UTF_casingTablesMax;
		const upperToLower = UB.UTF_upperToLower;

		// very fast CI comparison
		for (let c = 0, cl = str1.length; c < cl; c++){
			let c1 = str1.charCodeAt(c);
			let c2 = str2.charCodeAt(c);

			if (!str1IsLower && c1 <= tableMax) {
				c1 = upperToLower[c1];
			}
			if (!str2IsLower && c2 <= tableMax) {
				c2 = upperToLower[c2];
			}

			if (c1 !== c2) {
				return false;
			}
		}
		return true;
	},

	/**
	 * Tests whether this string equals any entry of a list.
	 *
	 * `null` / `undefined` entries are skipped.
	 *
	 * @param {Array<?string>} str2 - Candidate strings.
	 * @param {boolean} [caseSensitive=true] - When false, entries are compared
	 *   with `isEqual(entry, false, str1IsLower, str2IsLower)`.
	 * @param {boolean} [str1IsLower=false] - Forwarded to `isEqual`.
	 * @param {boolean} [str2IsLower=false] - Forwarded to `isEqual`.
	 * @returns {boolean} True as soon as one entry matches, otherwise false.
	 */
	isEqualAny: function(str2, caseSensitive = true, str1IsLower = false, str2IsLower = false){
		const str1 = this;

		for (let s = 0, sl = str2.length; s < sl; s++){
			const candidate = str2[s];
			if (candidate == null) {
				continue;
			}

			// `==` on purpose, see isEqual (String wrapper vs primitive).
			const matches = caseSensitive
				? (str1 == candidate)
				: str1.isEqual(candidate, false, str1IsLower, str2IsLower);
			if (matches) {
				return true;
			}
		}

		return false;
	},

	/**
	 * Inequality test with optional case-insensitivity.
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=true] - When false, negates the
	 *   case-insensitive `isEqual`.
	 * @returns {boolean} True when the strings are considered different.
	 */
	isNotEqual: function(str2, caseSensitive = true){
		const str1 = this;

		// Guard first: `isEqual` cannot be called on a null/undefined `this`.
		if (caseSensitive || str1 == null || str2 == null) {
			// `!=` on purpose, see isEqual (String wrapper vs primitive).
			return (str1 != str2);
		}

		// very fast CI comparison
		return !str1.isEqual(str2, false);
	},

	// similarity testing

	/**
	 * Similarity percentage derived from the Levenshtein distance.
	 *
	 * @param {string} target - String to compare against.
	 * @returns {number} `(1 - distance / longerLength) * 100`; 100 when both
	 *   strings are empty.
	 */
	similarityLevenshtein: function(target){
		const source = this;
		const maxLen = Math.max(source.length, target.length);
		if (maxLen === 0) {
			return 100;
		}
		return (1 - source.levenshteinDistance(target) / maxLen) * 100;
	},

	/**
	 * Levenshtein distance between this string and `target`: the number of
	 * deletions, insertions, or substitutions required to transform this
	 * string into `target`.
	 *
	 * @param {string} target - String to compare against.
	 * @returns {number} The edit distance (0 for identical strings).
	 */
	levenshteinDistance: function(target){
		const source = this;
		const n = source.length;
		const m = target.length;

		if (n === 0) { return m; }
		if (m === 0) { return n; }

		// Only two rows of the distance matrix are ever needed:
		// prev[j] is the distance between the first i-1 chars of source and
		// the first j chars of target; curr is the row being filled for i.
		let prev = [];
		for (let j = 0; j <= m; j++) { prev[j] = j; }

		for (let i = 1; i <= n; i++) {
			const sourceChar = source.charAt(i - 1);
			const curr = [i];

			for (let j = 1; j <= m; j++) {
				const cost = (sourceChar === target.charAt(j - 1)) ? 0 : 1;
				curr[j] = Math.min(
					prev[j] + 1,        // deletion
					curr[j - 1] + 1,    // insertion
					prev[j - 1] + cost  // substitution (or match)
				);
			}
			prev = curr;
		}
		return prev[m];
	},

	/**
	 * Similarity score based on matching character pairs; per the original
	 * author, better suited to strings of unequal length.
	 *
	 * Author's examples: returns ~0.9 for "jhonny" and "jonny",
	 * returns 0.3 for "ABCD" and "DBCD".
	 *
	 * Relies on the external helpers `S.RemoveSpaces`, `S.SplitCharPairs` and
	 * `N1D.MatchingScore`, which are not defined in this file.
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=false] - When false, both strings are
	 *   upper-cased first.
	 * @param {boolean} [spaceSensitive=true] - When false, both strings are
	 *   passed through `S.RemoveSpaces` first.
	 * @returns {number} 1 when the (normalised) strings are equal, otherwise
	 *   whatever `N1D.MatchingScore` returns for the two char-pair lists.
	 */
	similarityScore: function(str2, caseSensitive = false, spaceSensitive = true){
		let left = this;
		let right = str2;

		if (!caseSensitive){
			left = left.toUpperCase();
			right = right.toUpperCase();
		}

		if (!spaceSensitive){
			left = S.RemoveSpaces(left);
			right = S.RemoveSpaces(right);
		}

		// `==` on purpose: `left` may still be the String wrapper of `this`.
		if (left == right) {
			return 1;
		}

		return N1D.MatchingScore(S.SplitCharPairs(left), S.SplitCharPairs(right));
	},

	/**
	 * Position-by-position similarity score; per the original author, better
	 * for equal-length strings where char-by-char matching is wanted.
	 *
	 * Author's example: returns 0.75 for "ABCD" and "DBCD".
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=false] - When false, both strings are
	 *   upper-cased first.
	 * @returns {number} Matching positions divided by the string length;
	 *   1 when both strings are empty; for strings of unequal length the
	 *   result of `similarityScore(str2, caseSensitive)` instead.
	 */
	similarityScoreChars: function(str2, caseSensitive = false){
		let left = this;
		let right = str2;

		// don't process unequal len strings
		if (left.length !== right.length) {
			return left.similarityScore(right, caseSensitive);
		}

		// two empty strings are identical; avoids 0 / 0 = NaN below
		if (left.length === 0) {
			return 1;
		}

		if (!caseSensitive){
			left = left.toUpperCase();
			right = right.toUpperCase();
		}

		// just counts matching chars
		return left.countMatchingChars(right) / left.length;
	},

	/**
	 * Counts matching character pairs between the two strings.
	 *
	 * Relies on the external helpers `S.SplitCharPairs` and
	 * `N1D.MatchingSlots`, which are not defined in this file.
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=false] - When false, both strings are
	 *   upper-cased first.
	 * @returns {*} Whatever `N1D.MatchingSlots` returns for the two char-pair
	 *   lists (presumably a count — confirm against N1D).
	 */
	countMatchingCharPairs: function(str2, caseSensitive = false){
		let left = this;
		let right = str2;

		if (!caseSensitive){
			left = left.toUpperCase();
			right = right.toUpperCase();
		}

		return N1D.MatchingSlots(S.SplitCharPairs(left), S.SplitCharPairs(right));
	},

	/**
	 * Counts the positions at which both strings hold the same character.
	 * Only positions that exist in this string are examined.
	 *
	 * @param {string} str2 - String to compare against.
	 * @returns {number} Number of matching positions.
	 */
	countMatchingChars: function(str2){
		const str1 = this;

		let count = 0;
		for (let c = 0, cl = str1.length; c < cl; c++){
			if (str1.charAt(c) === str2.charAt(c)) {
				count++;
			}
		}
		return count;
	},

	/**
	 * Finds the starting point of the 2nd instance of any repeated substring;
	 * per the original author's note, "zjohn ajohn" will return 7.
	 *
	 * Relies on the external helpers `NewArray.Repeat`, `N1D.MaxIndexInRange`
	 * and `N1D.MinInRange`, which are not defined in this file; their exact
	 * semantics are assumed from how they are used here — confirm against
	 * their definitions.
	 *
	 * @param {boolean} caseSensitive - Passed to `similarityScoreChars` and
	 *   used for the final char-by-char comparison.
	 * @param {number} [similarChars=4] - Length of the substrings compared.
	 * @param {number} [minSimilarity=0.7] - Lower bound handed to
	 *   `N1D.MaxIndexInRange` when picking the best match.
	 * @returns {number} Index of the first exactly matching char at or after
	 *   the best match, or the best-match index itself if none is found.
	 */
	indexOfBestMatchRepeated: function(caseSensitive, similarChars = 4, minSimilarity = 0.7){
		const text = this;

		// number of start positions that still leave room for a full window
		const windowCount = text.length - (similarChars - 1);

		// per char set
		const sims = [];
		for (let c = 0; c < windowCount; c++){
			const chars = text.slice(c, c + similarChars);

			// per every other char set ahead of this
			const sims2 = NewArray.Repeat(windowCount, 0);
			for (let c2 = c + similarChars; c2 < windowCount; c2++){
				const chars2 = text.slice(c2, c2 + similarChars);

				// calc similarity
				sims2[c2] = chars.similarityScoreChars(chars2, caseSensitive);
			}

			// store best similarity match if above wanted similarity
			sims[c] = N1D.MaxIndexInRange(sims2, minSimilarity, 1);
		}

		// find char nearest to left
		const leftCharIndex = N1D.MinInRange(sims, 0, windowCount, true);
		const leftCharIndex2 = sims[leftCharIndex];

		// now find first exactly matching char after it
		for (let c = leftCharIndex, c2 = leftCharIndex2, end = text.length; c2 < end; c++, c2++) {
			let leftChar = text.charAt(c);
			let leftChar2 = text.charAt(c2);
			if (!caseSensitive) {
				leftChar = leftChar.toUpperCase();
				leftChar2 = leftChar2.toUpperCase();
			}
			if (leftChar === leftChar2) {
				return c2;
			}
		}
		return leftCharIndex2;
	},

	/**
	 * Tests whether `similarityScore` reaches a threshold.
	 *
	 * @param {string} str2 - String to compare against.
	 * @param {boolean} [caseSensitive=false] - Passed to `similarityScore`.
	 * @param {number} [threshold=0.8] - Minimum score counted as similar.
	 * @returns {boolean} True when the score is `>= threshold`.
	 */
	isSimilar: function(str2, caseSensitive = false, threshold = 0.8){
		return (this.similarityScore(str2, caseSensitive) >= threshold);
	},

	// trailing placeholder entry, kept so the set of keys is unchanged
	none:null
};
259
260
// register funcs
261
// Hands String.prototype and the stringFuncs object to UB.registerFuncs.
// NOTE(review): presumably this installs each function as a String method —
// the helpers call one another as methods of `this` (e.g. this.isEqual(...));
// confirm against the definition of UB.registerFuncs, which is not in this file.
UB.registerFuncs(String.prototype, stringFuncs);
262