Issues (81)

src/ub.files.csv.js (1 issue)

1
/** global: UB */
2
//removeIf(nodejs)
3
4
5
6
7
var arrayFuncs = {

	/**
	 * Fast and simple CSV encoder, called with a 2D array (array of rows) as `this`.
	 *
	 * Escaping follows the usual spreadsheet convention: a value is wrapped in
	 * double quotes only when it needs to be, i.e. when it contains a double
	 * quote (which is doubled), the separator, or a line break.
	 * `null` / `undefined` cells are written as empty cells.
	 *
	 * @param {Array|null} headers - Optional header row written before the data rows.
	 * @param {boolean} trimValues - When truthy, each value is trimmed before it is written.
	 * @param {boolean} columnar - When truthy, the data is passed through `transpose()` first.
	 * @param {string} [seperator=","] - Cell separator placed between values.
	 * @returns {string} The encoded text; rows are joined with "\r\n" and there is no trailing newline.
	 */
	encodeCSV: function(headers, trimValues, columnar, seperator = ",") {
		let linesData = this;

		if (columnar) {
			// transpose() is not defined in this file; it must already exist on the array
			linesData = linesData.transpose();
		}

		const sb = [];
		const lastLine = linesData.length - 1;

		// row -1 is the header row, when one was given
		for (let l = (headers != null ? -1 : 0); l <= lastLine; l++) {
			const words = l === -1 ? headers : linesData[l];
			const lastWord = words.length - 1;

			for (let v = 0; v <= lastWord; v++) {

				// null / undefined cells stay empty
				if (words[v] != null) {
					let word = words[v].toString();
					if (trimValues) {
						word = word.trim();
					}

					if (word.indexOf('"') > -1) {

						// double every quote and enclose the value in quotes
						sb.push('"', word.split('"').join('""'), '"');

					} else if (
						(seperator !== "" && word.indexOf(seperator) > -1) ||
						word.indexOf("\n") > -1 ||
						word.indexOf("\r") > -1
					) {

						// the value would otherwise be split into several cells or rows
						sb.push('"', word, '"');

					} else {
						sb.push(word);
					}
				}

				// separator between cells, not after the last one
				if (v < lastWord) {
					sb.push(seperator);
				}
			}

			// newline between rows, not after the last one
			if (l < lastLine) {
				sb.push("\r\n");
			}
		}

		return sb.join("");
	},


	/**
	 * Fast and simple TSV encoder: same as `encodeCSV` with a tab as separator.
	 *
	 * Delegates to `this.encodeCSV`, so `encodeCSV` must be reachable from the array.
	 *
	 * @param {Array|null} headers - Optional header row.
	 * @param {boolean} trimValues - When truthy, each value is trimmed.
	 * @param {boolean} columnar - When truthy, the data is transposed first.
	 * @returns {string} The encoded text.
	 */
	encodeTSV: function(headers, trimValues, columnar) {
		return this.encodeCSV(headers, trimValues, columnar, "\t");
	},

	none: null,
};
88
89
// register funcs
90
// NOTE(review): presumably installs every entry of arrayFuncs on Array.prototype — confirm against UB.registerFuncs.
UB.registerFuncs(Array.prototype, arrayFuncs);
91
92
93
94
var stringFuncs = {

	/**
	 * Decodes the CSV text held in `this` and returns the cell data as an array.
	 *
	 * 3 modes are available:
	 * if `headers` is null - All cells are returned as a 2D array. (default)
	 * if `headers` is given and empty - The first row is stored in `headers`, the remaining cells are returned as a 2D array.
	 * if `headers` is given and not empty - The first row is discarded and every other row is returned as an object,
	 *   using the given headers as property names.
	 *
	 * Quoted values may contain the separator, doubled quotes (`""` -> `"`) and line breaks.
	 * A line break inside a quoted value is restored as "\n", because the original
	 * line ending is no longer known once the text has been split into lines.
	 *
	 * Known limitation: a line that ends with a separator, or with an empty quoted
	 * value, does not produce a trailing empty cell.
	 *
	 * Relies on helpers that are not defined in this file: `splitLines()` on strings,
	 * `exists()` and `transpose()` on arrays.
	 *
	 * @param {Array|null} headers - See the modes above.
	 * @param {boolean} trimValues - When truthy, every cell is trimmed.
	 * @param {boolean} [columnar=false] - When truthy (and rows are not objects), the result is passed through `transpose()`.
	 * @param {string} [seperator=","] - Cell separator, or "auto" to detect it with `UB.CSV_detectSeperator`.
	 * @returns {Array} Rows as arrays of strings, or as objects in the third mode.
	 */
	decodeCSV: function (headers, trimValues, columnar = false, seperator = ",") {
		const csvString = this.toString();

		// cut string into lines
		const lines = csvString.trim().splitLines();
		const sep = seperator === "auto" ? UB.CSV_detectSeperator(csvString, lines.length) : seperator;

		// config
		const hasHeaders = headers != null;
		const returnAsObjs = hasHeaders && headers.exists();

		// status
		let inQuoted = false;

		// result
		let linesData = [];
		let word = [];          // chars of the cell currently being read
		let lineWords = [];     // cells of the row currently being read
		const tempHeaders = []; // sink for the file's own header row in object mode

		// finishes the current cell and appends it to the current row
		const takeWord = function () {
			const text = word.join("");
			lineWords.push(trimValues ? text.trim() : text);
			word = [];
		};

		// per line
		for (let l = 0, ll = lines.length; l < ll; l++) {
			const line = lines[l];
			const isHeader = (l === 0 && hasHeaders);

			if (inQuoted) {

				// still inside a quoted value that spans several lines:
				// stay on the same row and put the line break back
				word.push("\n");

			} else {

				// start a new row; the header row goes into `headers`
				// (or into a throwaway array when the caller supplied headers)
				if (isHeader) {
					lineWords = returnAsObjs ? tempHeaders : headers;
				} else {
					lineWords = [];
					linesData.push(lineWords);
				}
			}

			// per char
			for (let c = 0, clast = line.length - 1; c <= clast; c++) {
				const ch = line.charAt(c);

				if (inQuoted) {

					if (ch === "\"") {
						if (line.charAt(c + 1) === "\"") {
							// doubled quote is an escaped quote
							c++;
							word.push("\"");
						} else {
							// single quote ends the quoted text
							inQuoted = false;
						}
						continue;
					}

					word.push(ch);

				} else {

					// quote means beginning quoted text
					if (ch === "\"") {
						inQuoted = true;
						continue;
					}

					// separator means end of cell
					if (ch === sep) {
						takeWord();
						continue;
					}

					word.push(ch);
				}
			}

			// end of line ends the cell, unless a quoted value continues on the next line
			if (!inQuoted && word.length > 0) {
				takeWord();
			}
		}

		// convert rows to objects
		if (returnAsObjs) {
			for (let r = 0, rl = linesData.length; r < rl; r++) {
				const row = linesData[r];
				const obj = {};
				for (let h = 0, hl = headers.length; h < hl; h++) {
					obj[headers[h]] = row[h];
				}
				linesData[r] = obj;
			}
		}

		// convert 2D array to columnar
		if (columnar && !returnAsObjs) {
			linesData = linesData.transpose();
		}

		return linesData;
	},

	/**
	 * Decodes the TSV text held in `this`: same as `decodeCSV` with a tab as separator.
	 *
	 * Delegates to `decodeCSV` on the string itself, so that method must be reachable from strings.
	 *
	 * @param {Array|null} headers - See `decodeCSV` for the three modes.
	 * @param {boolean} trimValues - When truthy, every cell is trimmed.
	 * @param {boolean} [columnar=false] - When truthy, a 2D result is transposed.
	 * @returns {Array} The decoded rows.
	 */
	decodeTSV: function (headers, trimValues, columnar = false) {
		const tsvString = this.toString();
		return tsvString.decodeCSV(headers, trimValues, columnar, "\t");
	},

	none: null,
};
253
254
// register funcs
255
// NOTE(review): presumably installs every entry of stringFuncs on String.prototype — confirm against UB.registerFuncs.
UB.registerFuncs(String.prototype, stringFuncs);
256
257
258
259
260
261
/* File Utils - NodeJS only */
262
263
var fs = require('fs');
264
var pathUtil = require('path');
265
266
arrayFuncs = {

	/**
	 * Encodes the array held in `this` as CSV and writes the text to a file.
	 *
	 * Both steps are delegated: `encodeCSV` on the array produces the text and
	 * `saveToText` on the resulting string writes it (neither is defined in this block).
	 *
	 * @param {string} filePath - Destination passed to `saveToText`.
	 * @param {Array|null} [headers=null] - Optional header row.
	 * @param {boolean} [trimValues=true] - Whether values are trimmed while encoding.
	 * @param {boolean} [columnar=false] - Whether the data is transposed before encoding.
	 * @param {string} [fixedSep=","] - Cell separator.
	 */
	saveToCSV: function(filePath, headers = null, trimValues = true, columnar = false, fixedSep = ",") {
		// encode first, then hand the text to the file writer
		const csvText = this.encodeCSV(headers, trimValues, columnar, fixedSep);
		csvText.saveToText(filePath);
	},

	none: null,
};
280
281
// register funcs
282
// NOTE(review): presumably installs the file-related arrayFuncs entries on Array.prototype — confirm against UB.registerFuncs.
UB.registerFuncs(Array.prototype, arrayFuncs);
283
284
285
stringFuncs = {

	/**
	 * Treats the string held in `this` as a file path, loads that file as text
	 * and decodes it as CSV.
	 *
	 * Loading is delegated to `loadText` and parsing to `decodeCSV`, both called
	 * on strings (neither is defined in this block).
	 *
	 * 3 modes are available:
	 * if `headers` is null - All cells are returned as a 2D array. (default)
	 * if `headers` is given and 0 length - The first row is stored in `headers`, the remaining cells are returned as a 2D array.
	 * if `headers` is given and >0 length - All rows are returned as objects, with the given headers treated as the prop names for the objects.
	 *
	 * @param {string} [encoding="utf8"] - Encoding passed to `loadText`.
	 * @param {Array|null} [headers=null] - See the modes above.
	 * @param {boolean} [trimValues=true] - Whether cells are trimmed.
	 * @param {boolean} [columnar=false] - Whether a 2D result is transposed.
	 * @param {string} [seperator="auto"] - Cell separator, "auto" to detect it.
	 * @returns {Array|null} The decoded data, or null when `loadText` returned null/undefined.
	 */
	loadCSV: function(encoding = "utf8", headers = null, trimValues = true, columnar = false, seperator = "auto") {
		const path = this.toString();
		const contents = path.loadText(encoding);

		// nothing loaded: report it as null instead of parsing
		return contents == null
			? null
			: contents.decodeCSV(headers, trimValues, columnar, seperator);
	},

	none: null,
};
311
312
// register funcs
313
// NOTE(review): presumably installs the file-related stringFuncs entries on String.prototype — confirm against UB.registerFuncs.
UB.registerFuncs(String.prototype, stringFuncs);
314
315
//endRemoveIf(nodejs)
316
317
318
319
320
321
// UTILS
322
// Candidate separator characters that UB.CSV_detectSeperator counts and chooses from.
UB.CSV_seperators = [',', ';', ':', '\t'];
323
324
/**
 * Guesses the cell separator of a CSV text by counting how often each candidate
 * in `UB.CSV_seperators` occurs outside of quoted values.
 *
 * The last 50 characters of the text are not scanned, so texts of 50 characters
 * or fewer always yield ','.
 *
 * @param {string} csvString - The raw CSV text.
 * @param {number} rowCount - Not used by this function.
 * @returns {string} The most frequent candidate, or ',' when none was found.
 */
UB.CSV_detectSeperator = function(csvString, rowCount) {
	const candidates = UB.CSV_seperators;

	// one counter per candidate; NOTE(review): assumes UB.newArray(0, n) yields n zeros — confirm
	const sepCount = UB.newArray(0, candidates.length);

	let quoted = false;     // inside a quoted value
	let firstChar = true;   // the next char is the first char of a value
	let foundAny = false;

	let c = 0;
	const cl = csvString.length - 50;  // skip last few chars
	while (c < cl) {
		const character = csvString.charAt(c);
		c++;  // from here on, c is the index of the NEXT character

		switch (character) {
			case '"':
				if (quoted) {
					if (csvString.charAt(c) != '"') {
						// quote not followed by another quote: end of the quoted value
						quoted = false;
					} else {
						// doubled quote is an escaped quote: skip the second one
						c++;
					}
				} else if (firstChar) {
					// a value is quoted only when the quote is its first char
					quoted = true;
				}
				break;
			case '\r':
			case '\n':
				if (!quoted) {
					// a new row starts a new value
					firstChar = true;
					continue;
				}
				break;
			default:
				if (!quoted) {
					const index = candidates.indexOf(character);
					if (index != -1) {
						sepCount[index]++;
						firstChar = true;
						foundAny = true;
						continue;
					}
				}
				break;
		}

		// any other char means we are past the first char of the value
		firstChar = false;
	}

	return !foundAny ? ',' : candidates[sepCount.indexOfMax()];
};
380
381
382