1 | /** global: UB */ |
||
2 | //removeIf(nodejs) |
||
3 | |||
4 | |||
5 | |||
6 | |||
var arrayFuncs = {

    /**
     * Encodes a 2D array (an array of rows, each an array of cells) as CSV text.
     *
     * A cell is wrapped in double quotes when its text contains a double quote,
     * the active separator, or a line break; embedded double quotes are doubled.
     * `null` / `undefined` cells are written as empty fields. Rows are joined
     * with "\r\n" and no line break is added after the last row.
     *
     * @param {Array} [headers] - Optional header cells, written as the first row when not null.
     * @param {boolean} [trimValues] - When truthy, every cell's text is trimmed before it is written.
     * @param {boolean} [columnar] - When truthy, `this` is passed through `transpose()` before encoding.
     * @param {string} [seperator=","] - Field separator placed between cells.
     * @returns {string} The encoded text.
     */
    encodeCSV: function(headers, trimValues, columnar, seperator = ",") {
        var linesData = this;

        if (columnar) {
            linesData = linesData.transpose();
        }

        var sb = [];
        var lastLine = linesData.length - 1;

        // line -1 is the header row (only visited when headers were given)
        for (var l = (headers != null ? -1 : 0); l <= lastLine; l++) {

            // fetch header / line data words
            var words = l === -1 ? headers : linesData[l];
            var lastWord = words.length - 1;

            for (var v = 0; v <= lastWord; v++) {

                // null / undefined cells produce an empty field
                if (words[v] != null) {

                    // convert value to text
                    var word = words[v].toString();
                    if (trimValues) {
                        word = word.trim();
                    }

                    var hasQuote = word.indexOf('"') > -1;

                    // Quote against the separator actually in use (not a hard-coded comma)
                    // and against line breaks, which would otherwise split the row.
                    var needsQuotes = hasQuote ||
                        word.indexOf(seperator) > -1 ||
                        word.indexOf("\n") > -1 ||
                        word.indexOf("\r") > -1;

                    if (needsQuotes) {
                        sb.push('"');
                        // embedded quotes are escaped by doubling them
                        sb.push(hasQuote ? word.split('"').join('""') : word);
                        sb.push('"');
                    } else {
                        sb.push(word);
                    }
                }

                // separator between cells, never after the last one
                if (v < lastWord) {
                    sb.push(seperator);
                }
            }

            // line break between rows, never after the last one
            if (l < lastLine) {
                sb.push("\r\n");
            }
        }

        return sb.join("");
    },


    /**
     * Encodes a 2D array as TSV text by delegating to `encodeCSV` with a tab separator.
     * Requires `encodeCSV` to be callable on the array itself.
     *
     * @param {Array} [headers] - Optional header cells, written as the first row when not null.
     * @param {boolean} [trimValues] - When truthy, every cell's text is trimmed.
     * @param {boolean} [columnar] - When truthy, the data is transposed before encoding.
     * @returns {string} The encoded text.
     */
    encodeTSV: function(headers, trimValues, columnar) {
        var linesData = this;
        return linesData.encodeCSV(headers, trimValues, columnar, "\t");
    },

    none:null,
}
||
88 | |||
// register funcs: passes the encoder table above to UB.registerFuncs together with Array.prototype
// (presumably installing each entry as an array method - encodeTSV calls encodeCSV on the array itself; confirm in UB)
UB.registerFuncs(Array.prototype, arrayFuncs);
||
91 | |||
92 | |||
93 | |||
var stringFuncs = {

    /**
     * Decodes this string as CSV text and returns the cell data.
     *
     * Quoted cells may contain the separator, doubled ("") quotes and line breaks.
     *
     * 3 modes are available:
     *   if `headers` is null                 - all rows are returned as a 2D array. (default)
     *   if `headers` is given and empty      - the cells of the first row are pushed into `headers`
     *                                          (the caller's array is mutated); the remaining rows
     *                                          are returned as a 2D array.
     *   if `headers` is given and non-empty  - the first row is parsed and discarded; every other row
     *                                          is returned as an object whose property names are
     *                                          the given headers.
     *
     * NOTE(review): an empty cell at the very end of a line (`a,` or `a,""`) is not emitted,
     * so such rows are one cell shorter than their siblings.
     *
     * @param {Array} [headers] - Selects the mode, see above.
     * @param {boolean} [trimValues] - When truthy, every cell is trimmed.
     * @param {boolean} [columnar=false] - When truthy (and rows are not objects), the result is
     *                                     passed through `transpose()`.
     * @param {string} [seperator=","] - Field separator, or "auto" to let
     *                                   `UB.CSV_detectSeperator` pick one.
     * @returns {Array} Rows as arrays of strings, or as objects in the third mode.
     */
    decodeCSV: function (headers, trimValues, columnar = false, seperator = ",") {
        var csvString = this.toString();

        // cut String into lines
        var lines = csvString.trim().splitLines();
        var sep = seperator == "auto" ? UB.CSV_detectSeperator(csvString, lines.length) : seperator;

        // config
        var hasHeaders = headers != null;
        var returnAsObjs = hasHeaders && headers.exists();

        // status: true while inside a quoted cell (may carry over to the next line)
        var inQuoted = false;

        // result
        var linesData = [];
        var word = [];          // characters of the cell being read
        var tempHeaders = [];   // sink for the file's header row when the caller supplied names
        var lineWords = [];     // cells of the row being read

        // per line
        for (var l = 0, ll = lines.length; l < ll; l++) {
            var line = lines[l];
            var isHeader = (l === 0 && hasHeaders);

            if (inQuoted) {

                // Still inside a quoted cell from the previous line: keep filling the same
                // cell and row, and put back the line break that splitting presumably removed.
                word.push("\n");

            } else {

                // start a new row: header cells go to the headers array (or are discarded),
                // data cells go to a fresh row in the result
                lineWords = [];
                if (isHeader) {
                    lineWords = returnAsObjs ? tempHeaders : headers;
                } else {
                    linesData.push(lineWords);
                }
            }

            // per char
            for (var c = 0, clast = line.length - 1; c <= clast; c++) {
                var ch = line.charAt(c);

                if (inQuoted) {

                    if (ch == "\"") {
                        if (line.charAt(c + 1) == "\"") {
                            // doubled quote is an escaped literal quote
                            c++;
                            word.push("\"");
                        } else {
                            // lone quote ends the quoted text
                            inQuoted = false;
                        }
                        continue;
                    }

                    // normal char
                    word.push(ch);

                } else {

                    // quote means beginning quoted text
                    if (ch == "\"") {
                        inQuoted = true;
                        continue;
                    }

                    // separator means end of cell
                    if (ch == sep) {
                        lineWords.push(trimValues ? word.join("").trim() : word.join(""));
                        word = [];
                        continue;
                    }

                    // normal char
                    word.push(ch);
                }
            }

            // At end of line flush the pending cell, unless a quoted cell continues on the
            // next line. This also covers lines whose last character is a closing quote.
            if (!inQuoted && word.length > 0) {
                lineWords.push(trimValues ? word.join("").trim() : word.join(""));
                word = [];
            }
        }

        // convert rows to objects keyed by the caller's headers
        if (returnAsObjs) {
            for (var r = 0, rl = linesData.length; r < rl; r++) {
                var row = linesData[r];
                var obj = {};

                for (var h = 0, hl = headers.length; h < hl; h++) {
                    obj[headers[h]] = row[h];
                }
                linesData[r] = obj;
            }
        }

        // convert 2D array to columnar
        if (columnar && !returnAsObjs) {
            linesData = linesData.transpose();
        }

        return linesData;
    },

    /**
     * Decodes this string as TSV text by delegating to `decodeCSV` with a tab separator.
     * The `headers` modes are the same as for `decodeCSV`.
     *
     * @param {Array} [headers] - Selects the mode (null / empty array / array of property names).
     * @param {boolean} [trimValues] - When truthy, every cell is trimmed.
     * @param {boolean} [columnar=false] - When truthy (and rows are not objects), the result is transposed.
     * @returns {Array} Rows as arrays of strings, or as objects.
     */
    decodeTSV: function (headers, trimValues, columnar = false) {
        var tsvString = this.toString();
        return tsvString.decodeCSV(headers, trimValues, columnar, "\t");
    },

    none:null,
}
||
253 | |||
// register funcs: passes the decoder table above to UB.registerFuncs together with String.prototype
// (presumably installing each entry as a string method - decodeTSV calls decodeCSV on the string itself; confirm in UB)
UB.registerFuncs(String.prototype, stringFuncs);
||
256 | |||
257 | |||
258 | |||
259 | |||
260 | |||
261 | /* File Utils - NodeJS only */ |
||
262 | |||
263 | var fs = require('fs'); |
||
264 | var pathUtil = require('path'); |
||
265 | |||
arrayFuncs = {

    /**
     * Encodes this array as CSV text and writes the text to a file.
     *
     * The array is encoded with `encodeCSV` and the resulting string is handed
     * to its `saveToText` method.
     *
     * @param {string} filePath - Destination passed to `saveToText`.
     * @param {Array} [headers=null] - Optional header row for `encodeCSV`.
     * @param {boolean} [trimValues=true] - Trim every cell before writing.
     * @param {boolean} [columnar=false] - Transpose the data before encoding.
     * @param {string} [fixedSep=","] - Field separator.
     */
    saveToCSV: function(filePath, headers = null, trimValues = true, columnar = false, fixedSep = ",") {
        // encode first, then let the string write itself out
        this.encodeCSV(headers, trimValues, columnar, fixedSep).saveToText(filePath);
    },

    none:null,
}

// register the file-saving helper for arrays
UB.registerFuncs(Array.prototype, arrayFuncs);
||
283 | |||
284 | |||
stringFuncs = {

    /**
     * Treats this string as a file path, loads the file as text and decodes it as CSV.
     *
     * The text is obtained through `loadText(encoding)` and decoded by `decodeCSV`;
     * the `headers` argument selects the same three modes as in `decodeCSV`:
     *   null            - all rows as a 2D array (default)
     *   empty array     - first row is stored in `headers`, remaining rows as a 2D array
     *   non-empty array - rows are returned as objects keyed by the given headers
     *
     * @param {string} [encoding="utf8"] - Encoding passed to `loadText`.
     * @param {Array} [headers=null] - Mode selector, see above.
     * @param {boolean} [trimValues=true] - Trim every cell.
     * @param {boolean} [columnar=false] - Transpose the 2D result.
     * @param {string} [seperator="auto"] - Field separator, or "auto" for detection.
     * @returns {Array|null} The decoded rows, or null when `loadText` yields null/undefined.
     */
    loadCSV: function(encoding = "utf8", headers = null, trimValues = true, columnar = false, seperator = "auto") {
        var text = this.toString().loadText(encoding);

        // nothing loaded, nothing to parse
        return text == null
            ? null
            : text.decodeCSV(headers, trimValues, columnar, seperator);
    },

    none:null,
}

// register the file-loading helper for strings
UB.registerFuncs(String.prototype, stringFuncs);
||
314 | |||
315 | //endRemoveIf(nodejs) |
||
316 | |||
317 | |||
318 | |||
319 | |||
320 | |||
// UTILS

/** Candidate field separators probed by `UB.CSV_detectSeperator`. */
UB.CSV_seperators = [',', ';', ':', '\t'];

/**
 * Guesses the field separator of a CSV text by counting how often each candidate
 * from `UB.CSV_seperators` occurs outside of quoted values.
 *
 * A value counts as quoted only when the double quote is its first character;
 * inside a quoted value a doubled quote ("") is skipped as an escaped quote.
 *
 * @param {string} csvString - The raw CSV text.
 * @param {number} rowCount - Number of lines in the text; currently unused.
 * @returns {string} The candidate chosen via `indexOfMax()` over the counts,
 *                   or ',' when no candidate was seen at all.
 */
UB.CSV_detectSeperator = function(csvString, rowCount) {
    // presumably one zero-initialised counter per candidate - confirm against UB.newArray
    var sepCount = UB.newArray(0, UB.CSV_seperators.length);

    var character;

    var quoted = false;
    var firstChar = true;   // true while the next character would start a new value
    var foundAny = false;

    // The last few characters are skipped. Inputs too short for that are scanned
    // in full, otherwise nothing at all would be inspected.
    var tailSkip = 50;
    var c = 0;
    var cl = csvString.length > tailSkip ? csvString.length - tailSkip : csvString.length;
    while (c < cl) {
        character = csvString.charAt(c);
        c++; // from here on `c` is the index of the NEXT character

        switch (character) {
            case '"':
                if (quoted) {
                    if (csvString.charAt(c) != '"') {
                        // lone quote closes the quoted value
                        quoted = false;
                    } else {
                        // doubled quote: skip the peeked second quote
                        c++;
                    }
                } else if (firstChar) {
                    // a quote only opens a quoted value when it is the value's first character
                    quoted = true;
                }
                break;
            case '\r':
            case '\n':
                if (!quoted) {
                    // line break outside quotes: the next character starts a new value
                    firstChar = true;
                    continue;
                }
                break;
            default:
                if (!quoted) {
                    var index = UB.CSV_seperators.indexOf(character);
                    if (index != -1) {
                        sepCount[index]++;
                        firstChar = true;
                        foundAny = true;
                        continue;
                    }
                }
                break;
        }

        // any other character means we are now inside a value
        if (firstChar) {
            firstChar = false;
        }
    }

    return !foundAny ? ',' : UB.CSV_seperators[sepCount.indexOfMax()];
}
||
380 | |||
381 | |||
382 |