1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* CSVelte: Slender, elegant CSV for PHP |
4
|
|
|
* |
5
|
|
|
* Inspired by Python's CSV module and Frictionless Data and the W3C's CSV |
6
|
|
|
* standardization efforts, CSVelte was written in an effort to take all the |
7
|
|
|
* suck out of working with CSV. |
8
|
|
|
* |
9
|
|
|
* @version v0.2 |
10
|
|
|
* @copyright Copyright (c) 2016 Luke Visinoni <[email protected]> |
11
|
|
|
* @author Luke Visinoni <[email protected]> |
12
|
|
|
* @license https://github.com/deni-zen/csvelte/blob/master/LICENSE The MIT License (MIT) |
13
|
|
|
*/ |
14
|
|
|
namespace CSVelte; |
15
|
|
|
|
16
|
|
|
use \Closure; |
17
|
|
|
use \InvalidArgumentException; |
18
|
|
|
use \FilterIterator; |
19
|
|
|
use CSVelte\IO\Stream; |
20
|
|
|
use CSVelte\Contract\Readable; |
21
|
|
|
use CSVelte\Table\Row; |
22
|
|
|
use CSVelte\Table\HeaderRow; |
23
|
|
|
use CSVelte\Exception\EndOfFileException; |
24
|
|
|
use CSVelte\Reader\FilteredIterator as FilteredReader; |
25
|
|
|
|
26
|
|
|
/**
 * CSV Reader
 *
 * Reads CSV data from any object that implements CSVelte\Contract\Readable.
 *
 * The reader is an iterator over the data rows of its source. Physical lines
 * are pulled from the source one at a time, glued together when a quoted field
 * spans several lines, split on the flavor's delimiter and wrapped in
 * CSVelte\Table\Row objects. When the flavor says the data has a header, the
 * first row is kept aside as a CSVelte\Table\HeaderRow and is not iterated.
 *
 * @package    CSVelte
 * @subpackage Reader
 * @since      v0.1
 * @todo       Also, is there any way to do some kind of caching or something? Probably
 *             not but if you could that would be a cool feature...
 */
class Reader implements \Iterator
{
    /**
     * Placeholder substituted for delimiters found inside quoted text.
     */
    const PLACEHOLDER_DELIM = '[=[__DLIM__]=]';

    /**
     * Placeholder substituted for line terminators found inside quoted text.
     */
    const PLACEHOLDER_NEWLINE = '[=[__NWLN__]=]';

    /**
     * This class supports any sources of input that implements this interface.
     * This way I can read from local files, streams, FTP, any class that implements
     * the "Readable" interface
     * @var \CSVelte\Contract\Readable
     */
    protected $source;

    /**
     * @var \CSVelte\Flavor The "flavor" or format of the CSV being read
     */
    protected $flavor;

    /**
     * @var \CSVelte\Table\Row|bool|null Row currently loaded into memory, false
     *     once the end of the source has been reached, null when nothing is loaded
     */
    protected $current;

    /**
     * @var int The current line being read (from input source)
     */
    protected $line = 0;

    /**
     * @var \CSVelte\Table\HeaderRow The header row (if any)
     */
    protected $header;

    /**
     * @var array An array of callback functions
     */
    protected $filters = array();

    /**
     * @var bool True if current line ended while inside a quoted string
     */
    protected $open = false;

    /**
     * @var bool True if last character read was the escape character
     */
    protected $escape = false;

    /**
     * Reader Constructor.
     * Initializes a reader object using an input source and optionally a flavor.
     * The source is set first because the flavor may have to be detected from
     * it; the reader is then rewound so that the first row is already loaded.
     *
     * @param \CSVelte\Contract\Readable|object|string $input The source of our CSV data
     * @param \CSVelte\Flavor|array|null $flavor The "flavor" or format specification object
     * @throws \InvalidArgumentException when no usable flavor can be obtained
     */
    public function __construct($input, $flavor = null)
    {
        $this->setSource($input)
             ->setFlavor($flavor)
             ->rewind();
    }

    /**
     * Set the flavor.
     *
     * Set the ``CSVelte\Flavor`` object, used to determine CSV format. An array
     * is turned into a flavor object; when no flavor is given at all the taster
     * is asked to detect one from the source. If the resulting flavor does not
     * say whether there is a header row, the taster is asked for that as well.
     *
     * @param \CSVelte\Flavor|array|null $flavor Either an array or a flavor object
     * @return $this
     * @throws \InvalidArgumentException when $flavor is neither null, an array
     *     nor an object
     */
    protected function setFlavor($flavor = null)
    {
        if (is_array($flavor)) {
            $flavor = new Flavor($flavor);
        }
        $taster = new Taster($this->source);
        // @todo put this inside a try/catch
        if (is_null($flavor)) {
            $flavor = $taster->lick();
        }
        // Anything that is not an object at this point would only fail further
        // down with a fatal "member function on non-object" error.
        if (!is_object($flavor)) {
            throw new InvalidArgumentException(
                'Flavor must be an array, a flavor object or null, got: ' . gettype($flavor)
            );
        }
        if (is_null($flavor->header)) {
            // Flavor is immutable, give me a new one with header set to lickHeader return val
            $flavor = $flavor->copy([
                'header' => $taster->lickHeader($flavor->delimiter, $flavor->lineTerminator),
            ]);
        }
        $this->flavor = $flavor;

        return $this;
    }

    /**
     * Set the reader source.
     *
     * The reader can accept anything that implements Readable and is actually
     * readable (can be read). This will make sure that whatever is passed to
     * the reader meets these expectations and set $this->source.
     *
     * A readable Readable is used as-is. A string (or an object convertible to
     * a string) naming an existing file is opened as a stream on that path.
     * Everything else is handed to Stream::streamize().
     *
     * @param \CSVelte\Contract\Readable|object|string $input See description
     * @return $this
     */
    protected function setSource($input)
    {
        if ($input instanceof Readable && $input->isReadable()) {
            $this->source = $input;

            return $this;
        }

        // Only values that can safely be cast to a string are tested as a file
        // name; casting arrays or arbitrary objects raises notices/errors.
        $stringable = is_string($input)
            || (is_object($input) && method_exists($input, '__toString'));

        if ($stringable && file_exists($path = (string) $input)) {
            $this->source = new Stream($path);
        } else {
            $this->source = Stream::streamize($input);
        }

        return $this;
    }

    /**
     * Load a line into memory
     *
     * Does nothing when a row (or the end-of-file marker) is already loaded.
     * Otherwise reads and parses the next row. The first row becomes the header
     * when the flavor declares one, in which case $this->current stays null.
     * Reaching the end of the source sets $this->current to false.
     *
     * @return void
     * @access protected
     */
    protected function load()
    {
        if (!is_null($this->current)) {
            return;
        }
        try {
            $line = $this->readLine();
            $this->line++;
            $parsed = $this->parse($line);
            if ($this->hasHeader() && $this->line === 1) {
                $this->header = new HeaderRow($parsed);
            } else {
                $this->current = new Row($parsed);
                if ($this->header) {
                    $this->current->setHeaderRow($this->header);
                }
            }
        } catch (EndOfFileException $e) {
            $this->current = false;
        }
    }

    /**
     * Read single line from CSV data source (stream, file, etc.), taking into
     * account CSV's de-facto quoting rules with respect to designated line
     * terminator character when they fall within quoted strings.
     *
     * @return string A CSV row (could possibly span multiple lines depending on
     *     quoting and escaping)
     * @throws \CSVelte\Exception\EndOfFileException when eof has been reached
     *     and the read buffer has all been returned
     */
    protected function readLine()
    {
        $flavor = $this->getFlavor();
        $eol = $flavor->lineTerminator;
        $lines = [];
        try {
            do {
                $line = $this->source->readLine($eol);
                if (false === $line) {
                    throw new EndOfFileException("End of file reached: " . $this->source->getName());
                }
                // NOTE: rtrim() treats $eol as a list of characters to strip.
                $lines[] = rtrim($line, $eol);
                // keep reading while the row ends inside an open quoted string
            } while ($this->inQuotedString(end($lines), $flavor->quoteChar, $flavor->escapeChar));
        } catch (EndOfFileException $e) {
            // only throw the exception if we don't already have lines in the buffer
            if (!count($lines)) {
                throw $e;
            }
        }

        return rtrim(implode($eol, $lines), $eol);
    }

    /**
     * Determine whether last line ended while a quoted string was still "open"
     *
     * This method is used in a loop to determine if each line being read ends
     * while a quoted string is still "open". The result depends on, and updates,
     * the $open and $escape properties, so state carries over from one call to
     * the next.
     *
     * @param string $line Line of csv to analyze
     * @param string $quoteChar The quote/enclosure character to use
     * @param string $escapeChar The escape char/sequence to use
     * @return bool True if currently within a quoted string
     */
    protected function inQuotedString($line, $quoteChar, $escapeChar)
    {
        $length = strlen($line);
        for ($i = 0; $i < $length; $i++) {
            if ($this->escape) {
                // previous character was the escape character: skip this one
                $this->escape = false;
                continue;
            }
            $char = $line[$i];
            $this->escape = ($char === $escapeChar);
            if ($char === $quoteChar) {
                $this->open = !$this->open;
            }
        }

        return $this->open;
    }

    /**
     * Flavor Getter.
     *
     * Retrieve the "flavor" object being used by the reader
     *
     * @return \CSVelte\Flavor
     * @access public
     */
    public function getFlavor()
    {
        return $this->flavor;
    }

    /**
     * Check if flavor object defines header.
     *
     * Determine whether or not the input source's CSV data contains a header
     * row or not. Unless you explicitly specify so within your Flavor object,
     * this method is a logical best guess. The CSV format does not
     * provide metadata of any kind and therefore does not provide this info.
     *
     * @return bool True if the input source has a header row (or, to be more
     *     accurate, if the flavor SAYS it has a header row)
     * @todo Rather than always reading in Taster::SAMPLE_SIZE, read in ten lines at a time until
     *     whatever method it is has enough data to make a reliable decision/guess
     */
    public function hasHeader()
    {
        return (bool) $this->getFlavor()->header;
    }

    /**
     * Temporarily replace special characters within a quoted string
     *
     * Replace all instances of newlines and whatever character you specify (as
     * the delimiter) that are contained within quoted text. The replacements are
     * simply a special placeholder string. This is done so that I can use the
     * very unsmart "explode" function and not have to worry about it exploding
     * on delimiters or newlines within quotes. Once I have exploded, I typically
     * sub back in the real characters before doing anything else.
     *
     * NOTE(review): quoted regions are found by pairing quote characters only,
     * so a quote escaped with a separate escape character is presumably treated
     * as a closing quote here -- confirm before relying on that case.
     *
     * @param string $data The string to do the replacements on
     * @param string $delim The delimiter character to replace
     * @param string $quo The quote character
     * @param string $eol Line terminator character/sequence
     * @return string The data with replacements performed
     * @access protected
     * @internal
     * @todo I could probably pass in (maybe optionally) the newline character I
     *     want to replace as well. I'll do that if I need to.
     * @todo Create a regex class so you can do $regex->escape() rather than
     *     preg_quote
     */
    protected function replaceQuotedSpecialChars($data, $delim, $quo, $eol)
    {
        $pattern = '/(['. preg_quote($quo, '/') . '])(.*)\1/imsU';

        return preg_replace_callback($pattern, function ($matches) use ($delim, $eol) {
            // line terminators first, then delimiters
            return str_replace(
                [$eol, $delim],
                [self::PLACEHOLDER_NEWLINE, self::PLACEHOLDER_DELIM],
                $matches[0]
            );
        }, $data);
    }

    /**
     * Undo temporary special char replacements
     *
     * Replace the special character placeholders with the characters they
     * originally substituted.
     *
     * @param string $data The data to undo replacements in
     * @param string $delim The delimiter character
     * @param string $eol The character or string of characters used to terminate lines
     * @return string The data with placeholders replaced with original characters
     * @internal
     */
    protected function undoReplaceQuotedSpecialChars($data, $delim, $eol)
    {
        return str_replace(
            [self::PLACEHOLDER_DELIM, self::PLACEHOLDER_NEWLINE],
            [$delim, $eol],
            $data
        );
    }

    /**
     * Remove quotes wrapping text.
     *
     * The wrapping quotes (the flavor's quote character, and only when the
     * whole value is wrapped) are stripped first and escaped quotes are
     * collapsed afterwards. Doing it in that order keeps an empty quoted field
     * from being mistaken for an escaped quote.
     *
     * @param string $data The data to unquote
     * @return string The data with quotes stripped from the outside of it
     * @internal
     */
    protected function unQuote($data)
    {
        $flavor = $this->getFlavor();
        $quoteChar = $flavor->quoteChar;
        // with "double quote" escaping, the quote character escapes itself
        $escapeChar = $flavor->doubleQuote ? $quoteChar : $flavor->escapeChar;

        if (strlen((string) $quoteChar) > 0) {
            $quote = preg_quote($quoteChar, '/');
            $stripped = preg_replace('/\A' . $quote . '(.*)' . $quote . '\z/s', '\1', $data);
            if (null !== $stripped) {
                $data = $stripped;
            }
        }

        return $this->unEscape($data, $escapeChar, $quoteChar);
    }

    /**
     * Collapse escaped quote characters into plain quote characters.
     *
     * @param string $str The string to un-escape
     * @param string $esc The escape character
     * @param string $quo The quote character
     * @return string The string with every escape+quote pair replaced by a quote
     * @internal
     * @todo This actually shouldn't even be necessary. Characters should be read
     *     in one at a time and a quote that follows another should just be ignored
     *     deeming this unnecessary.
     */
    protected function unEscape($str, $esc, $quo)
    {
        return str_replace($esc . $quo, $quo, $str);
    }

    /**
     * Parse a line of CSV data into an array of columns
     *
     * @param string $line A line of CSV data to parse
     * @return array An array of columns
     * @access protected
     * @internal
     */
    protected function parse($line)
    {
        $flavor = $this->getFlavor();
        // hide delimiters/newlines that live inside quotes, then split
        $replaced = $this->replaceQuotedSpecialChars(
            $line,
            $flavor->delimiter,
            $flavor->quoteChar,
            $flavor->lineTerminator
        );
        $columns = explode($flavor->delimiter, $replaced);

        return array_map(function ($val) use ($flavor) {
            $undone = $this->undoReplaceQuotedSpecialChars($val, $flavor->delimiter, $flavor->lineTerminator);

            return $this->unQuote($undone);
        }, $columns);
    }

    /**
     * Get the row currently loaded into memory.
     *
     * @return \CSVelte\Table\Row|bool|null The current row, or false at the end
     *     of the source
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->current;
    }

    /**
     * Discard the current row and load the next one.
     *
     * @return \CSVelte\Table\Row|bool|null The newly loaded row, or false at
     *     the end of the source
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->current = null;
        $this->load();

        return $this->current;
    }

    /**
     * Tell whether a row is currently loaded.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return (bool) $this->current;
    }

    /**
     * Get the number of the line the current row was read from.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->line;
    }

    /**
     * Go back to the beginning of the source and load the first data row.
     *
     * The quote/escape scanner state is reset as well, so that a previous pass
     * that stopped inside an unterminated quoted string cannot influence how
     * rows are detected on this pass.
     *
     * @return \CSVelte\Table\Row|bool|null The first data row, or false when
     *     there is none
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->line = 0;
        $this->open = false;
        $this->escape = false;
        $this->source->rewind();
        $this->current = null;
        $this->load();
        if ($this->hasHeader()) {
            // the first load() consumed the header row, move on to the data
            $this->next();
        }

        return $this->current();
    }

    /**
     * Get the header row.
     *
     * @return \CSVelte\Table\HeaderRow|null The header row, if one was read
     */
    public function header()
    {
        return $this->header;
    }

    /**
     * Register a filter callback.
     *
     * @param \Closure $filter The callback to add to the list of filters
     * @return $this
     * @todo Closure should be changed to "Callable" (php5.4+)
     */
    public function addFilter(Closure $filter)
    {
        $this->filters[] = $filter;

        return $this;
    }

    /**
     * Register several filter callbacks at once.
     *
     * @param array $filters A list of closures, added in the given order
     * @return $this
     */
    public function addFilters(array $filters)
    {
        foreach ($filters as $filter) {
            $this->addFilter($filter);
        }

        return $this;
    }

    /**
     * Get an iterator over this reader that applies the registered filters.
     *
     * @return \CSVelte\Reader\FilteredIterator
     */
    public function filter()
    {
        return new FilteredReader($this, $this->filters);
    }

    /**
     * Read every row and convert each of them to an array.
     *
     * @return array One array per row, keyed by the value key() had for the row
     */
    public function toArray()
    {
        return array_map(function ($row) {
            return $row->toArray();
        }, iterator_to_array($this));
    }
}
426
|
|
|
|
This check looks at variables that have been passed in as parameters and are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.