Completed
Push — master ( 5c1aea...9ef1c4 )
by Luke
03:03
created

src/CSVelte/Reader.php (1 issue)

assigning incompatible types to properties.

Bug Documentation Major

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * CSVelte: Slender, elegant CSV for PHP
4
 *
5
 * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV
6
 * standardization efforts, CSVelte was written in an effort to take all the
7
 * suck out of working with CSV.
8
 *
9
 * @version   v0.2.1
10
 * @copyright Copyright (c) 2016 Luke Visinoni <[email protected]>
11
 * @author    Luke Visinoni <[email protected]>
12
 * @license   https://github.com/deni-zen/csvelte/blob/master/LICENSE The MIT License (MIT)
13
 */
14
namespace CSVelte;
15
16
use CSVelte\Contract\Streamable;
17
18
use CSVelte\Table\Row;
19
use CSVelte\Table\HeaderRow;
20
use CSVelte\Reader\FilteredIterator as FilteredReader;
21
22
use CSVelte\Exception\EndOfFileException;
23
24
use function
25
    CSVelte\streamize,
26
    CSVelte\taste,
27
    CSVelte\taste_has_header,
28
    CSVelte\collect;
29
30
/**
31
 * CSV Reader
32
 *
33
 * Reads CSV data from any object that implements CSVelte\Contract\Streamable.
34
 *
35
 * @package CSVelte
36
 * @subpackage Reader
37
 * @since v0.1
38
 * @todo Also, is there any way to do some kind of caching or something? Probably
39
 *     not but if you could that would be a cool feature...
40
 */
41
class Reader implements \Iterator
{
    /** Placeholder substituted for delimiters found inside quoted text while splitting */
    const PLACEHOLDER_DELIM   = '[=[__DLIM__]=]';

    /** Placeholder substituted for line terminators found inside quoted text while splitting */
    const PLACEHOLDER_NEWLINE = '[=[__NWLN__]=]';

    /**
     * The input source. Anything that is not already a Streamable is passed
     * through streamize() by setSource() before being stored here.
     *
     * @var Contract\Streamable
     */
    protected $source;

    /**
     * @var Flavor The "flavor" or format of the CSV being read
     */
    protected $flavor;

    /**
     * Row currently loaded into memory.
     *
     * Null means "nothing loaded yet" (load() only reads when this is null);
     * false means the end of the input source has been reached.
     *
     * @var Table\Row|false|null
     */
    protected $current;

    /**
     * @var int The current line being read (from input source)
     */
    protected $line = 0;

    /**
     * @var Table\HeaderRow|null The header row (if any)
     */
    protected $header;

    /**
     * @var callable[] An array of callback functions
     */
    protected $filters = [];

    /**
     * @var bool True if current line ended while inside a quoted string
     */
    protected $open = false;

    /**
     * @var bool True if last character read was the escape character
     */
    protected $escape = false;

    /**
     * Reader Constructor.
     *
     * Initializes a reader object using an input source and optionally a
     * flavor, then rewinds so that the first row is loaded.
     *
     * @param mixed $input The source of our CSV data
     * @param Flavor|array|null $flavor The "flavor" or format specification object
     */
    public function __construct($input, $flavor = null)
    {
        $this->setSource($input)
             ->setFlavor($flavor)
             ->rewind();
    }

    /**
     * Set the flavor.
     *
     * Set the ``CSVelte\Flavor`` object, used to determine CSV format. An array
     * is converted to a Flavor; null makes the reader taste() the source. If
     * the resulting flavor's header attribute is null, it is filled in from
     * taste_has_header().
     *
     * @param Flavor|array|null $flavor Either an array or a flavor object
     * @return $this
     */
    protected function setFlavor($flavor = null)
    {
        if (is_array($flavor)) {
            $flavor = new Flavor($flavor);
        }
        // @todo put this inside a try/catch
        if (is_null($flavor)) {
            $flavor = taste($this->source);
        }
        if (is_null($flavor->header)) {
            // Flavor is immutable, so ask for a copy with the header attribute filled in
            $flavor = $flavor->copy(['header' => taste_has_header($this->source)]);
        }
        $this->flavor = $flavor;
        return $this;
    }

    /**
     * Set the reader source.
     *
     * Input that already implements Streamable is used as-is; anything else is
     * handed to streamize() and the result is stored.
     *
     * @param mixed $input A Streamable, or anything streamize() accepts
     * @return $this
     */
    protected function setSource($input)
    {
        if (!($input instanceof Streamable)) {
            $input = streamize($input);
        }
        $this->source = $input;
        return $this;
    }

    /**
     * Load a line into memory.
     *
     * Does nothing unless $this->current is null. The first line becomes the
     * header row when the flavor says there is one; every other line becomes
     * the current row. At end of input the current row is set to false.
     *
     * @return void
     */
    protected function load()
    {
        if (!is_null($this->current)) {
            return;
        }
        try {
            $line = $this->readLine();
            $this->line++;
            $parsed = $this->parse($line);
            if ($this->hasHeader() && $this->line === 1) {
                $this->header = new HeaderRow($parsed);
            } else {
                $this->current = new Row($parsed);
                if ($this->header) {
                    $this->current->setHeaderRow($this->header);
                }
            }
        } catch (EndOfFileException $e) {
            // false (rather than null) marks end-of-input; valid() relies on it
            $this->current = false;
        }
    }

    /**
     * Read single line from CSV data source (stream, file, etc.), taking into
     * account CSV's de-facto quoting rules with respect to designated line
     * terminator character when they fall within quoted strings.
     *
     * @return string A CSV row (could possibly span multiple lines depending on
     *     quoting and escaping)
     * @throws Exception\EndOfFileException when eof has been reached
     *     and the read buffer has all been returned
     */
    protected function readLine()
    {
        $f = $this->getFlavor();
        $eol = $f->lineTerminator;
        $lines = [];
        try {
            do {
                if (false === ($line = $this->source->readLine($eol))) {
                    throw new EndOfFileException("End of file reached");
                }
                // note: rtrim() treats $eol as a list of characters, not a sequence
                $lines[] = rtrim($line, $eol);
            } while ($this->inQuotedString(end($lines), $f->quoteChar, $f->escapeChar));
        } catch (EndOfFileException $e) {
            // only throw the exception if we don't already have lines in the buffer
            if (!count($lines)) {
                throw $e;
            }
        }
        return rtrim(implode($eol, $lines), $eol);
    }

    /**
     * Determine whether last line ended while a quoted string was still "open"
     *
     * This method is used in a loop to determine if each line being read ends
     * while a quoted string is still "open". The open/escape state is kept on
     * the object, so it carries over from one call to the next.
     *
     * @param string $line Line of csv to analyze
     * @param string $quoteChar The quote/enclosure character to use
     * @param string $escapeChar The escape char/sequence to use
     * @return bool True if currently within a quoted string
     */
    protected function inQuotedString($line, $quoteChar, $escapeChar)
    {
        $length = strlen($line);
        for ($i = 0; $i < $length; $i++) {
            $c = $line[$i];
            if ($this->escape) {
                // previous character was the escape character: skip this one
                $this->escape = false;
                continue;
            }
            $this->escape = ($c === $escapeChar);
            if ($c === $quoteChar) {
                $this->open = !$this->open;
            }
        }
        return $this->open;
    }

    /**
     * Flavor Getter.
     *
     * Retrieve the "flavor" object being used by the reader
     *
     * @return Flavor
     */
    public function getFlavor()
    {
        return $this->flavor;
    }

    /**
     * Check if flavor object defines header.
     *
     * Determine whether or not the input source's CSV data contains a header
     * row or not. Unless you explicitly specify so within your Flavor object,
     * this method is a logical best guess. The CSV format does not
     * provide metadata of any kind and therefore does not provide this info.
     *
     * @return bool True if the input source has a header row (or, to be more
     *     accurate, if the flavor SAYS it has a header row)
     * @todo Rather than always reading in Taster::SAMPLE_SIZE, read in ten lines at a time until
     *     whatever method it is has enough data to make a reliable decision/guess
     */
    public function hasHeader()
    {
        return $this->getFlavor()->header;
    }

    /**
     * Temporarily replace special characters within a quoted string
     *
     * Replace all instances of the line terminator and the delimiter that are
     * contained within quoted text with special placeholder strings, so that
     * the data can be split with explode() without splitting on delimiters or
     * newlines within quotes. undoReplaceQuotedSpecialChars() reverses this.
     *
     * @param string $data The string to do the replacements on
     * @param string $delim The delimiter character to replace
     * @param string $quo The quote character
     * @param string $eol Line terminator character/sequence
     * @return string The data with replacements performed
     * @internal
     * @todo Create a regex class so you can do $regex->escape() rather than
     *     preg_quote
     */
    protected function replaceQuotedSpecialChars($data, $delim, $quo, $eol)
    {
        if ((string) $quo === '') {
            // no quote character means nothing can be quoted (and "[]" is not a valid pattern)
            return $data;
        }
        $pattern = '/([' . preg_quote($quo, '/') . '])(.*)\1/imsU';
        return preg_replace_callback($pattern, function ($matches) use ($delim, $eol) {
            $ret = str_replace($eol, self::PLACEHOLDER_NEWLINE, $matches[0]);
            $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret);
            return $ret;
        }, $data);
    }

    /**
     * Undo temporary special char replacements
     *
     * Replace the special character placeholders with the characters they
     * originally substituted.
     *
     * @param string $data The data to undo replacements in
     * @param string $delim The delimiter character
     * @param string $eol The character or string of characters used to terminate lines
     * @return string The data with placeholders replaced with original characters
     * @internal
     */
    protected function undoReplaceQuotedSpecialChars($data, $delim, $eol)
    {
        return str_replace(
            [self::PLACEHOLDER_DELIM, self::PLACEHOLDER_NEWLINE],
            [$delim, $eol],
            $data
        );
    }

    /**
     * Remove quotes wrapping text.
     *
     * Strips one pair of the flavor's quote character from around the data and
     * then unescapes quote characters inside it. The enclosing quotes are
     * removed first so that an empty quoted field ("") yields an empty string
     * instead of being mistaken for an escaped quote.
     *
     * @param string $data The data to unquote
     * @return string The data with quotes stripped from the outside of it
     * @internal
     */
    protected function unQuote($data)
    {
        $flavor = $this->getFlavor();
        $quoteChar = $flavor->quoteChar;
        // with "double quote" escaping, the quote character escapes itself
        $escapeChar = $flavor->doubleQuote ? $quoteChar : $flavor->escapeChar;
        if ((string) $quoteChar !== '') {
            $pattern = '/^(' . preg_quote($quoteChar, '/') . ')(.*)\1$/ms';
            $data = preg_replace($pattern, '\2', $data);
        }
        return $this->unEscape($data, $escapeChar, $quoteChar);
    }

    /**
     * "Unescape" a string.
     *
     * Replaces every escape-character + quote-character pair with a single
     * quote character.
     *
     * @internal
     * @param string $str The string to unescape
     * @param string $esc The escape character used
     * @param string $quo The quote character used
     * @return mixed The string with characters unescaped
     * @todo This actually shouldn't even be necessary. Characters should be read
     *     in one at a time and a quote that follows another should just be ignored
     *     deeming this unnecessary.
     */
    protected function unEscape($str, $esc, $quo)
    {
        return str_replace($esc . $quo, $quo, $str);
    }

    /**
     * Parse a line of CSV data into an array of columns
     *
     * @param string $line A line of CSV data to parse
     * @return array An array of columns
     * @internal
     */
    protected function parse($line)
    {
        $f = $this->getFlavor();
        $replaced = $this->replaceQuotedSpecialChars($line, $f->delimiter, $f->quoteChar, $f->lineTerminator);
        return array_map(function ($val) use ($f) {
            return $this->unQuote(
                $this->undoReplaceQuotedSpecialChars($val, $f->delimiter, $f->lineTerminator)
            );
        }, explode($f->delimiter, $replaced));
    }

    /**
     * Retrieve current row.
     *
     * @return Table\Row|false|null The current row, or false once the end of
     *     the input has been reached
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->current;
    }

    /**
     * Advance to the next row
     *
     * @return Table\Row|false|null The new current row, or false at end of input
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->current = null;
        $this->load();
        return $this->current;
    }

    /**
     * Determine if current position has valid row.
     *
     * @return bool True if current row is valid
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return (bool) $this->current;
    }

    /**
     * Retrieve current row key (line number).
     *
     * @return int The current line number
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->line;
    }

    /**
     * Rewind to the beginning of the dataset.
     *
     * Resets the line counter and the quote/escape tracking state, rewinds the
     * source and loads the first row (skipping past the header row when the
     * flavor says there is one).
     *
     * @return Table\Row|false|null The current row, or false if there is none
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->line = 0;
        // forget quote state left over from a previous pass (e.g. an unterminated quote)
        $this->open = false;
        $this->escape = false;
        $this->source->rewind();
        $this->current = null;
        $this->load();
        if ($this->hasHeader()) {
            $this->next();
        }
        return $this->current();
    }

    /**
     * Retrieve header row.
     *
     * @return Table\HeaderRow|null The header row if there is one
     */
    public function header()
    {
        return $this->header;
    }

    /**
     * Add anonymous function as filter.
     *
     * Add an anonymous function that accepts the current row as its only argument.
     * Return true from the function to keep that row, false otherwise.
     *
     * @param callable $filter An anonymous function to filter out row by certain criteria
     * @return $this
     */
    public function addFilter(callable $filter)
    {
        $this->filters[] = $filter;
        return $this;
    }

    /**
     * Add multiple filters at once.
     *
     * Add an array of anonymous functions to filter out certain rows.
     *
     * @param array $filters An array of anonymous functions
     * @return $this
     */
    public function addFilters(array $filters)
    {
        foreach ($filters as $filter) {
            $this->addFilter($filter);
        }
        return $this;
    }

    /**
     * Returns an iterator with rows from user-supplied filter functions removed
     *
     * @return FilteredReader An iterator with filtered rows
     */
    public function filter()
    {
        return new FilteredReader($this, $this->filters);
    }

    /**
     * Retrieve the contents of the dataset as an array of arrays.
     *
     * The outer array is keyed by whatever key() reported for each row.
     *
     * @return array An array of arrays of CSV content
     */
    public function toArray()
    {
        return array_map(function ($row) {
            return $row->toArray();
        }, iterator_to_array($this));
    }
}
485