Completed
Push — releases/v0.2 ( c1372d...b175d8 )
by Luke
05:30
created

Reader::setSource()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 11
ccs 8
cts 8
cp 1
rs 9.2
cc 4
eloc 8
nc 3
nop 1
crap 4
1
<?php
2
/**
3
 * CSVelte: Slender, elegant CSV for PHP
4
 *
5
 * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV
6
 * standardization efforts, CSVelte was written in an effort to take all the
7
 * suck out of working with CSV.
8
 *
9
 * @version   v0.2
10
 * @copyright Copyright (c) 2016 Luke Visinoni <[email protected]>
11
 * @author    Luke Visinoni <[email protected]>
12
 * @license   https://github.com/deni-zen/csvelte/blob/master/LICENSE The MIT License (MIT)
13
 */
14
namespace CSVelte;
15
16
use \Closure;
17
use \InvalidArgumentException;
18
use \FilterIterator;
19
use CSVelte\IO\Stream;
20
use CSVelte\Contract\Readable;
21
use CSVelte\Table\Row;
22
use CSVelte\Table\HeaderRow;
23
use CSVelte\Exception\EndOfFileException;
24
use CSVelte\Reader\FilteredIterator as FilteredReader;
25
26
/**
27
 * CSV Reader
28
 *
29
 * Reads CSV data from any object that implements CSVelte\Contract\Readable.
30
 *
31
 * @package CSVelte
32
 * @subpackage Reader
33
 * @since v0.1
34
 * @todo Also, is there any way to do some kind of caching or something? Probably
35
 *     not but if you could that would be a cool feature...
36
 */
37
class Reader implements \Iterator
38
{
39
    const PLACEHOLDER_DELIM   = '[=[__DLIM__]=]';
40
    const PLACEHOLDER_NEWLINE = '[=[__NWLN__]=]';
41
42
    /**
43
     * This class supports any sources of input that implements this interface.
44
     * This way I can read from local files, streams, FTP, any class that implements
45
     * the "Readable" interface
46
     * @var \CSVelte\Contract\Readable
47
     */
48
    protected $source;
49
50
    /**
51
     * @var \CSVelte\Flavor The "flavor" or format of the CSV being read
52
     */
53
    protected $flavor;
54
55
    /**
56
     * @var \CSVelte\Table\Row|boolean Row currently loaded into memory
57
     */
58
    protected $current;
59
60
    /**
61
     * @var integer The current line being read (from input source)
62
     */
63
    protected $line = 0;
64
65
    /**
66
     * @var \CSVelte\Table\HeaderRow The header row (if any)
67
     */
68
    protected $header;
69
70
    /**
71
     * @var array An array of callback functions
72
     */
73
    protected $filters = array();
74
75
    /**
76
     * @var bool True if current line ended while inside a quoted string
77
     */
78
    protected $open = false;
79
80
    /**
81
     * @var bool True if last character read was the escape character
82
     */
83
    protected $escape = false;
84
85
    /**
86
     * Reader Constructor.
87
     * Initializes a reader object using an input source and optionally a flavor
88
     *
89
     * @param \CSVelte\Contract\Readable $input The source of our CSV data
90
     * @param \CSVelte\Flavor $flavor The "flavor" or format specification object
91
     */
92 22
    public function __construct($input, $flavor = null)
    {
        // Order matters: setFlavor() builds its Taster from $this->source, and
        // rewind() needs both the source and the flavor before it loads a row.
        $this->setSource($input);
        $this->setFlavor($flavor);
        $this->rewind();
    }
98
99
    /**
100
     * Set the flavor.
101
     *
102
     * Set the ``CSVelte\Flavor`` object, used to determine CSV format.
103
     *
104
     * @param \CSVelte\Flavor|array $flavor Either an array or a flavor object
105
     */
106 20
    protected function setFlavor($flavor = null)
    {
        // A plain array of attributes is promoted to a Flavor object.
        if (is_array($flavor)) {
            $flavor = new Flavor($flavor);
        }

        // The taster is always built from the current source, even when it
        // turns out that nothing has to be detected.
        $taster = new Taster($this->source);

        // @todo put this inside a try/catch
        if (null === $flavor) {
            // No flavor supplied: let the taster work one out.
            $flavor = $taster->lick();
        }

        if (null === $flavor->header) {
            // Flavor is immutable, so the detected header flag goes onto a copy.
            $detectedHeader = $taster->lickHeader($flavor->delimiter, $flavor->lineTerminator);
            $flavor = $flavor->copy(['header' => $detectedHeader]);
        }

        $this->flavor = $flavor;

        return $this;
    }
121
122
    /**
123
     * Set the reader source.
124
     *
125
     * The reader can accept anything that implements Readable and is actually
126
     * readable (can be read). This will make sure that whatever is passed to
127
     * the reader meets these expectations and set $this->source.
128
     *
129
     * @param \CSVelte\Contract\Readable|object|string $input See description
130
     * @return $this
131
     */
132 20
    protected function setSource($input)
    {
        if ($input instanceof Readable && $input->isReadable()) {
            // Already a usable source; take it as-is.
            $this->source = $input;
        } elseif (is_string($input) && file_exists($input)) {
            // Only a genuine string is treated as a path. Casting arbitrary
            // input with (string) errors out for objects that lack
            // __toString(), may trigger side effects in objects that have it,
            // and turns arrays/resources into junk such as "Array" or
            // "Resource id #5". Static analysis also reports that Stream's
            // constructor only accepts string|resource, so objects must not
            // reach it.
            $this->source = new Stream($input);
        } else {
            // Anything else is handed to Stream::streamize(), which is
            // expected to wrap it (or reject it) -- see that method for the
            // accepted types.
            $this->source = Stream::streamize($input);
        }

        return $this;
    }
143
144
    /**
145
     * Load a line into memory
146
     *
147
     * @return void ($this?)
148
     * @access protected
149
     */
150 20
    protected function load()
    {
        // Something (a row, or the false end-of-file marker) is already loaded.
        if (null !== $this->current) {
            return;
        }

        try {
            $raw = $this->readLine();
            $this->line++;
            $fields = $this->parse($raw);

            if ($this->hasHeader() && 1 === $this->line) {
                // First line of a source with a header: keep it as the header
                // row and leave $this->current unset.
                $this->header = new HeaderRow($fields);

                return;
            }

            $this->current = new Row($fields);
            if ($this->header) {
                $this->current->setHeaderRow($this->header);
            }
        } catch (EndOfFileException $eof) {
            // Nothing left to read; valid() turns this false into "not valid".
            $this->current = false;
        }
    }
168
169
    /**
170
     * Read single line from CSV data source (stream, file, etc.), taking into
171
     * account CSV's de-facto quoting rules with respect to designated line
172
     * terminator character when they fall within quoted strings.
173
     *
174
     * @return string A CSV row (could possibly span multiple lines depending on
175
     *     quoting and escaping)
176
     * @throws \CSVelte\Exception\EndOfFileException when eof has been reached
177
     *     and the read buffer has all been returned
178
     */
179 20
    protected function readLine()
    {
        $f = $this->getFlavor();
        $eol = $f->lineTerminator;

        // Quote/escape tracking is per record. Start every record from a clean
        // state so that leftovers from the previous one (a quote still open
        // when end-of-file was hit, or a record whose last character was the
        // escape character) cannot corrupt this one, e.g. after rewind().
        $this->open = false;
        $this->escape = false;

        $lines = array();
        try {
            do {
                $line = $this->source->readLine($eol);
                if (false === $line) {
                    throw new EndOfFileException("End of file reached: " . $this->source->getName());
                }
                $lines[] = rtrim($line, $eol);
                // Keep pulling physical lines while the record is still inside
                // a quoted string.
            } while ($this->inQuotedString(end($lines), $f->quoteChar, $f->escapeChar));
        } catch (EndOfFileException $e) {
            // Only propagate end-of-file when nothing was buffered; otherwise
            // return what was read so the last (unterminated) record is kept.
            if (!count($lines)) {
                throw $e;
            }
        }

        return rtrim(implode($eol, $lines), $eol);
    }
197
198
    /**
199
     * Determine whether last line ended while a quoted string was still "open"
200
     *
201
     * This method is used in a loop to determine if each line being read ends
202
     * while a quoted string is still "open".
203
     *
204
     * @param string $line Line of csv to analyze
205
     * @param string $quoteChar The quote/enclosure character to use
206
     * @param string $escapeChar The escape char/sequence to use
207
     * @return bool True if currently within a quoted string
208
     */
209 20
    protected function inQuotedString($line, $quoteChar, $escapeChar)
    {
        // Note: this updates $this->open and $this->escape as it scans, so the
        // result depends on the lines analysed before this one.
        $line = (string) $line;
        $length = strlen($line);

        // A plain bounded loop also handles the line "0", which empty()
        // would have reported as empty and skipped.
        for ($i = 0; $i < $length; $i++) {
            $c = $line[$i];

            if ($this->escape) {
                // Previous character was the escape character, so this one is
                // taken literally and cannot open or close a quoted string.
                $this->escape = false;
                continue;
            }

            // Strict comparison: with ==, a character such as "0" compares
            // equal to a falsy (e.g. false) escape or quote setting.
            $this->escape = ($c === $escapeChar);
            if ($c === $quoteChar) {
                $this->open = !$this->open;
            }
        }

        return $this->open;
    }
225
226
    /**
227
     * Flavor Getter.
228
     *
229
     * Retrieve the "flavor" object being used by the reader
230
     *
231
     * @return \CSVelte\Flavor
232
     * @access public
233
     */
234 20
    public function getFlavor()
    {
        // Plain accessor for the flavor stored by setFlavor().
        $flavor = $this->flavor;

        return $flavor;
    }
238
239
    /**
240
     * Check if flavor object defines header.
241
     *
242
     * Determine whether or not the input source's CSV data contains a header
243
     * row or not. Unless you explicitly specify so within your Flavor object,
244
     * this method is a logical best guess. The CSV format does not
245
     * provide metadata of any kind and therefore does not provide this info.
246
     *
247
     * @return boolean True if the input source has a header row (or, to be more
248
     *     accurate, if the flavor SAYS it has a header row)
249
     * @todo Rather than always reading in Taster::SAMPLE_SIZE, read in ten lines at a time until
250
     *     whatever method it is has enough data to make a reliable decision/guess
251
     */
252 20
    public function hasHeader()
    {
        // The answer is read straight off the flavor; the data itself is not
        // inspected here.
        $flavor = $this->getFlavor();

        return $flavor->header;
    }
256
257
    /**
258
     * Temporarily replace special characters within a quoted string
259
     *
260
     * Replace all instances of newlines and whatever character you specify (as
261
     * the delimiter) that are contained within quoted text. The replacements are
262
     * simply a special placeholder string. This is done so that I can use the
263
     * very unsmart "explode" function and not have to worry about it exploding
264
     * on delimiters or newlines within quotes. Once I have exploded, I typically
265
     * sub back in the real characters before doing anything else.
266
     *
267
     * @param string $data The string to do the replacements on
268
     * @param string $delim The delimiter character to replace
269
     * @param string $quo The quote character
270
     * @param string $eol Line terminator character/sequence
271
     * @return string The data with replacements performed
272
     * @access protected
273
     * @internal
274
     * @todo I could probably pass in (maybe optionally) the newline character I
275
     *     want to replace as well. I'll do that if I need to.
276
     * @todo Create a regex class so you can do $regex->escape() rather than
277
     *     preg_quote
278
     */
279 20
    protected function replaceQuotedSpecialChars($data, $delim, $quo, $eol)
    {
        // Without a quote character nothing can be quoted, and the pattern
        // below would be left with an empty (invalid) character class.
        if ('' === (string) $quo) {
            return $data;
        }

        $pattern = '/(['. preg_quote($quo, '/') . '])(.*)\1/imsU';
        $replaced = preg_replace_callback($pattern, function ($matches) use ($delim, $eol) {
            // $matches[0] is one whole quoted span, quotes included.
            $ret = str_replace($eol, self::PLACEHOLDER_NEWLINE, $matches[0]);
            return str_replace($delim, self::PLACEHOLDER_DELIM, $ret);
        }, $data);

        // preg_replace_callback() returns null on a PCRE error. Passing that
        // on would make the caller see a single empty column instead of the
        // real data, so fail loudly.
        if (null === $replaced) {
            throw new \RuntimeException(
                'Could not mask quoted special characters (PCRE error code ' . preg_last_error() . ')'
            );
        }

        return $replaced;
    }
287
288
    /**
289
     * Undo temporary special char replacements
290
     *
291
     * Replace the special character placeholders with the characters they
292
     * originally substituted.
293
     *
294
     * @param string $data The data to undo replacements in
295
     * @param string $delim The delimiter character
296
     * @param string $eol The character or string of characters used to terminate lines
297
     * @return string The data with placeholders replaced with original characters
298
     * @internal
299
     */
300 20
    protected function undoReplaceQuotedSpecialChars($data, $delim, $eol)
    {
        // Array-form str_replace() applies the pairs in order: delimiter
        // placeholder first, then newline placeholder.
        $placeholders = array(self::PLACEHOLDER_DELIM, self::PLACEHOLDER_NEWLINE);
        $originals = array($delim, $eol);

        return str_replace($placeholders, $originals, $data);
    }
309
310
    /**
311
     * Remove quotes wrapping text.
312
     *
313
     * @param string $data The data to unquote
314
     * @return string The data with quotes stripped from the outside of it
315
     * @internal
316
     */
317 20
    protected function unQuote($data)
    {
        $flavor = $this->getFlavor();
        $quoteChar = $flavor->quoteChar;
        // With double-quoting, a quote is escaped by another quote character.
        $escapeChar = $flavor->doubleQuote ? $quoteChar : $flavor->escapeChar;

        // Strip the wrapping quotes BEFORE unescaping. Doing it the other way
        // round collapses an empty quoted field ("") into a lone quote and a
        // quoted lone quote ("""") into an empty string. Only the flavor's own
        // quote character counts as a wrapper, matching what
        // replaceQuotedSpecialChars() treats as a quote.
        $data = (string) $data;
        $quote = (string) $quoteChar;
        $quoteLen = strlen($quote);
        if ($quoteLen > 0
            && strlen($data) >= 2 * $quoteLen
            && substr($data, 0, $quoteLen) === $quote
            && substr($data, -$quoteLen) === $quote
        ) {
            // The cast covers older PHP versions where substr() returns false
            // for an empty result.
            $data = (string) substr($data, $quoteLen, -$quoteLen);
        }

        return $this->unEscape($data, $escapeChar, $quoteChar);
    }
324
325
    /**
326
     * @internal
327
     * @todo This actually shouldn't even be necessary. Characters should be read
328
     *     in one at a time and a quote that follows another should just be ignored
329
     *     deeming this unnecessary.
330
     */
331 20
    protected function unEscape($str, $esc, $quo)
    {
        // An escaped quote is the escape sequence immediately followed by the
        // quote character; each occurrence collapses to a bare quote.
        $escapedQuote = $esc . $quo;

        return str_replace($escapedQuote, $quo, $str);
    }
335
336
    /**
337
     * Parse a line of CSV data into an array of columns
338
     *
339
     * @param string $line A line of CSV data to parse
340
     * @return array An array of columns
341
     * @access protected
342
     * @internal
343
     */
344 20
    protected function parse($line)
    {
        $flavor = $this->getFlavor();
        $delimiter = $flavor->delimiter;
        $eol = $flavor->lineTerminator;

        // Mask delimiters/newlines that sit inside quotes so the plain
        // explode() below only splits on real column boundaries.
        $masked = $this->replaceQuotedSpecialChars($line, $delimiter, $flavor->quoteChar, $eol);

        $columns = array();
        foreach (explode($delimiter, $masked) as $index => $column) {
            // Put the masked characters back, then drop quoting/escaping.
            $restored = $this->undoReplaceQuotedSpecialChars($column, $delimiter, $eol);
            $columns[$index] = $this->unQuote($restored);
        }

        return $columns;
    }
355
356 20
    /**
     * Return whatever is currently loaded into memory.
     *
     * @return \CSVelte\Table\Row|boolean|null The value of $this->current, unchanged
     */
    public function current()
    {
        $row = $this->current;

        return $row;
    }
360
361 15
    /**
     * Discard the loaded row, load the following one and return it.
     *
     * @return \CSVelte\Table\Row|boolean|null The value of $this->current after loading
     */
    public function next()
    {
        // load() only reads when nothing is loaded, so clear the slot first.
        $this->current = null;
        $this->load();

        $row = $this->current;

        return $row;
    }
368
369 8
    /**
     * Tell whether a row is currently loaded.
     *
     * @return bool $this->current cast to boolean
     */
    public function valid()
    {
        $loaded = $this->current;

        return (bool) $loaded;
    }
373
374 5
    /**
     * Return the current line counter.
     *
     * @return integer The value of $this->line
     */
    public function key()
    {
        $lineNumber = $this->line;

        return $lineNumber;
    }
378
379 20
    /**
     * Rewind the source and load the first data row.
     *
     * @return \CSVelte\Table\Row|boolean|null The result of current() after rewinding
     */
    public function rewind()
    {
        // Reset the line counter and the source, then force a fresh load.
        $this->line = 0;
        $this->source->rewind();
        $this->current = null;
        $this->load();

        if ($this->hasHeader()) {
            // The load() above took line 1 as the header without loading a
            // row, so step forward to the first data row.
            $this->next();
        }

        return $this->current();
    }
390
391 2
    /**
     * Return the header row, if one has been loaded.
     *
     * @return \CSVelte\Table\HeaderRow|null The value of $this->header
     */
    public function header()
    {
        $headerRow = $this->header;

        return $headerRow;
    }
395
396
    /**
397
     * @todo Closure should be changed to "Callable" (php5.4+)
398
     */
399 3
    public function addFilter(Closure $filter)
    {
        // Append to the list that filter() later hands to the filtered reader.
        $this->filters[] = $filter;

        return $this;
    }
404
405 1
    /**
     * Register several filters at once.
     *
     * Each element is passed to addFilter() in array order.
     *
     * @param array $filters The filter callbacks to add
     * @return $this
     */
    public function addFilters(array $filters)
    {
        foreach ($filters as $callback) {
            $this->addFilter($callback);
        }

        return $this;
    }
412
413 3
    /**
     * Wrap this reader in a filtered iterator using the registered filters.
     *
     * @return \CSVelte\Reader\FilteredIterator
     */
    public function filter()
    {
        $filtered = new FilteredReader($this, $this->filters);

        return $filtered;
    }
417
418
    /**
     * Iterate over the whole reader and convert every row to an array.
     *
     * @return array One entry per row, indexed by the key() value of that row
     */
    public function toArray()
    {
        $rows = array();
        // Iterating $this rewinds the reader first, like iterator_to_array().
        foreach ($this as $lineNumber => $row) {
            $rows[$lineNumber] = $row->toArray();
        }

        return $rows;
    }
424
425
}
426