Completed
Branch releases/v0.2 (d913c4)
by Luke
02:17
created

Reader::next()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 7
ccs 4
cts 4
cp 1
rs 9.4285
cc 1
eloc 4
nc 1
nop 0
crap 1
1
<?php
2
/**
3
 * CSVelte: Slender, elegant CSV for PHP
4
 *
5
 * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV
6
 * standardization efforts, CSVelte was written in an effort to take all the
7
 * suck out of working with CSV.
8
 *
9
 * @version   v0.2
10
 * @copyright Copyright (c) 2016 Luke Visinoni <[email protected]>
11
 * @author    Luke Visinoni <[email protected]>
12
 * @license   https://github.com/deni-zen/csvelte/blob/master/LICENSE The MIT License (MIT)
13
 */
14
namespace CSVelte;
15
16
use \Closure;
17
use \InvalidArgumentException;
18
use \FilterIterator;
19
use CSVelte\IO\Stream;
20
use CSVelte\Contract\Readable;
21
use CSVelte\Table\Row;
22
use CSVelte\Table\HeaderRow;
23
use CSVelte\Exception\EndOfFileException;
24
use CSVelte\Reader\FilteredIterator as FilteredReader;
25
26
/**
 * CSV Reader
 *
 * Reads CSV data from any object that implements CSVelte\Contract\Readable
 * and exposes the parsed rows through PHP's Iterator interface.
 *
 * @package CSVelte
 * @subpackage Reader
 * @since v0.1
 * @todo Also, is there any way to do some kind of caching or something? Probably
 *     not but if you could that would be a cool feature...
 */
class Reader implements \Iterator
{
    /**
     * Placeholder swapped in for delimiters found inside quoted text, so that
     * a plain explode() on the delimiter does not split a quoted column.
     */
    const PLACEHOLDER_DELIM   = '[=[__DLIM__]=]';

    /**
     * Placeholder swapped in for line terminators found inside quoted text.
     */
    const PLACEHOLDER_NEWLINE = '[=[__NWLN__]=]';

    /**
     * This class supports any source of input that implements this interface.
     * This way it can read from local files, streams, FTP, any class that
     * implements the "Readable" interface.
     *
     * @var \CSVelte\Contract\Readable
     */
    protected $source;

    /**
     * @var \CSVelte\Flavor The "flavor" or format of the CSV being read
     */
    protected $flavor;

    /**
     * @var \CSVelte\Table\Row|boolean|null Row currently loaded into memory;
     *     false once the end of the source was reached, null when nothing
     *     has been loaded yet
     */
    protected $current;

    /**
     * @var integer The current line being read (from input source)
     */
    protected $line = 0;

    /**
     * @var \CSVelte\Table\HeaderRow The header row (if any)
     */
    protected $header;

    /**
     * @var array An array of callback functions
     */
    protected $filters = [];

    /**
     * @var bool True if current line ended while inside a quoted string
     */
    protected $open = false;

    /**
     * @var bool True if last character read was the escape character
     */
    protected $escape = false;

    /**
     * Reader Constructor.
     *
     * Initializes a reader object using an input source and optionally a
     * flavor, then rewinds so the first row is already loaded.
     *
     * @param \CSVelte\Contract\Readable|object|string $input The source of our
     *     CSV data (anything setSource() accepts)
     * @param \CSVelte\Flavor|array|null $flavor The "flavor" or format
     *     specification; when null the flavor is detected from the source
     */
    public function __construct($input, $flavor = null)
    {
        $this->setSource($input)
             ->setFlavor($flavor)
             ->rewind();
    }

    /**
     * Set the flavor.
     *
     * Set the ``CSVelte\Flavor`` object, used to determine CSV format. An
     * array is turned into a Flavor, null is replaced by whatever the Taster
     * detects, and a flavor whose header attribute is null gets it filled in
     * from the Taster's header detection.
     *
     * @param \CSVelte\Flavor|array|null $flavor Either an array or a flavor object
     * @return $this
     */
    protected function setFlavor($flavor = null)
    {
        if (is_array($flavor)) {
            $flavor = new Flavor($flavor);
        }
        $taster = new Taster($this->source);
        // @todo put this inside a try/catch
        if (is_null($flavor)) {
            $flavor = $taster->lick();
        }
        if (is_null($flavor->header)) {
            // Flavor is immutable, give me a new one with header set to lickHeader return val
            $flavor = $flavor->copy([
                'header' => $taster->lickHeader(
                    $flavor->quoteChar,
                    $flavor->delimiter,
                    $flavor->lineTerminator
                ),
            ]);
        }
        $this->flavor = $flavor;
        return $this;
    }

    /**
     * Set the reader source.
     *
     * The reader can accept anything that implements Readable and is actually
     * readable (can be read). Anything else is handed to Stream::streamize()
     * and the result is used as the source.
     *
     * @param \CSVelte\Contract\Readable|object|string $input See description
     * @return $this
     */
    protected function setSource($input)
    {
        if ($input instanceof Readable && $input->isReadable()) {
            $this->source = $input;
        } else {
            // NOTE(review): static analysis flagged that $input may be an
            // object here while Stream::streamize() appears to accept only
            // strings -- confirm against Stream::streamize() and add a type
            // check if that is the case.
            $this->source = Stream::streamize($input);
        }
        return $this;
    }

    /**
     * Load a line into memory.
     *
     * Does nothing unless $this->current is null. Otherwise reads and parses
     * the next row: the first line becomes the header row when the flavor
     * says there is one, any other line becomes the current row. Reaching
     * the end of the source sets the current row to false.
     *
     * @return void
     * @access protected
     */
    protected function load()
    {
        if (!is_null($this->current)) {
            return;
        }
        try {
            $line = $this->readLine();
            $this->line++;
            $parsed = $this->parse($line);
            if ($this->hasHeader() && $this->line === 1) {
                $this->header = new HeaderRow($parsed);
            } else {
                $this->current = new Row($parsed);
                if ($this->header) {
                    $this->current->setHeaderRow($this->header);
                }
            }
        } catch (EndOfFileException $e) {
            $this->current = false;
        }
    }

    /**
     * Read single line from CSV data source (stream, file, etc.), taking into
     * account CSV's de-facto quoting rules with respect to designated line
     * terminator character when they fall within quoted strings.
     *
     * @return string A CSV row (could possibly span multiple lines depending on
     *     quoting and escaping)
     * @throws \CSVelte\Exception\EndOfFileException when eof has been reached
     *     and the read buffer has all been returned
     */
    protected function readLine()
    {
        $f = $this->getFlavor();
        $eol = $f->lineTerminator;
        $lines = [];
        // Every row starts outside of any quoted string. Without this reset a
        // row ending in the escape character (or a source that ended inside
        // an open quote before a rewind) would corrupt quote tracking for
        // the next row.
        $this->open = false;
        $this->escape = false;
        try {
            do {
                $line = $this->source->readLine($eol);
                if (false === $line) {
                    throw new EndOfFileException("End of file reached: " . $this->source->getName());
                }
                $lines[] = rtrim($line, $eol);
            } while ($this->inQuotedString(end($lines), $f->quoteChar, $f->escapeChar));
        } catch (EndOfFileException $e) {
            // only throw the exception if we don't already have lines in the buffer
            if (!count($lines)) {
                throw $e;
            }
        }
        return rtrim(implode($eol, $lines), $eol);
    }

    /**
     * Determine whether last line ended while a quoted string was still "open"
     *
     * This method is used in a loop to determine if each line being read ends
     * while a quoted string is still "open". The open/escape state lives on
     * the object so it carries over between the physical lines of one row.
     *
     * @param string $line Line of csv to analyze
     * @param string $quoteChar The quote/enclosure character to use
     * @param string $escapeChar The escape char/sequence to use
     * @return bool True if currently within a quoted string
     */
    protected function inQuotedString($line, $quoteChar, $escapeChar)
    {
        $line = (string) $line;
        // strlen() rather than empty(): the line "0" is not an empty line
        $length = strlen($line);
        for ($i = 0; $i < $length; $i++) {
            $c = $line[$i];
            if ($this->escape) {
                // the previous character escaped this one; it has no meaning
                $this->escape = false;
                continue;
            }
            // strict comparison so a false/null escape or quote setting can
            // never accidentally match the character "0"
            $this->escape = ($c === $escapeChar);
            if ($c === $quoteChar) {
                $this->open = !$this->open;
            }
        }
        return $this->open;
    }

    /**
     * Flavor Getter.
     *
     * Retrieve the "flavor" object being used by the reader
     *
     * @return \CSVelte\Flavor
     * @access public
     */
    public function getFlavor()
    {
        return $this->flavor;
    }

    /**
     * Check if flavor object defines header.
     *
     * Determine whether or not the input source's CSV data contains a header
     * row or not. Unless you explicitly specify so within your Flavor object,
     * this method is a logical best guess. The CSV format does not
     * provide metadata of any kind and therefore does not provide this info.
     *
     * @return boolean True if the input source has a header row (or, to be more
     *     accurate, if the flavor SAYS it has a header row)
     * @todo Rather than always reading in Taster::SAMPLE_SIZE, read in ten lines at a time until
     *     whatever method it is has enough data to make a reliable decision/guess
     */
    public function hasHeader()
    {
        return $this->getFlavor()->header;
    }

    /**
     * Temporarily replace special characters within a quoted string
     *
     * Replace all instances of the line terminator and the delimiter that are
     * contained within quoted text with special placeholder strings. This is
     * done so that the very unsmart "explode" function can be used without
     * it exploding on delimiters or newlines within quotes. Once exploded,
     * the real characters are subbed back in by
     * undoReplaceQuotedSpecialChars().
     *
     * @param string $data The string to do the replacements on
     * @param string $delim The delimiter character to replace
     * @param string $quo The quote character
     * @param string $eol Line terminator character/sequence
     * @return string The data with replacements performed
     * @access protected
     * @internal
     * @todo Create a regex class so you can do $regex->escape() rather than
     *     preg_quote
     */
    protected function replaceQuotedSpecialChars($data, $delim, $quo, $eol)
    {
        if (!is_string($quo) || $quo === '') {
            // without a quote character nothing can be quoted, and an empty
            // character class would make the pattern below invalid
            return $data;
        }
        $pattern = '/(['. preg_quote($quo, '/') . '])(.*)\1/imsU';
        $replaced = preg_replace_callback($pattern, function ($matches) use ($delim, $eol) {
            $ret = str_replace($eol, self::PLACEHOLDER_NEWLINE, $matches[0]);
            $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret);
            return $ret;
        }, $data);
        // preg_replace_callback() yields null on a PCRE error; fall back to
        // the untouched data rather than handing null to explode()
        return is_null($replaced) ? $data : $replaced;
    }

    /**
     * Undo temporary special char replacements
     *
     * Replace the special character placeholders with the characters they
     * originally substituted.
     *
     * @param string $data The data to undo replacements in
     * @param string $delim The delimiter character
     * @param string $eol The character or string of characters used to terminate lines
     * @return string The data with placeholders replaced with original characters
     * @internal
     */
    protected function undoReplaceQuotedSpecialChars($data, $delim, $eol)
    {
        // array form is applied in order: delimiter first, then newline
        return str_replace(
            [self::PLACEHOLDER_DELIM, self::PLACEHOLDER_NEWLINE],
            [$delim, $eol],
            $data
        );
    }

    /**
     * Remove quotes wrapping text.
     *
     * Strips one pair of the flavor's quote character from the outside of the
     * value and then un-escapes embedded quotes. The wrapping quotes are
     * removed BEFORE un-escaping; doing it the other way around turns an
     * empty quoted field ("") into a single literal quote character when the
     * flavor uses doubled quotes as its escape.
     *
     * @param string $data The data to unquote
     * @return string The data with quotes stripped from the outside of it
     * @internal
     */
    protected function unQuote($data)
    {
        $flavor = $this->getFlavor();
        $quoteChar = $flavor->quoteChar;
        $escapeChar = $flavor->doubleQuote ? $quoteChar : $flavor->escapeChar;

        $data = (string) $data;
        $quote = (string) $quoteChar;
        $quoteLen = strlen($quote);
        if ($quoteLen > 0
            && strlen($data) >= 2 * $quoteLen
            && substr($data, 0, $quoteLen) === $quote
            && substr($data, -$quoteLen) === $quote
        ) {
            // cast: substr() returned false instead of '' before PHP 8
            $data = (string) substr($data, $quoteLen, -$quoteLen);
        }
        return $this->unEscape($data, $escapeChar, $quoteChar);
    }

    /**
     * Replace escaped quote characters with plain quote characters.
     *
     * @param string $str The data to un-escape
     * @param string $esc The escape character/sequence
     * @param string $quo The quote character
     * @return string The data with every escape+quote pair reduced to a quote
     * @internal
     * @todo This actually shouldn't even be necessary. Characters should be read
     *     in one at a time and a quote that follows another should just be ignored
     *     deeming this unnecessary.
     */
    protected function unEscape($str, $esc, $quo)
    {
        if ((string) $quo === '') {
            // no quote character means there is nothing to un-escape; without
            // this guard every bare escape character would be deleted
            return $str;
        }
        return str_replace($esc . $quo, $quo, $str);
    }

    /**
     * Parse a line of CSV data into an array of columns
     *
     * @param string $line A line of CSV data to parse
     * @return array An array of columns
     * @access protected
     * @internal
     */
    protected function parse($line)
    {
        $f = $this->getFlavor();
        $replaced = $this->replaceQuotedSpecialChars($line, $f->delimiter, $f->quoteChar, $f->lineTerminator);
        $columns = explode($f->delimiter, $replaced);
        return array_map(function ($val) use ($f) {
            $undone = $this->undoReplaceQuotedSpecialChars($val, $f->delimiter, $f->lineTerminator);
            return $this->unQuote($undone);
        }, $columns);
    }

    /**
     * Iterator: the row currently loaded into memory.
     *
     * @return \CSVelte\Table\Row|boolean|null The current row, or false once
     *     the end of the source has been reached
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->current;
    }

    /**
     * Iterator: discard the current row and load the next one.
     *
     * @return \CSVelte\Table\Row|boolean|null The newly loaded row (see current())
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->current = null;
        $this->load();
        return $this->current;
    }

    /**
     * Iterator: whether a row is currently loaded.
     *
     * @return boolean
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return (bool) $this->current;
    }

    /**
     * Iterator: the number of the line last read from the source.
     *
     * @return integer
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->line;
    }

    /**
     * Iterator: go back to the start of the source and load the first row.
     *
     * When the flavor says there is a header, the first load() consumes it
     * and the following next() loads the first data row.
     *
     * @return \CSVelte\Table\Row|boolean|null The first row (see current())
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->line = 0;
        $this->source->rewind();
        $this->current = null;
        $this->load();
        if ($this->hasHeader()) {
            $this->next();
        }
        return $this->current();
    }

    /**
     * Header row getter.
     *
     * @return \CSVelte\Table\HeaderRow|null The header row, if one was loaded
     */
    public function header()
    {
        return $this->header;
    }

    /**
     * Register a filter callback.
     *
     * @param \Closure $filter The callback to append to the filter list
     * @return $this
     * @todo Closure should be changed to "Callable" (php5.4+)
     */
    public function addFilter(Closure $filter)
    {
        $this->filters[] = $filter;
        return $this;
    }

    /**
     * Register several filter callbacks at once.
     *
     * @param array $filters A list of closures, each passed to addFilter()
     * @return $this
     */
    public function addFilters(array $filters)
    {
        foreach ($filters as $filter) {
            $this->addFilter($filter);
        }
        return $this;
    }

    /**
     * Wrap this reader and its registered filters in a filtered iterator.
     *
     * @return \CSVelte\Reader\FilteredIterator
     */
    public function filter()
    {
        return new FilteredReader($this, $this->filters);
    }
}
417