Completed
Push — releases/v0.2 ( 608f49...99789c )
by Luke
05:36
created

Reader::toArray()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 0
crap 1
1
<?php
2
/**
3
 * CSVelte: Slender, elegant CSV for PHP
4
 *
5
 * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV
6
 * standardization efforts, CSVelte was written in an effort to take all the
7
 * suck out of working with CSV.
8
 *
9
 * @version   v0.2
10
 * @copyright Copyright (c) 2016 Luke Visinoni <[email protected]>
11
 * @author    Luke Visinoni <[email protected]>
12
 * @license   https://github.com/deni-zen/csvelte/blob/master/LICENSE The MIT License (MIT)
13
 */
14
namespace CSVelte;
15
16
use \Closure;
17
use \InvalidArgumentException;
18
use \FilterIterator;
19
use CSVelte\IO\Stream;
20
use CSVelte\Contract\Readable;
21
use CSVelte\Table\Row;
22
use CSVelte\Table\HeaderRow;
23
use CSVelte\Exception\EndOfFileException;
24
use CSVelte\Reader\FilteredIterator as FilteredReader;
25
26
/**
27
 * CSV Reader
28
 *
29
 * Reads CSV data from any object that implements CSVelte\Contract\Readable.
30
 *
31
 * @package CSVelte
32
 * @subpackage Reader
33
 * @since v0.1
34
 * @todo Also, is there any way to do some kind of caching or something? Probably
35
 *     not but if you could that would be a cool feature...
36
 */
37
class Reader implements \Iterator
38
{
39
    const PLACEHOLDER_DELIM   = '[=[__DLIM__]=]';
40
    const PLACEHOLDER_NEWLINE = '[=[__NWLN__]=]';
41
42
    /**
43
     * This class supports any sources of input that implements this interface.
44
     * This way I can read from local files, streams, FTP, any class that implements
45
     * the "Readable" interface
46
     * @var \CSVelte\Contract\Readable
47
     */
48
    protected $source;
49
50
    /**
51
     * @var \CSVelte\Flavor The "flavor" or format of the CSV being read
52
     */
53
    protected $flavor;
54
55
    /**
56
     * @var \CSVelte\Table\Row|boolean Row currently loaded into memory
57
     */
58
    protected $current;
59
60
    /**
61
     * @var integer The current line being read (from input source)
62
     */
63
    protected $line = 0;
64
65
    /**
66
     * @var \CSVelte\Table\HeaderRow The header row (if any)
67
     */
68
    protected $header;
69
70
    /**
71
     * @var array An array of callback functions
72
     */
73
    protected $filters = array();
74
75
    /**
76
     * @var bool True if current line ended while inside a quoted string
77
     */
78
    protected $open = false;
79
80
    /**
81
     * @var bool True if last character read was the escape character
82
     */
83
    protected $escape = false;
84
85
    /**
     * Reader Constructor.
     *
     * Stores the input source, resolves the flavor used to read it and then
     * rewinds the reader, which loads the first row into memory.
     *
     * @param \CSVelte\Contract\Readable|object|string $input The source of our CSV data
     * @param \CSVelte\Flavor|array|null $flavor The "flavor" or format specification;
     *     handed to setFlavor() unchanged
     */
    public function __construct($input, $flavor = null)
    {
        // Order matters: the flavor is tasted from the source, and rewinding
        // needs both the source and the flavor to be in place.
        $this->setSource($input);
        $this->setFlavor($flavor);
        $this->rewind();
    }
98
99
    /**
     * Set the flavor.
     *
     * Resolves the ``CSVelte\Flavor`` object used to determine CSV format. An
     * array is turned into a flavor object, null causes the flavor to be
     * "licked" from the source by a Taster, and a flavor whose header property
     * is null gets a copy with the header value worked out by the Taster.
     *
     * @param \CSVelte\Flavor|array|null $flavor Either an array or a flavor object
     * @return $this
     */
    protected function setFlavor($flavor = null)
    {
        if (is_array($flavor)) {
            $flavor = new Flavor($flavor);
        }

        // The taster is always built, even when it ends up not being consulted
        $taster = new Taster($this->source);

        // @todo put this inside a try/catch
        if ($flavor === null) {
            $flavor = $taster->lick();
        }

        if ($flavor->header === null) {
            // Flavor is immutable, so ask for a copy carrying the header value
            // that lickHeader comes up with
            $header = $taster->lickHeader($flavor->delimiter, $flavor->lineTerminator);
            $flavor = $flavor->copy(['header' => $header]);
        }

        $this->flavor = $flavor;

        return $this;
    }
121
122
    /**
     * Set the reader source.
     *
     * An input that implements Readable and reports itself as readable is used
     * directly. Anything else is passed to Stream::streamize() and whatever
     * that returns becomes the source.
     *
     * @param \CSVelte\Contract\Readable|object|string $input See description
     * @return $this
     */
    protected function setSource($input)
    {
        $usable = ($input instanceof Readable) && $input->isReadable();
        $this->source = $usable ? $input : Stream::streamize($input);

        return $this;
    }
141
142
    /**
     * Load a line into memory
     *
     * Does nothing when a row is already loaded. Otherwise reads and parses
     * the next line: if the flavor declares a header and this is the first
     * line, it is stored as the header row (and no current row is set);
     * otherwise it becomes the current row, linked to the header row if there
     * is one. When the end of the input is reached the current row is set to
     * false.
     *
     * @return void
     * @access protected
     */
    protected function load()
    {
        if (!is_null($this->current)) {
            return;
        }

        try {
            $raw = $this->readLine();
            $this->line++;
            $fields = $this->parse($raw);

            if ($this->hasHeader() && $this->line === 1) {
                $this->header = new HeaderRow($fields);
                return;
            }

            $this->current = new Row($fields);
            if ($this->header) {
                $this->current->setHeaderRow($this->header);
            }
        } catch (EndOfFileException $e) {
            // nothing left to read; false marks the iterator as exhausted
            $this->current = false;
        }
    }
166
167
    /**
     * Read a single CSV row from the data source.
     *
     * Physical lines are read one at a time using the flavor's line terminator
     * and buffered for as long as inQuotedString() reports that a quoted
     * string is still open, so a row may span several physical lines. The
     * buffered lines are joined back together with the line terminator.
     *
     * @return string A CSV row (could possibly span multiple lines depending on
     *     quoting and escaping)
     * @throws \CSVelte\Exception\EndOfFileException when eof has been reached
     *     and the read buffer has all been returned
     */
    protected function readLine()
    {
        $flavor = $this->getFlavor();
        $eol = $flavor->lineTerminator;
        $buffer = array();

        try {
            do {
                $line = $this->source->readLine($eol);
                if ($line === false) {
                    throw new EndOfFileException("End of file reached: " . $this->source->getName());
                }
                $buffer[] = rtrim($line, $eol);
            } while ($this->inQuotedString(end($buffer), $flavor->quoteChar, $flavor->escapeChar));
        } catch (EndOfFileException $e) {
            // only throw the exception if we don't already have lines in the buffer
            if (!count($buffer)) {
                throw $e;
            }
        }

        return rtrim(implode($eol, $buffer), $eol);
    }
195
196
    /**
     * Determine whether last line ended while a quoted string was still "open"
     *
     * Walks the line one character at a time, updating two pieces of state
     * kept on the reader: $this->escape (the previous character was the escape
     * character, so the current one is skipped) and $this->open (toggled on
     * every unescaped quote character). Because the state lives on the object
     * it carries over from one call to the next.
     *
     * @param string $line Line of csv to analyze
     * @param string $quoteChar The quote/enclosure character to use
     * @param string $escapeChar The escape char/sequence to use
     * @return bool True if currently within a quoted string
     */
    protected function inQuotedString($line, $quoteChar, $escapeChar)
    {
        // empty() is also true for the string "0", so such a line is not scanned
        if (empty($line)) {
            return $this->open;
        }

        for ($pos = 0; $pos < strlen($line); $pos++) {
            $char = $line[$pos];

            if ($this->escape) {
                // this character was escaped by the previous one; ignore it
                $this->escape = false;
                continue;
            }

            $this->escape = ($char == $escapeChar);
            if ($char == $quoteChar) {
                $this->open = !$this->open;
            }
        }

        return $this->open;
    }
223
224
    /**
     * Flavor Getter.
     *
     * Retrieve the "flavor" object being used by the reader
     *
     * @return \CSVelte\Flavor
     * @access public
     */
    public function getFlavor()
    {
        $flavor = $this->flavor;

        return $flavor;
    }
236
237
    /**
     * Check if flavor object defines header.
     *
     * Reports whether the flavor says the input source's CSV data contains a
     * header row. Unless you explicitly specify so within your Flavor object,
     * this is a logical best guess. The CSV format does not provide metadata
     * of any kind and therefore does not provide this info.
     *
     * @return boolean The flavor's header value, returned as-is: truthy if the
     *     flavor SAYS the input source has a header row
     * @todo Rather than always reading in Taster::SAMPLE_SIZE, read in ten lines at a time until
     *     whatever method it is has enough data to make a reliable decision/guess
     */
    public function hasHeader()
    {
        $flavor = $this->getFlavor();

        return $flavor->header;
    }
254
255
    /**
     * Temporarily replace special characters within a quoted string
     *
     * Every quoted span in the data has its line terminators and delimiters
     * swapped for placeholder strings (PLACEHOLDER_NEWLINE and
     * PLACEHOLDER_DELIM). This lets the caller split the data with the very
     * unsmart "explode" function without it splitting on delimiters or
     * newlines that sit inside quotes. The real characters are meant to be
     * substituted back in afterwards.
     *
     * @param string $data The string to do the replacements on
     * @param string $delim The delimiter character to replace
     * @param string $quo The quote character
     * @param string $eol Line terminator character/sequence
     * @return string The data with replacements performed
     * @access protected
     * @internal
     * @todo Create a regex class so you can do $regex->escape() rather than
     *     preg_quote
     */
    protected function replaceQuotedSpecialChars($data, $delim, $quo, $eol)
    {
        // lazily match from a quote character to the next occurrence of the same character
        $pattern = '/([' . preg_quote($quo, '/') . '])(.*)\1/imsU';

        $substitute = function($matches) use ($delim, $eol) {
            // line terminators first, then delimiters, applied to the whole quoted span
            $quoted = str_replace($eol, self::PLACEHOLDER_NEWLINE, $matches[0]);
            return str_replace($delim, self::PLACEHOLDER_DELIM, $quoted);
        };

        return preg_replace_callback($pattern, $substitute, $data);
    }
285
286
    /**
     * Undo temporary special char replacements
     *
     * Replace the special character placeholders (PLACEHOLDER_DELIM and
     * PLACEHOLDER_NEWLINE) with the characters they originally substituted.
     * The delimiter placeholder is replaced first, then the newline
     * placeholder.
     *
     * @param string $data The data to undo replacements in
     * @param string $delim The delimiter character
     * @param string $eol The character or string of characters used to terminate lines
     * @return string The data with placeholders replaced with original characters
     * @internal
     */
    protected function undoReplaceQuotedSpecialChars($data, $delim, $eol)
    {
        // str_replace handles search/replace arrays pairwise and in order, so
        // the string is always returned and there is no path that yields null
        return str_replace(
            array(self::PLACEHOLDER_DELIM, self::PLACEHOLDER_NEWLINE),
            array($delim, $eol),
            $data
        );
    }
307
308
    /**
     * Remove quotes wrapping text.
     *
     * If the data both starts and ends with the flavor's quote character, that
     * outer pair is removed. Escaped quotes are then collapsed via unEscape(),
     * using the quote character itself as the escape sequence when the flavor
     * uses double-quoting, or the flavor's escape character otherwise.
     *
     * The wrapping quotes are stripped BEFORE unescaping. Doing it the other
     * way round turns a field such as `""""` (a single literal quote) into an
     * empty string, because the collapsed inner pair is then mistaken for the
     * wrapping pair.
     *
     * Only the flavor's own quote character is treated as a wrapper; other
     * quote-like characters around a value are left alone.
     *
     * @param string $data The data to unquote
     * @return string The data with quotes stripped from the outside of it
     * @internal
     */
    protected function unQuote($data)
    {
        $flavor = $this->getFlavor();
        $quoteChar = $flavor->quoteChar;
        $escapeChar = $flavor->doubleQuote ? $quoteChar : $flavor->escapeChar;

        $quote = (string) $quoteChar;
        $quoteLen = strlen($quote);
        $isWrapped = $quoteLen > 0
            && strlen($data) >= 2 * $quoteLen
            && substr($data, 0, $quoteLen) === $quote
            && substr($data, -$quoteLen) === $quote;

        if ($isWrapped) {
            // the cast covers older PHP versions where substr() may return
            // false instead of an empty string
            $data = (string) substr($data, $quoteLen, -$quoteLen);
        }

        return $this->unEscape($data, $escapeChar, $quoteChar);
    }
322
323
    /**
     * Collapse escaped quotes.
     *
     * Every occurrence of the escape sequence immediately followed by the
     * quote character is replaced with just the quote character.
     *
     * @param string $str The string to unescape
     * @param string $esc The escape character/sequence
     * @param string $quo The quote character
     * @return string The string with escaped quotes replaced by plain quotes
     * @internal
     * @todo This actually shouldn't even be necessary. Characters should be read
     *     in one at a time and a quote that follows another should just be ignored
     *     deeming this unnecessary.
     */
    protected function unEscape($str, $esc, $quo)
    {
        $escapedQuote = $esc . $quo;

        return str_replace($escapedQuote, $quo, $str);
    }
333
334
    /**
     * Parse a line of CSV data into an array of columns
     *
     * Delimiters and line terminators inside quoted text are first swapped for
     * placeholders so the line can be split on the flavor's delimiter. Each
     * resulting column then has its placeholders restored and its quotes
     * removed.
     *
     * @param string $line A line of CSV data to parse
     * @return array An array of columns
     * @access protected
     * @internal
     */
    protected function parse($line)
    {
        $f = $this->getFlavor();
        $replaced = $this->replaceQuotedSpecialChars($line, $f->delimiter, $f->quoteChar, $f->lineTerminator);
        $columns = explode($f->delimiter, $replaced);

        // The closure is bound to $this, so the reader's protected helpers are
        // called on it directly; no separate alias of the reader is needed.
        return array_map(function($val) use ($f) {
            $undone = $this->undoReplaceQuotedSpecialChars($val, $f->delimiter, $f->lineTerminator);
            return $this->unQuote($undone);
        }, $columns);
    }
353
354 20
    /**
     * Get the row currently loaded into memory.
     *
     * @return \CSVelte\Table\Row|boolean|null The loaded row, false once the
     *     end of the input has been reached, or null if nothing is loaded
     */
    public function current()
    {
        $row = $this->current;

        return $row;
    }
358
359 15
    /**
     * Advance to the next row.
     *
     * Discards the row in memory and loads the following one.
     *
     * @return \CSVelte\Table\Row|boolean|null Whatever is loaded afterwards
     *     (false once the end of the input has been reached)
     */
    public function next()
    {
        // load() only reads when nothing is loaded, so clear the slot first
        $this->current = null;
        $this->load();

        return $this->current;
    }
366
367 8
    /**
     * Check whether a row is currently loaded.
     *
     * @return bool True if the current row is truthy, false otherwise (for
     *     instance after the end of the input has been reached)
     */
    public function valid()
    {
        return $this->current ? true : false;
    }
371
372 5
    /**
     * Get the key of the current row.
     *
     * @return integer The reader's line counter, which is incremented each
     *     time a row is read from the input source
     */
    public function key()
    {
        $line = $this->line;

        return $line;
    }
376
377 20
    /**
     * Rewind the reader to the first data row.
     *
     * Resets the line counter and the quote/escape tracking state, rewinds the
     * underlying source and loads the first line. When the flavor declares a
     * header, that first load only fills the header row, so the reader
     * advances once more to reach the first data row.
     *
     * @return \CSVelte\Table\Row|boolean|null The row loaded after rewinding
     */
    public function rewind()
    {
        $this->line = 0;

        // inQuotedString() keeps these flags on the object between calls. If a
        // previous pass hit the end of the input inside an unterminated quote
        // (or right after an escape character) they would still be set and the
        // first row of this pass would be scanned with the wrong state.
        $this->open = false;
        $this->escape = false;

        $this->source->rewind();
        $this->current = null;
        $this->load();

        if ($this->hasHeader()) {
            $this->next();
        }

        return $this->current();
    }
388
389 2
    /**
     * Get the header row.
     *
     * @return \CSVelte\Table\HeaderRow|null The header row, or null if none
     *     has been loaded
     */
    public function header()
    {
        $header = $this->header;

        return $header;
    }
393
394
    /**
     * Register a filter callback.
     *
     * The callback is appended to the reader's list of filters, which is
     * handed to the filtered reader created by filter().
     *
     * @param \Closure $filter The filter callback to add
     * @return $this
     * @todo Closure should be changed to "Callable" (php5.4+)
     */
    public function addFilter(Closure $filter)
    {
        $this->filters[] = $filter;

        return $this;
    }
402
403 1
    /**
     * Register several filter callbacks at once.
     *
     * Each element is passed to addFilter() in array order.
     *
     * @param array $filters The filter callbacks to add
     * @return $this
     */
    public function addFilters(array $filters)
    {
        foreach ($filters as $callback) {
            $this->addFilter($callback);
        }

        return $this;
    }
410
411 3
    /**
     * Get a filtered view of this reader.
     *
     * @return \CSVelte\Reader\FilteredIterator A filtered iterator built from
     *     this reader and the filters registered so far
     */
    public function filter()
    {
        $filtered = new FilteredReader($this, $this->filters);

        return $filtered;
    }
415
416
    /**
     * Convert every row of the reader to an array.
     *
     * Iterates the whole reader and calls toArray() on each row. The result
     * keeps the keys produced by the iteration.
     *
     * @return array An array of row arrays
     */
    public function toArray()
    {
        // collect all rows first, then convert them in place (keys preserved)
        $rows = iterator_to_array($this);
        foreach ($rows as $key => $row) {
            $rows[$key] = $row->toArray();
        }

        return $rows;
    }
422
423
}
424