Completed
Push — master ( b582f9...2d195d )
by Ori
01:37
created

CsvDataSource::isEof()   C

Complexity

Conditions 8
Paths 7

Size

Total Lines 32
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 23
nc 7
nop 0
dl 0
loc 32
rs 5.3846
c 0
b 0
f 0
1
<?php
2
3
namespace frictionlessdata\tableschema\DataSources;
4
5
use frictionlessdata\tableschema\Exceptions\DataSourceException;
6
use frictionlessdata\tableschema\CsvDialect;
7
8
/**
 * Reads tabular data from a CSV source.
 *
 * Responsible for finding the header row based on the configured options
 * and supports skipping leading rows of the CSV.
 */
class CsvDataSource extends BaseDataSource
{
    /** @var CsvDialect dialect object whose parseRow() turns a raw line into an array of fields */
    public $csvDialect;

    /** @var resource|null stream handle obtained from fopen() in open(), null once closed */
    protected $resource;

    /** @var array|mixed header row: either the 'headerRow' option or the parsed row at 'headerRowNum' */
    protected $headerRow;

    /** @var array rows consumed by open() before the data rows (includes the header row when read from the file) */
    protected $skippedRows = [];

    /** @var int number of rows read from the resource so far */
    protected $curRowNum;

    /** @var array|null row buffered by isEof() and handed out by getNextLine() */
    protected $nextRow;

    /**
     * @param CsvDialect $csvDialect dialect used to parse rows; must be set before calling open()
     */
    public function setCsvDialect($csvDialect)
    {
        $this->csvDialect = $csvDialect;
    }

    /**
     * Opens the data source for reading, skips the configured number of rows
     * and determines the header row.
     *
     * Options read (via getOption()):
     *  - 'headerRow': an explicit header row; when set, no rows are skipped by default
     *  - 'headerRowNum': 1-based number of the row holding the header (default 1)
     *  - 'skipRows': number of leading rows to skip (defaults to headerRowNum, or 0 with an explicit header)
     *
     * @throws \Exception          when no csv dialect was set
     * @throws DataSourceException when the source cannot be opened or no header row could be determined
     */
    public function open()
    {
        $this->curRowNum = 0;
        // start from a clean state so re-opening does not keep rows buffered or skipped by a previous pass
        $this->skippedRows = [];
        $this->nextRow = null;
        if (!$this->csvDialect) {
            throw new \Exception("must set csv dialect");
        }
        try {
            $resource = fopen($this->dataSource, 'r');
        } catch (\Exception $e) {
            // only reached when an error handler converts PHP warnings into exceptions
            throw new DataSourceException($e->getMessage());
        }
        if ($resource === false) {
            // fopen() signals failure by returning false, it does not throw on its own
            throw new DataSourceException(
                is_string($this->dataSource)
                    ? 'Failed to open data source: '.$this->dataSource
                    : 'Failed to open data source'
            );
        }
        $this->resource = $resource;
        $this->headerRow = $this->getOption('headerRow');
        if ($this->headerRow) {
            // specifically set header row - will not skip any rows
            $headerRowNum = 0;
            $defaultSkipRows = 0;
        } else {
            // skip rows according to headerRowNum which is 1 by default
            $defaultSkipRows = $headerRowNum = $this->getOption('headerRowNum', 1);
        }
        /*
         * RFC4180:
         * - The last record in the file may or may not have an ending line break.
         * - Each line should contain the same number of fields throughout the file.
         *
         * Tabular Data requirements
         * - File encoding must be either UTF-8 (the default) or include encoding property
         * - If the CSV differs from this or the RFC in any other way regarding dialect
         *   (e.g. line terminators, quote characters, field delimiters),
         *   the Tabular Data Resource MUST contain a dialect property describing its dialect.
         *   The dialect property MUST follow the CSV Dialect specification.
         */
        $skipRows = $this->getOption('skipRows', $defaultSkipRows);
        if ($skipRows > 0) {
            // either specifically set skipRows, or as required for the header row
            foreach (range(1, $skipRows) as $i) {
                $row = $this->getRow();
                $this->skippedRows[] = $row;
                if ($i == $headerRowNum) {
                    $this->headerRow = $row;
                }
            }
        }
        if (!$this->headerRow || $this->headerRow == [""]) {
            throw new DataSourceException('Failed to get header row');
        }
    }

    /**
     * Returns the rows that open() consumed before the data rows.
     *
     * @return array
     */
    public function getSkippedRows()
    {
        return $this->skippedRows;
    }

    /**
     * Returns the row buffered by the last isEof() call as an associative
     * array keyed by the header row's field names, and clears the buffer.
     *
     * Columns missing from a row that is shorter than the header are set to null.
     *
     * @return array
     *
     * @throws DataSourceException
     */
    public function getNextLine()
    {
        $row = $this->nextRow;
        $this->nextRow = null;
        $obj = [];
        $colNum = 0;
        foreach ($this->headerRow as $fieldName) {
            // isset() guards both a short row and a missing buffered row, avoiding undefined-offset notices
            $obj[$fieldName] = isset($row[$colNum]) ? $row[$colNum] : null;
            ++$colNum;
        }

        return $obj;
    }

    /**
     * Tells whether there are no more rows to read.
     *
     * When it returns false, the next row has been read and buffered for getNextLine().
     *
     * @return bool
     *
     * @throws DataSourceException when a blank row is found before the end of the file
     */
    public function isEof()
    {
        if (!empty($this->nextRow)) {
            // a row is already buffered and waiting for getNextLine()
            return false;
        }
        if ($this->isResourceEof()) {
            return true;
        }
        $this->nextRow = $this->getRow();
        if (!empty($this->nextRow) && $this->nextRow !== [""]) {
            return false;
        }
        // blank row: drop it from the buffer, otherwise [""] (a non-empty array)
        // would make the next isEof() call report that a row is available
        $this->nextRow = null;
        if ($this->isResourceEof()) {
            // RFC4180: The last record in the file may or may not have an ending line break.
            return true;
        }
        throw new DataSourceException("invalid csv file", $this->curRowNum);
    }

    /**
     * Closes the underlying resource; does nothing when no resource is open.
     *
     * @throws DataSourceException
     */
    public function close()
    {
        if (!is_resource($this->resource)) {
            // never opened, failed to open, or already closed - nothing to release
            $this->resource = null;

            return;
        }
        try {
            fclose($this->resource);
        } catch (\Exception $e) {
            throw new DataSourceException($e->getMessage(), $this->curRowNum);
        }
        $this->resource = null;
    }

    /**
     * Writes the header row followed by all remaining rows to the given output as CSV.
     *
     * @param string $outputDataSource path or stream URL passed to fopen() in write mode
     *
     * @throws DataSourceException when the output cannot be opened or reading the source fails
     */
    public function save($outputDataSource)
    {
        $file = fopen($outputDataSource, 'w');
        if ($file === false) {
            throw new DataSourceException('Failed to open output data source for writing');
        }
        try {
            fputcsv($file, $this->headerRow);
            while (!$this->isEof()) {
                fputcsv($file, array_values($this->getNextLine()));
            }
        } catch (\Exception $e) {
            // release the output handle before propagating the failure
            fclose($file);
            throw $e;
        }
        fclose($file);
    }

    /**
     * Reads the next raw line from the resource and parses it using the csv dialect.
     *
     * @return array
     *
     * @throws DataSourceException
     */
    protected function getRow()
    {
        ++$this->curRowNum;
        try {
            $line = fgets($this->resource);
        } catch (\Exception $e) {
            throw new DataSourceException($e->getMessage(), $this->curRowNum);
        }

        return $this->csvDialect->parseRow($line);
    }

    /**
     * Checks the end-of-file indicator of the underlying resource.
     *
     * @return bool
     *
     * @throws DataSourceException
     */
    private function isResourceEof()
    {
        try {
            return feof($this->resource);
        } catch (\Exception $e) {
            throw new DataSourceException($e->getMessage(), $this->curRowNum);
        }
    }
}
181