1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace frictionlessdata\tableschema\DataSources; |
4
|
|
|
|
5
|
|
|
use frictionlessdata\tableschema\Exceptions\DataSourceException; |
6
|
|
|
use frictionlessdata\tableschema\CsvDialect; |
7
|
|
|
|
8
|
|
|
/** |
9
|
|
|
* handles reading data from a csv source |
10
|
|
|
* responsible for finding the header row based on options |
11
|
|
|
* supports skipping rows from the csv. |
12
|
|
|
*/ |
13
|
|
|
class CsvDataSource extends BaseDataSource |
14
|
|
|
{ |
15
|
|
|
/**
 * Dialect whose parseRow() is used by getRow() to turn each raw line into a row.
 *
 * @var CsvDialect
 */
public $csvDialect;

/**
 * Sets the csv dialect used for parsing.
 *
 * Must be called before open(), which refuses to run without a dialect.
 *
 * @param CsvDialect $csvDialect
 */
public function setCsvDialect($csvDialect)
{
    $this->csvDialect = $csvDialect;
}
22
|
|
|
|
23
|
|
|
/**
 * Opens the csv source for reading and determines the header row.
 *
 * When the 'headerRow' option is set it is used as the header and no rows are skipped
 * by default. Otherwise the header is read from the file at the row number given by the
 * 'headerRowNum' option (1 by default), and that many rows are skipped by default.
 * The 'skipRows' option overrides the number of rows to skip. Every skipped row
 * (including the header row) is recorded and available through getSkippedRows().
 *
 * @throws DataSourceException when no dialect was set, the source cannot be opened,
 *                             or no usable header row was found
 */
public function open()
{
    // reset per-run state so that re-opening does not carry over rows of a previous run
    $this->curRowNum = 0;
    $this->skippedRows = [];
    $this->nextRow = null;
    if (!$this->csvDialect) {
        throw new DataSourceException("must set csv dialect");
    }
    try {
        $resource = fopen($this->dataSource, 'r');
    } catch (\Exception $e) {
        // only reached when an error handler converts the fopen warning to an exception
        throw new DataSourceException($e->getMessage());
    }
    if ($resource === false) {
        // fopen reports failure by returning false, not by throwing
        throw new DataSourceException('Failed to open csv data source');
    }
    $this->resource = $resource;
    $this->headerRow = $this->getOption('headerRow');
    if ($this->headerRow) {
        // specifically set header row - will not skip any rows
        $headerRowNum = 0;
        $defaultSkipRows = 0;
    } else {
        // skip rows according to headerRowNum which is 1 by default
        $defaultSkipRows = $headerRowNum = $this->getOption('headerRowNum', 1);
    }
    /*
     * RFC4180:
     *  - The last record in the file may or may not have an ending line break.
     *  - Each line should contain the same number of fields throughout the file.
     *
     * Tabular Data requirements
     *  - File encoding must be either UTF-8 (the default) or include encoding property
     *  - If the CSV differs from this or the RFC in any other way regarding dialect
     *    (e.g. line terminators, quote characters, field delimiters),
     *    the Tabular Data Resource MUST contain a dialect property describing its dialect.
     *    The dialect property MUST follow the CSV Dialect specification.
     */
    $skipRows = $this->getOption('skipRows', $defaultSkipRows);
    if ($skipRows > 0) {
        // either specifically set skipRows, or as required for the header row
        foreach (range(1, $skipRows) as $i) {
            $row = $this->getRow();
            $this->skippedRows[] = $row;
            if ($i == $headerRowNum) {
                $this->headerRow = $row;
            }
        }
    }
    // a header consisting of a single empty field means the header line was blank
    if (empty($this->headerRow) || $this->headerRow == [""]) {
        throw new DataSourceException('Failed to get header row');
    }
}
73
|
|
|
|
74
|
|
|
/**
 * Returns the rows that open() consumed before the data rows (header row included).
 *
 * @return array list of skipped rows; empty when no row has been skipped
 */
public function getSkippedRows()
{
    // the property has no default and is only appended to while open() skips rows,
    // so normalise the "nothing skipped" case to the documented array type
    return $this->skippedRows === null ? [] : $this->skippedRows;
}
81
|
|
|
|
82
|
|
|
/**
 * Returns the row pre-fetched by isEof() as a map of header field name => value.
 *
 * Callers are expected to call isEof() first; it is what loads the next row.
 * Columns missing from the row (row shorter than the header, or no row pre-fetched)
 * are returned as null.
 *
 * @return array
 *
 * @throws DataSourceException kept from the original contract; not raised by this body itself
 */
public function getNextLine()
{
    $row = $this->nextRow;
    // the pre-fetched row is consumed; the next isEof() call reads a new one
    $this->nextRow = null;
    $colNum = 0;
    $obj = [];
    foreach ($this->headerRow as $fieldName) {
        // guard the lookup so short rows yield null without an undefined-offset warning
        $obj[$fieldName] = isset($row[$colNum]) ? $row[$colNum] : null;
        ++$colNum;
    }

    return $obj;
}
98
|
|
|
|
99
|
|
|
/**
 * Tells whether there are no more rows, pre-fetching the next row when there is one.
 *
 * The pre-fetched row is kept in nextRow until getNextLine() consumes it, so calling
 * this method repeatedly does not skip rows.
 *
 * @return bool
 *
 * @throws DataSourceException when the stream fails or an empty row is found before the end of the file
 */
public function isEof()
{
    if (!empty($this->nextRow)) {
        // a row is already waiting to be consumed by getNextLine()
        return false;
    }
    if ($this->isCsvResourceAtEof()) {
        return true;
    }
    $this->nextRow = $this->getRow();
    if (!empty($this->nextRow) && $this->nextRow !== [""]) {
        return false;
    }
    if ($this->isCsvResourceAtEof()) {
        // RFC4180: The last record in the file may or may not have an ending line break.
        // Drop the empty trailing row: [""] is a non-empty array, and leaving it in place
        // would make the next isEof() call report that another row is available.
        $this->nextRow = null;

        return true;
    }
    throw new DataSourceException("invalid csv file", $this->curRowNum);
}

/**
 * Checks the end-of-file indicator of the underlying stream.
 *
 * @return bool
 *
 * @throws DataSourceException when the check raises an exception
 */
private function isCsvResourceAtEof()
{
    try {
        return feof($this->resource);
    } catch (\Exception $e) {
        throw new DataSourceException($e->getMessage(), $this->curRowNum);
    }
}
136
|
|
|
|
137
|
|
|
/**
 * Closes the underlying stream, if one is open.
 *
 * Safe to call more than once, and after an open() that failed.
 *
 * @throws DataSourceException when closing the stream raises an exception
 */
public function close()
{
    $resource = $this->resource;
    // forget the handle first so a failing fclose cannot leave a stale handle behind
    $this->resource = null;
    if (!is_resource($resource)) {
        // never opened, failed to open, or already closed - nothing to release
        return;
    }
    try {
        fclose($resource);
    } catch (\Exception $e) {
        throw new DataSourceException($e->getMessage(), $this->curRowNum);
    }
}
148
|
|
|
|
149
|
|
|
/**
 * Writes the header row and all remaining rows of this data source to a csv file.
 *
 * Rows are read through isEof() / getNextLine(), so this consumes the data source
 * up to its end.
 *
 * @param string $outputDataSource target passed to fopen() in write mode
 *
 * @throws DataSourceException when the target cannot be opened or reading a row fails
 */
public function save($outputDataSource)
{
    $file = fopen($outputDataSource, 'w');
    if ($file === false) {
        // fopen reports failure by returning false; writing to it would only fail later
        throw new DataSourceException('Failed to open output csv file for writing');
    }
    try {
        fputcsv($file, $this->headerRow);
        while (!$this->isEof()) {
            fputcsv($file, array_values($this->getNextLine()));
        }
    } catch (\Exception $e) {
        // do not leak the output handle when reading or writing fails mid-way
        fclose($file);
        throw $e;
    }
    fclose($file);
}
158
|
|
|
|
159
|
|
|
/**
 * Stream handle of the csv source, assigned from fopen() in open().
 *
 * @var resource|null
 */
protected $resource;

/**
 * Header row: either the 'headerRow' option or the row read from the file in open().
 *
 * @var array|null
 */
protected $headerRow;

/**
 * Rows consumed by open() before the data rows, in reading order.
 *
 * @var array|null
 */
protected $skippedRows;

/**
 * Number of rows read so far; reset in open() and incremented by getRow().
 *
 * @var int
 */
protected $curRowNum;

/**
 * Row pre-fetched by isEof() and handed out (then cleared) by getNextLine().
 *
 * @var array|null
 */
protected $nextRow;
164
|
|
|
|
165
|
|
|
/**
 * Reads the next raw line from the stream and parses it with the csv dialect.
 *
 * The row counter is advanced before reading, so an exception raised here carries
 * the number of the row that was being read.
 *
 * @return array
 *
 * @throws DataSourceException when reading from the stream raises an exception
 */
protected function getRow()
{
    $this->curRowNum = $this->curRowNum + 1;
    try {
        $rawLine = fgets($this->resource);
    } catch (\Exception $readError) {
        throw new DataSourceException($readError->getMessage(), $this->curRowNum);
    }
    // the raw fgets() result is handed to the dialect as-is
    $parsedRow = $this->csvDialect->parseRow($rawLine);

    return $parsedRow;
}
180
|
|
|
} |
181
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.