1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace frictionlessdata\tableschema; |
4
|
|
|
|
5
|
|
|
use frictionlessdata\tableschema\DataSources\CsvDataSource; |
6
|
|
|
use frictionlessdata\tableschema\Exceptions\DataSourceException; |
7
|
|
|
|
8
|
|
|
/**
 * Represents a data source which is validated against a table schema.
 *
 * Provides interfaces for validating the data and iterating over it.
 * Every row read from the data source is passed through the schema's
 * castRow() before it is handed to the caller.
 */
class Table implements \Iterator
{
    /**
     * Iteration counter: 0 until the first rewind(), then 1 and incremented by next().
     *
     * @var int
     */
    protected $currentLine = 0;

    /**
     * The underlying data source; opened in the constructor, closed in the destructor.
     */
    protected $dataSource;

    /**
     * The schema used to cast rows (a Schema, or an InferSchema when none was given).
     */
    protected $schema;

    /**
     * Values seen so far for every field flagged as unique, keyed by field name.
     *
     * @var array
     */
    protected $uniqueFieldValues;

    /**
     * Rows buffered by ensureInferredSchema(); current() returns these before
     * reading anything further from the data source.
     *
     * @var array
     */
    protected $castRows = [];

    /**
     * @param DataSources\DataSourceInterface|mixed $dataSource anything that is not a BaseDataSource
     *                                                          is wrapped in a CsvDataSource
     * @param Schema|mixed|null                     $schema     a Schema object, a descriptor passed to
     *                                                          the Schema constructor, or a falsy value
     *                                                          to use an InferSchema
     *
     * @throws Exceptions\DataSourceException
     */
    public function __construct($dataSource, $schema = null)
    {
        if (!($dataSource instanceof DataSources\BaseDataSource)) {
            // TODO: more advanced data source detection
            $dataSource = new CsvDataSource($dataSource);
        }
        $this->dataSource = $dataSource;

        if (!($schema instanceof Schema)) {
            $schema = $schema ? new Schema($schema) : new InferSchema();
        }
        $this->schema = $schema;

        $this->dataSource->open();
        $this->uniqueFieldValues = [];
    }

    /**
     * Builds a table from the given source and schema and iterates over up to
     * $numPeekRows rows, converting any exception into validation errors.
     *
     * @param DataSources\DataSourceInterface $dataSource
     * @param Schema                          $schema
     * @param int                             $numPeekRows number of rows to check; no rows are read when <= 0
     *
     * @return array of validation errors (empty when nothing failed)
     */
    public static function validate($dataSource, $schema, $numPeekRows = 10)
    {
        try {
            $table = new static($dataSource, $schema);
        } catch (Exceptions\DataSourceException $e) {
            return [new SchemaValidationError(SchemaValidationError::LOAD_FAILED, $e->getMessage())];
        }

        if ($numPeekRows > 0) {
            // $i counts the rows that were fetched successfully so far
            $i = 0;
            try {
                foreach ($table as $row) {
                    if (++$i > $numPeekRows) {
                        break;
                    }
                }
            } catch (Exceptions\DataSourceException $e) {
                // general error in getting the next row from the data source
                // NOTE(review): this reports $i while the field validation branch below
                // reports $i + 1 for a failure on the same row - confirm which one callers expect.
                return [new SchemaValidationError(SchemaValidationError::ROW_VALIDATION, [
                    'row' => $i,
                    'error' => $e->getMessage(),
                ])];
            } catch (Exceptions\FieldValidationException $e) {
                // validation error in one of the fields
                return array_map(function ($validationError) use ($i) {
                    return new SchemaValidationError(SchemaValidationError::ROW_FIELD_VALIDATION, [
                        'row' => $i + 1,
                        'field' => $validationError->extraDetails['field'],
                        'error' => $validationError->extraDetails['error'],
                        'value' => $validationError->extraDetails['value'],
                    ]);
                }, $e->validationErrors);
            }
        }

        return [];
    }

    /**
     * Returns the table's schema, peeking at rows first when the schema still has to be inferred.
     *
     * @param int $numPeekRows number of rows to read when inferring
     *
     * @return Schema
     */
    public function schema($numPeekRows = 10)
    {
        $this->ensureInferredSchema($numPeekRows);

        return $this->schema;
    }

    /**
     * Returns the keys of the schema's fields() array.
     *
     * @param int $numPeekRows number of rows to read when the schema has to be inferred
     *
     * @return array
     */
    public function headers($numPeekRows = 10)
    {
        $this->ensureInferredSchema($numPeekRows);

        return array_keys($this->schema->fields());
    }

    /**
     * Iterates over the whole table and returns all cast rows.
     *
     * @return array
     *
     * @throws Exceptions\FieldValidationException
     * @throws Exceptions\DataSourceException
     */
    public function read()
    {
        $rows = [];
        foreach ($this as $row) {
            $rows[] = $row;
        }

        return $rows;
    }

    /**
     * Delegates to the data source's save() and returns its result.
     *
     * @param mixed $outputDataSource passed through unchanged
     */
    public function save($outputDataSource)
    {
        return $this->dataSource->save($outputDataSource);
    }

    /**
     * called on each iteration to get the next row
     * does validation and casting on the row.
     *
     * Rows buffered in $castRows are returned first and are not checked again.
     *
     * @return mixed[]
     *
     * @throws Exceptions\FieldValidationException
     * @throws Exceptions\DataSourceException
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        if (count($this->castRows) > 0) {
            return array_shift($this->castRows);
        }

        $row = $this->schema->castRow($this->dataSource->getNextLine());
        foreach ($this->schema->fields() as $field) {
            if (!$field->unique()) {
                continue;
            }
            $name = $field->name();
            if (!array_key_exists($name, $this->uniqueFieldValues)) {
                $this->uniqueFieldValues[$name] = [];
            }
            $value = $row[$name];
            if (in_array($value, $this->uniqueFieldValues[$name])) {
                throw new DataSourceException('field must be unique', $this->currentLine);
            }
            $this->uniqueFieldValues[$name][] = $value;
        }

        return $row;
    }

    /**
     * Closes the data source when the table is destroyed.
     */
    public function __destruct()
    {
        $this->dataSource->close();
    }

    // standard iterator functions
    // the data source is already opened by the constructor, so the first rewind() only
    // starts the line counter; a later rewind() reopens the data source - unless buffered
    // rows are still waiting to be returned by current()

    #[\ReturnTypeWillChange]
    public function rewind()
    {
        if ($this->currentLine == 0) {
            $this->currentLine = 1;
        } elseif (count($this->castRows) == 0) {
            $this->currentLine = 1;
            // the rows are about to be read again, so values remembered from the previous
            // pass must not be reported as duplicates
            $this->uniqueFieldValues = [];
            $this->dataSource->open();
        }
    }

    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->currentLine - count($this->castRows);
    }

    #[\ReturnTypeWillChange]
    public function next()
    {
        // the counter only advances once the buffered rows were all returned
        if (count($this->castRows) == 0) {
            ++$this->currentLine;
        }
    }

    #[\ReturnTypeWillChange]
    public function valid()
    {
        return count($this->castRows) > 0 || !$this->dataSource->isEof();
    }

    /**
     * @return bool true when the current schema is an InferSchema
     */
    protected function isInferSchema()
    {
        return $this->schema instanceof InferSchema;
    }

    /**
     * When the schema is an InferSchema without any fields yet, iterates over up to
     * $numPeekRows rows and then locks the schema; whatever lock() returns is buffered
     * in $castRows so that iteration hands those rows out first.
     *
     * @param int $numPeekRows nothing is done when <= 0
     */
    protected function ensureInferredSchema($numPeekRows = 10)
    {
        if ($this->isInferSchema() && count($this->schema->fields()) == 0) {
            // need to fetch some rows first
            if ($numPeekRows > 0) {
                $i = 0;
                foreach ($this as $row) {
                    if (++$i > $numPeekRows) {
                        break;
                    }
                }
                // these rows will be returned by next current() call
                $this->castRows = $this->schema->lock();
            }
        }
    }
}
205
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes.
In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.