1
|
|
|
<?php |
2
|
|
|
namespace frictionlessdata\tableschema; |
3
|
|
|
|
4
|
|
|
use frictionlessdata\tableschema\Exceptions\TableRowValidationException; |
5
|
|
|
|
6
|
|
|
/**
 * Represents a data source which is validated against a table schema.
 *
 * Provides a static validate() helper and an \Iterator implementation that
 * yields one validated row per iteration.
 *
 * NOTE: values are currently passed through unchanged; the only check that is
 * performed is the "email" format check on string fields (see filterLine()).
 *
 * The table can be iterated only once: rewinding after the first iteration
 * has started throws an exception.
 */
class Table implements \Iterator
{
    /**
     * Iteration counter: 0 before the first rewind(), then 1 for the first row
     * and incremented by next(). Also used as the iterator key and as the
     * "row" value in validation errors raised by filterLine().
     *
     * @var int
     */
    protected $currentLine = 0;

    /**
     * Source of raw rows; must provide open(), close(), isEof() and getNextLine().
     *
     * @var DataSources\DataSourceInterface
     */
    protected $dataSource;

    /**
     * Schema whose descriptor()->fields list drives filterLine().
     *
     * @var Schema
     */
    protected $schema;

    /**
     * Stores the data source and schema and opens the data source.
     *
     * @param DataSources\DataSourceInterface $dataSource
     * @param Schema $schema
     * @throws Exceptions\DataSourceException
     */
    public function __construct($dataSource, $schema)
    {
        $this->dataSource = $dataSource;
        $this->schema = $schema;
        $this->dataSource->open();
    }

    /**
     * Closes the data source when the table object is destroyed.
     */
    public function __destruct()
    {
        $this->dataSource->close();
    }

    /**
     * Validates a data source against a schema by opening it and reading up to
     * $numPeekRows rows through the iterator.
     *
     * @param DataSources\DataSourceInterface $dataSource
     * @param Schema $schema
     * @param int $numPeekRows maximum number of rows to read; 0 or less only opens the source
     * @return array of validation errors, empty when no problem was found
     */
    public static function validate($dataSource, $schema, $numPeekRows = 10)
    {
        try {
            $table = new static($dataSource, $schema);
        } catch (Exceptions\DataSourceException $e) {
            return [new SchemaValidationError(SchemaValidationError::LOAD_FAILED, $e->getMessage())];
        }

        if ($numPeekRows > 0) {
            // number of rows that were read and validated successfully so far
            $i = 0;
            try {
                foreach ($table as $row) {
                    // The row has already been fetched and validated when the loop body runs,
                    // so stop as soon as the requested number of rows was reached
                    // (comparing with ">" would read one row more than requested).
                    if (++$i >= $numPeekRows) {
                        break;
                    }
                }
            } catch (Exceptions\DataSourceException $e) {
                // "row" is the number of rows read successfully before the failure
                return [new TableValidationError(TableValidationError::ROW_VALIDATION_FAILED, [
                    "row" => $i,
                    "error" => $e->getMessage()
                ])];
            } catch (TableRowValidationException $e) {
                return $e->validationErrors;
            }
        }

        return [];
    }

    /**
     * Called on each iteration to get the next row.
     *
     * Every call reads a new line from the data source, so calling it twice
     * without next() in between consumes two lines.
     *
     * @return array
     * @throws TableRowValidationException
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        $line = $this->dataSource->getNextLine();

        return $this->filterLine($line);
    }

    /**
     * Starts the iteration; to keep things simple rewinding is not supported,
     * so the table can only be iterated once.
     *
     * @throws \Exception when the iteration has already been started
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        if ($this->currentLine != 0) {
            throw new \Exception("rewind is not supported");
        }
        $this->currentLine = 1;
    }

    /**
     * @return int the current line counter (1 for the first row)
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->currentLine;
    }

    /**
     * Advances the line counter; the data source itself is advanced by current().
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->currentLine++;
    }

    /**
     * @return bool true while the data source has not reported end of file
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return !$this->dataSource->isEof();
    }

    /**
     * Validates the given line against the table schema.
     *
     * Builds the output row from the schema fields (in schema order), looking
     * each value up in $line by field name; values missing from $line (or null)
     * become null. Fields declared as type "string" with format "email" must
     * contain an "@" character. All failures of the line are collected and
     * thrown together.
     *
     * @param array $line raw line keyed by field name
     * @return array line keyed by field name, containing only the schema fields
     * @throws TableRowValidationException when at least one value is invalid
     */
    protected function filterLine($line)
    {
        $outLine = [];
        $validationErrors = [];

        foreach ($this->schema->descriptor()->fields as $field) {
            $value = isset($line[$field->name]) ? $line[$field->name] : null;

            $isEmailField = isset($field->type) && $field->type == "string"
                && isset($field->format) && $field->format == "email";

            // A missing (null) or non-string value can never be a valid email; checking the
            // type first avoids passing null to strpos(), which is deprecated since PHP 8.1.
            if ($isEmailField && (!is_string($value) || strpos($value, "@") === false)) {
                $validationErrors[] = new TableValidationError(TableValidationError::ROW_VALIDATION_FAILED, [
                    "row" => $this->currentLine,
                    "col" => $field->name,
                    "val" => $value,
                    "error" => "invalid value for email format"
                ]);
            }

            $outLine[$field->name] = $value;
        }

        if (count($validationErrors) > 0) {
            throw new TableRowValidationException($validationErrors);
        }

        return $outLine;
    }
}
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes. In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.