<?php

namespace JsonTable\Analyse;

/**
 * Perform primary key analysis.
 *
 * When the JSON table schema declares a "primaryKey", every row returned by
 * loopThroughFileRows() is reduced to the values held in the primary key
 * columns and an error is recorded for each row whose values have already
 * been seen on an earlier row.
 *
 * @package JsonTable
 */
class PrimaryKey extends Analyse implements AnalyseInterface
{
    /**
     * @var string The description for fields with duplicated primary keys.
     */
    const ERROR_DUPLICATE_PRIMARY_KEY = 'There are <strong>%d</strong> rows that have duplicated primary keys:';

    /**
     * @var array The current CSV row being analysed.
     */
    private $currentCsvRow;

    /**
     * @var int The position of the current CSV row in the CSV file (counted from 1).
     */
    private $rowNumber;

    /**
     * @var array Map of primary key hash => number of the first row that produced that hash.
     */
    private $fileKeys;

    /**
     * @var array The primary key parts for the current row.
     */
    private $rowKeyParts;

    /**
     * @var array The primary key fields.
     */
    private $primaryKeyFields;

    /**
     * @var string The name of the primary key field currently being analysed.
     */
    private $primaryKeyFieldName;

    /**
     * @var string The hash of the data taken from the primary key fields in the current CSV row.
     */
    private $hash;


    /**
     * Validate that any specified primary key constraints have been met.
     *
     * Returns true immediately when the schema has no "primaryKey" property.
     * Otherwise each row is hashed and compared against the hashes of the
     * rows already read; duplicates are passed to handleDuplicateHash().
     *
     * @access public
     *
     * @return boolean False if a duplicate was found and "stopIfInvalid" is set, true otherwise.
     *
     * @throws \Exception if a primary key field is not in the schema file.
     */
    public function validate()
    {
        if (false === property_exists(parent::$schemaJson, 'primaryKey')) {
            return true;
        }

        $this->setPrimaryKeyFields();
        $this->fileKeys = [];

        self::rewindFilePointerToFirstData();

        $this->rowNumber = 1;

        while ($this->currentCsvRow = self::loopThroughFileRows()) {
            $this->getPrimaryKeyDataForRow();
            $this->createHash();

            // Compare against false explicitly rather than relying on the
            // truthiness of the returned row number.
            $existingKey = $this->isHashUnique();

            if (false !== $existingKey) {
                $this->handleDuplicateHash($existingKey);

                if ($this->stopIfInvalid) {
                    return false;
                }
            } else {
                // Only the first row for a hash is remembered, so later
                // duplicates are always reported against that first row.
                $this->fileKeys[$this->hash] = $this->rowNumber;
            }

            $this->rowNumber++;
        }

        // NOTE(review): when "stopIfInvalid" is falsy, duplicates are recorded
        // through setError() but true is still returned here. Confirm that
        // callers rely on the recorded errors rather than this return value.
        return true;
    }


    /**
     * Set the primary key fields.
     *
     * The schema's "primaryKey" value is cast to an array so that a single
     * field name and a list of field names are handled the same way.
     *
     * @access private
     *
     * @return void
     */
    private function setPrimaryKeyFields()
    {
        $this->primaryKeyFields = (array) parent::$schemaJson->primaryKey;
    }


    /**
     * Check that there is a column in the JSON table schema file for the current primary key field.
     *
     * @access private
     *
     * @return void
     *
     * @throws \Exception if the primary key was not in the schema file.
     */
    private function checkColumnExistsInSchema()
    {
        if (false === $this->getSchemaKeyFromName($this->primaryKeyFieldName)) {
            throw new \Exception(
                "The primary key \"{$this->primaryKeyFieldName}\" was not in the file. "
                . 'Primary key columns should be set as required.'
            );
        }
    }


    /**
     * Get the data in the CSV column for the current primary key column.
     *
     * @access private
     *
     * @return string The data in the column.
     */
    private function csvDataForPrimaryKeyColumn()
    {
        $csvPosition = $this->getCsvPositionFromName($this->primaryKeyFieldName);

        return $this->currentCsvRow[$csvPosition];
    }


    /**
     * Get the data in the primary key columns for the current CSV row.
     *
     * Each primary key field name is lower-cased before it is looked up.
     *
     * @access private
     *
     * @return void
     *
     * @throws \Exception if a primary key field is not in the schema file.
     */
    private function getPrimaryKeyDataForRow()
    {
        $this->rowKeyParts = [];

        foreach ($this->primaryKeyFields as $fieldName) {
            $this->primaryKeyFieldName = strtolower($fieldName);
            $this->checkColumnExistsInSchema();
            $this->rowKeyParts[] = $this->csvDataForPrimaryKeyColumn();
        }
    }


    /**
     * Create a hash of the data taken from the primary key fields in the current CSV row.
     *
     * The "hash" is the key parts joined with ", "; it is also shown verbatim
     * in the duplicate key error message.
     *
     * NOTE(review): because the parts are joined with a plain separator, two
     * different composite keys whose values themselves contain ", " can
     * produce the same hash.
     *
     * @access private
     *
     * @return void
     */
    private function createHash()
    {
        $this->hash = implode(', ', $this->rowKeyParts);
    }


    /**
     * Check whether the current hash has already been created for this file.
     *
     * Despite the name, a non-false return value means the hash is NOT unique.
     * The lookup is an exact array key match, so numeric-looking values such
     * as "7" and "007" are treated as different keys.
     *
     * @access private
     *
     * @return boolean|int False if this row's primary key hash is unique
     *                     or the number of the first row with the same hash if it's not.
     */
    private function isHashUnique()
    {
        return isset($this->fileKeys[$this->hash]) ? $this->fileKeys[$this->hash] : false;
    }


    /**
     * Handle the current hash not being unique.
     *
     * Records a duplicate primary key error and adds the current row number
     * to the "rows_with_errors" statistic.
     *
     * @access private
     *
     * @param int $existingKey The number of the row with the same hash.
     *
     * @return void
     */
    private function handleDuplicateHash($existingKey)
    {
        $primaryKeyColumns = implode(', ', $this->primaryKeyFields);
        $errorMessage = "The data in columns \"{$primaryKeyColumns}\" should be unique, "
            . "but rows {$existingKey} & {$this->rowNumber} have the same values of \"{$this->hash}\"";

        $this->setError(self::ERROR_DUPLICATE_PRIMARY_KEY, $errorMessage);
        $this->statistics['rows_with_errors'][] = $this->rowNumber;
    }
}
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.