1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Coco\SourceWatcher\Core\Extractors; |
4
|
|
|
|
5
|
|
|
use Coco\SourceWatcher\Core\Extractor; |
6
|
|
|
use Coco\SourceWatcher\Core\IO\Inputs\FileInput; |
7
|
|
|
use Coco\SourceWatcher\Core\Row; |
8
|
|
|
use Coco\SourceWatcher\Core\SourceWatcherException; |
9
|
|
|
|
10
|
|
|
/**
 * Class CsvExtractor
 *
 * Reads a delimited text file line by line and converts every data line into a Row
 * whose keys are column names and whose values are the matching fields of that line.
 *
 * @package Coco\SourceWatcher\Core\Extractors
 */
class CsvExtractor extends Extractor
{
    /**
     * Column definition.
     *
     * Before extraction it may be:
     *  - empty: every column found in the header line is extracted;
     *  - a list of header names: only those columns are extracted;
     *  - a map "header name => output name": those columns are extracted and renamed.
     *
     * Unless headers are overridden, extract() replaces this value with a map in the
     * format [name => 1-based position in the line]. When headers are overridden the
     * value is used as is and must already be in that format.
     */
    protected array $columns;

    /** Field delimiter handed to str_getcsv(). */
    protected string $delimiter;

    /** Field enclosure character handed to str_getcsv(). */
    protected string $enclosure;

    /**
     * Flag telling extract() NOT to read the first line of the file as a header.
     *
     * It is declared as a string and only ever evaluated for truthiness, so "" and "0"
     * mean "read the header line" and any other value means "use $columns as given".
     */
    protected string $overrideHeaders;

    /**
     * Optional per-line rewrite. When not empty, the keys "regex" (a PCRE pattern) and
     * "callback" (a callable receiving the line and the preg_match() matches, returning
     * the line to parse) are read from it for every line.
     */
    protected array $regexChange;

    /** Row after which extraction resumes; null when extraction starts at the first data line. */
    protected ?Row $resumeRow;

    /** Name of the column used to recognise the resume row in the file. */
    protected string $resumeRowByField;

    // Names of the properties above. NOTE(review): presumably consumed by the parent
    // Extractor to decide which options may be set from outside - confirm there.
    protected array $availableOptions = [
        "columns",
        "delimiter",
        "enclosure",
        "overrideHeaders",
        "regexChange",
        "resumeRow",
        "resumeRowByField"
    ];

    /**
     * Initialises every option with its default: comma delimiter, double quote enclosure,
     * header line read from the file, no column filter, no regex rewrite and no resume point.
     */
    public function __construct ()
    {
        $this->columns = [];
        $this->delimiter = ",";
        $this->enclosure = "\"";
        // The property is a string; "" is the value the previous `false` default was coerced to.
        $this->overrideHeaders = "";
        $this->regexChange = [];
        $this->resumeRow = null;
        $this->resumeRowByField = "";
    }

    /**
     * @return array The current column definition (see the $columns property).
     */
    public function getColumns () : array
    {
        return $this->columns;
    }

    /**
     * @param array $columns The column definition (see the $columns property).
     */
    public function setColumns ( array $columns ) : void
    {
        $this->columns = $columns;
    }

    /**
     * @return string The field delimiter.
     */
    public function getDelimiter () : string
    {
        return $this->delimiter;
    }

    /**
     * @param string $delimiter The field delimiter.
     */
    public function setDelimiter ( string $delimiter ) : void
    {
        $this->delimiter = $delimiter;
    }

    /**
     * @return string The field enclosure character.
     */
    public function getEnclosure () : string
    {
        return $this->enclosure;
    }

    /**
     * @param string $enclosure The field enclosure character.
     */
    public function setEnclosure ( string $enclosure ) : void
    {
        $this->enclosure = $enclosure;
    }

    /**
     * @return string The override-headers flag ("" or "0" mean the header line is read from the file).
     */
    public function getOverrideHeaders () : string
    {
        return $this->overrideHeaders;
    }

    /**
     * @param string $overrideHeaders The override-headers flag; any value other than "" and "0" enables it.
     */
    public function setOverrideHeaders ( string $overrideHeaders ) : void
    {
        $this->overrideHeaders = $overrideHeaders;
    }

    /**
     * @return array The regex rewrite definition (keys "regex" and "callback"), or an empty array.
     */
    public function getRegexChange () : array
    {
        return $this->regexChange;
    }

    /**
     * @param array $regexChange The regex rewrite definition (keys "regex" and "callback"), or an empty array.
     */
    public function setRegexChange ( array $regexChange ) : void
    {
        $this->regexChange = $regexChange;
    }

    /**
     * @return Row|null
     */
    public function getResumeRow () : ?Row
    {
        return $this->resumeRow;
    }

    /**
     * @param Row|null $resumeRow
     */
    public function setResumeRow ( ?Row $resumeRow ) : void
    {
        $this->resumeRow = $resumeRow;
    }

    /**
     * @return string
     */
    public function getResumeRowByField () : string
    {
        return $this->resumeRowByField;
    }

    /**
     * @param string $resumeRowByField
     */
    public function setResumeRowByField ( string $resumeRowByField ) : void
    {
        $this->resumeRowByField = $resumeRowByField;
    }

    /**
     * Reads the input file and returns one Row per data line.
     *
     * When both a resume row and a resume field are set, every line up to and including
     * the one whose resume field equals the resume row's value is skipped. Once that line
     * is found the resume row and resume field of this extractor are cleared.
     *
     * @return array The extracted Row objects, also stored in the result property.
     * @throws SourceWatcherException When no input is set, the input is not a FileInput,
     *                                or the file cannot be opened for reading.
     */
    public function extract () : array
    {
        if ( $this->input == null ) {
            throw new SourceWatcherException( "An input must be provided." );
        }

        if ( !( $this->input instanceof FileInput ) ) {
            throw new SourceWatcherException( sprintf( "The input must be an instance of %s", FileInput::class ) );
        }

        $this->result = [];

        $filePath = $this->input->getInput();
        $fileHandler = fopen( $filePath, "r" );

        // fopen() reports failure by returning false; reading from that value would fail further down.
        if ( $fileHandler === false ) {
            throw new SourceWatcherException( sprintf( "The file %s could not be opened for reading.", $filePath ) );
        }

        try {
            if ( !$this->overrideHeaders ) {
                $this->columns = $this->generateColumns( $fileHandler );
            }

            // Compare against false explicitly: a last line holding only "0" is falsy but still data.
            while ( ( $currentFileLine = fgets( $fileHandler ) ) !== false ) {
                if ( !empty( $this->regexChange ) ) {
                    $regex = $this->regexChange["regex"];
                    $callback = $this->regexChange["callback"];

                    $matches = [];
                    preg_match( $regex, $currentFileLine, $matches );

                    $currentFileLine = $callback( $currentFileLine, $matches );
                }

                $currentRowArray = $this->generateRow( $currentFileLine, $this->columns );

                if ( $this->resumeRow !== null && $this->resumeRowByField !== "" ) {
                    $currentValue = $currentRowArray[$this->resumeRowByField] ?? null;

                    // Loose comparison on purpose: file values are strings, the resume row may hold other scalar types.
                    // NOTE(review): relies on Row supporting array-style access - confirm in Row.
                    if ( $currentValue == $this->resumeRow[$this->resumeRowByField] ) {
                        // Resume point reached: from the next line on, rows are collected again.
                        $this->resumeRow = null;
                        $this->resumeRowByField = "";
                    }

                    // Lines before the resume point, and the resume line itself, are never collected.
                    continue;
                }

                $this->result[] = new Row( $currentRowArray );
            }
        } finally {
            // Release the handle even when the callback or the row generation throws.
            fclose( $fileHandler );
        }

        return $this->result;
    }

    /**
     * Reads the header line from the file and builds the column map used to generate rows.
     *
     * @param resource $fileHandler Open file handle positioned at the header line.
     * @return array Map in the format [column name => 1-based position in the line]. A requested
     *               column that is missing from the header maps to null and is extracted as "".
     */
    private function generateColumns ( $fileHandler ) : array
    {
        $headerLine = fgets( $fileHandler );

        // An empty file has no header line at all. The escape character is the documented default.
        $headerNames = $headerLine === false ? [] : str_getcsv( $headerLine, $this->delimiter, $this->enclosure, "\\" );

        // Represent the header in the format [key1 -> 1, key2 -> 2, ... keyN -> N].
        $headerPositions = [];

        foreach ( $headerNames as $offset => $name ) {
            // str_getcsv() yields a single null field for a blank line; it cannot be used as a key.
            if ( $name === null ) {
                continue;
            }

            $headerPositions[$name] = $offset + 1;
        }

        // If no columns have been defined, every column found in the header is used.
        if ( empty( $this->columns ) ) {
            return $headerPositions;
        }

        // If the columns are a plain list [0 => name, 1 => name, ...] keep only the requested names that exist in the header.
        if ( array_keys( $this->columns ) === range( 0, count( $this->columns ) - 1 ) ) {
            return array_intersect_key( $headerPositions, array_flip( $this->columns ) );
        }

        // Otherwise the columns are a map [header name => output name]: keep the position, change the name.
        $resultColumns = [];

        foreach ( $this->columns as $key => $value ) {
            $resultColumns[$value] = $headerPositions[$key] ?? null;
        }

        return $resultColumns;
    }

    /**
     * Parses one line of the file into an associative array.
     *
     * @param string $rowString The raw line.
     * @param array $columns Map in the format [column name => 1-based position in the line].
     * @return array Map [column name => field value]; a position missing from the line yields "".
     */
    private function generateRow ( string $rowString, array $columns ) : array
    {
        $resultRow = [];

        // The escape character is the documented default, passed explicitly.
        $rowArray = str_getcsv( $rowString, $this->delimiter, $this->enclosure, "\\" );

        foreach ( $columns as $column => $index ) {
            // Positions are 1-based, parsed fields are 0-based.
            $position = $index - 1;

            $resultRow[$column] = array_key_exists( $position, $rowArray ) ? $rowArray[$position] : "";
        }

        return $resultRow;
    }
}
235
|
|
|
|