Complex classes like Reader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined which fields belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster to apply.
While breaking up the class, it is a good idea to analyze how other classes use Reader, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
41 | class Reader extends AbstractCsv implements TabularDataReader, JsonSerializable |
||
42 | { |
||
43 | /** |
||
44 | * Offset of the record used as the CSV header; null when no header offset is set. |
||
45 | * |
||
46 | * @var int|null |
||
47 | */ |
||
48 | protected $header_offset; |
||
49 | |||
50 | /** |
||
51 | * Header record: filled by getHeader() from the record at the header offset, emptied by resetProperties(). |
||
52 | * |
||
53 | * @var string[] |
||
54 | */ |
||
55 | protected $header = []; |
||
56 | |||
57 | /** |
||
58 | * Records count; resetProperties() puts it back to -1 (presumably "not computed yet" - confirm against count()). |
||
59 | * |
||
60 | * @var int |
||
61 | */ |
||
62 | protected $nb_records = -1; |
||
63 | |||
64 | /** |
||
65 | * {@inheritdoc} |
||
66 | */ |
||
67 | protected $stream_filter_mode = STREAM_FILTER_READ; |
||
68 | |||
69 | /** |
||
70 | * @var bool when true, getRecords() keeps empty records instead of filtering them out |
||
71 | */ |
||
72 | protected $is_empty_records_included = false; |
||
73 | |||
/**
 * {@inheritdoc}
 *
 * Forwards the three arguments unchanged to the parent named constructor
 * and hands back whatever it returns; the open mode defaults to 'r'.
 */
public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
{
    $instance = parent::createFromPath($path, $open_mode, $context);

    return $instance;
}
||
81 | |||
/**
 * {@inheritdoc}
 *
 * After the parent reset, the stored header record is emptied and the
 * records count is put back to -1.
 */
protected function resetProperties(): void
{
    parent::resetProperties();

    $this->header = [];
    $this->nb_records = -1;
}
|
91 | |||
/**
 * Returns the header offset.
 *
 * When no CSV header offset has been set, null is returned.
 */
public function getHeaderOffset(): ?int
{
    $offset = $this->header_offset;

    return $offset;
}
||
102 | |||
/**
 * {@inheritDoc}
 *
 * The header record is looked up only when a header offset is set and no
 * header has been stored yet; otherwise the stored value (possibly the
 * empty array) is returned as is.
 */
public function getHeader(): array
{
    $must_resolve = null !== $this->header_offset && [] === $this->header;
    if ($must_resolve) {
        // setHeader() throws when the record is missing or empty, leaving the stored header untouched.
        $this->header = $this->setHeader($this->header_offset);
    }

    return $this->header;
}
||
120 | |||
/**
 * Determine the CSV record header.
 *
 * Reads the record located at the given offset. When the offset is 0 the
 * record goes through removeBOM() with the length of the input BOM, so
 * that a BOM sequence does not end up in the first column name.
 *
 * @throws Exception If the header offset is set and no record is found or is the empty array
 *
 * @return string[]
 */
protected function setHeader(int $offset): array
{
    $record = $this->seekRow($offset);
    if ([] === $record || [null] === $record) {
        throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
    }

    if (0 !== $offset) {
        return $record;
    }

    $bom_length = mb_strlen($this->getInputBOM());

    return $this->removeBOM($record, $bom_length, $this->enclosure);
}
||
141 | |||
/**
 * Returns the row at a given offset.
 *
 * The document is walked from its start until a key identical to the
 * offset is met; the empty array is returned when no such key exists.
 */
protected function seekRow(int $offset): array
{
    foreach ($this->getDocument() as $position => $row) {
        if ($position !== $offset) {
            continue;
        }

        return $row;
    }

    return [];
}
||
155 | |||
/**
 * Returns the document as an Iterator.
 *
 * On PHP versions below 7.4 with an empty escape character, the document
 * only receives the delimiter and enclosure and iteration is delegated to
 * EmptyEscapeParser (presumably because the native parser cannot take an
 * empty escape character there - confirm). In every other case the
 * document itself is configured for CSV reading, rewound and returned.
 */
protected function getDocument(): Iterator
{
    $document = $this->document;
    if (PHP_VERSION_ID < 70400 && '' === $this->escape) {
        $document->setCsvControl($this->delimiter, $this->enclosure);

        return EmptyEscapeParser::parse($document);
    }

    $document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD);
    $document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
    $document->rewind();

    return $document;
}
||
173 | |||
/**
 * Strip the BOM sequence from a record.
 *
 * The first $bom_length characters are removed from the first field. If
 * what remains is wrapped in the enclosure character (first and last byte
 * both equal to $enclosure), that pair of enclosures is removed as well.
 *
 * A field shorter than two bytes is never unwrapped: a lone enclosure
 * character cannot be both the opening and the closing delimiter, and
 * substr($field, 1, -1) on such a field yields false on PHP < 8.
 *
 * @param string[] $record      the record whose first field may start with a BOM
 * @param int      $bom_length  number of characters to drop; 0 leaves the record untouched
 * @param string   $enclosure   the enclosure character to look for around the first field
 *
 * @return string[]
 */
protected function removeBOM(array $record, int $bom_length, string $enclosure): array
{
    if (0 === $bom_length) {
        return $record;
    }

    $field = mb_substr($record[0], $bom_length);
    $record[0] = $field;

    // Byte-wise checks on purpose: the enclosure is compared against the raw first and last byte.
    $is_enclosed = strlen($field) >= 2
        && $enclosure === substr($field, 0, 1)
        && $enclosure === substr($field, -1);
    if (!$is_enclosed) {
        return $record;
    }

    $record[0] = substr($field, 1, -1);

    return $record;
}
||
196 | |||
/**
 * {@inheritdoc}
 *
 * Wraps the current records and header in a ResultSet and delegates the
 * column lookup to it.
 */
public function fetchColumn($index = 0): Iterator
{
    return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchColumn($index);
}
||
206 | |||
/**
 * {@inheritdoc}
 *
 * Wraps the current records and header in a ResultSet and delegates the
 * single-record lookup to it.
 */
public function fetchOne(int $nth_record = 0): array
{
    return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchOne($nth_record);
}
||
216 | |||
/**
 * {@inheritdoc}
 *
 * Wraps the current records and header in a ResultSet and delegates the
 * pair extraction to it.
 */
public function fetchPairs($offset_index = 0, $value_index = 1): Iterator
{
    return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchPairs($offset_index, $value_index);
}
||
226 | |||
227 | /** |
||
228 | * {@inheritdoc} |
||
229 | */ |
||
230 | 3 | public function count(): int |
|
238 | |||
239 | /** |
||
240 | * {@inheritdoc} |
||
241 | */ |
||
242 | 6 | public function getIterator(): Iterator |
|
246 | |||
247 | /** |
||
248 | * {@inheritdoc} |
||
249 | */ |
||
250 | 3 | public function jsonSerialize(): array |
|
254 | |||
/**
 * Returns the CSV records as an iterator object.
 *
 * Each CSV record is represented as a simple array containing strings or null values.
 *
 * If the CSV document has a header record, each record is combined with
 * the header record and the header record is removed from the iterator.
 *
 * If the CSV document is inconsistent, missing record fields are filled
 * with null values while extra record fields are stripped from the
 * returned object.
 *
 * @param string[] $header an optional header to use instead of the CSV document header
 */
public function getRecords(array $header = []): Iterator
{
    $header = $this->computeHeader($header);

    // Keeps array records only; empty records ([null]) pass solely when they are included.
    $keep_record = function ($record): bool {
        if (!is_array($record)) {
            return false;
        }

        return $this->is_empty_records_included || $record != [null];
    };

    // The BOM is only handed to stripBOM() when it must not be part of the output.
    $bom = $this->is_input_bom_included ? '' : $this->getInputBOM();

    $records = $this->stripBOM(new CallbackFilterIterator($this->getDocument(), $keep_record), $bom);
    if (null !== $this->header_offset) {
        // Drop the record sitting at the header offset.
        $records = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
            return $this->header_offset !== $offset;
        });
    }

    if (!$this->is_empty_records_included) {
        return $this->combineHeader($records, $header);
    }

    // Included empty records are exposed as [] rather than [null].
    $empty_to_array = static function (array $record): array {
        return [null] === $record ? [] : $record;
    };

    return $this->combineHeader(new MapIterator($records, $empty_to_array), $header);
}
||
303 | |||
304 | /** |
||
305 | * Returns the header to be used for iteration. |
||
306 | * |
||
307 | * @param string[] $header |
||
308 | * |
||
309 | * @throws Exception If the header contains non unique column name |
||
310 | * |
||
311 | * @return string[] |
||
312 | */ |
||
313 | 30 | protected function computeHeader(array $header) |
|
325 | |||
326 | /** |
||
327 | * Combine the CSV header to each record if present. |
||
328 | * |
||
329 | * @param string[] $header |
||
330 | */ |
||
331 | 36 | protected function combineHeader(Iterator $iterator, array $header): Iterator |
|
332 | { |
||
333 | 36 | if ([] === $header) { |
|
334 | 27 | return $iterator; |
|
335 | } |
||
336 | |||
337 | 12 | $field_count = count($header); |
|
351 | |||
352 | /** |
||
353 | * Strip the BOM sequence from the returned records if necessary. |
||
354 | */ |
||
355 | 30 | protected function stripBOM(Iterator $iterator, string $bom): Iterator |
|
372 | |||
373 | /** |
||
374 | * Selects the record to be used as the CSV header. |
||
375 | * |
||
376 | * Because the header is represented as an array, to be valid |
||
377 | * a header MUST contain only unique string value. |
||
378 | * |
||
379 | * @param int|null $offset the header record offset |
||
380 | * |
||
381 | * @throws Exception if the offset is a negative integer |
||
382 | * |
||
383 | * @return static |
||
384 | */ |
||
385 | 27 | public function setHeaderOffset(?int $offset): self |
|
400 | |||
401 | /** |
||
402 | * Enable skipping empty records. |
||
403 | */ |
||
404 | 12 | public function skipEmptyRecords(): self |
|
413 | |||
414 | /** |
||
415 | * Disable skipping empty records. |
||
416 | */ |
||
417 | 12 | public function includeEmptyRecords(): self |
|
426 | |||
427 | /** |
||
428 | * Tells whether empty records are skipped by the instance. |
||
429 | */ |
||
430 | 12 | public function isEmptyRecordsIncluded(): bool |
|
434 | } |
||
435 |