| Total Complexity | 43 |
| Total Lines | 495 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like CSVHelper often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CSVHelper, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 42 | class CSVHelper |
||
| 43 | { |
||
| 44 | public const ERROR_INVALID_HEADERS_POSITION = 561002; |
||
| 45 | public const ERROR_INVALID_FILE_ENCODING = 561003; |
||
| 46 | public const ERROR_FILE_PARSING_FAILED = 561004; |
||
| 47 | public const ERROR_CSV_FILE_NOT_READABLE = 561005; |
||
| 48 | public const ERROR_STRING_PARSING_FAILED = 561006; |
||
| 49 | |||
| 50 | public const DELIMITER_AUTO = 'auto'; |
||
| 51 | |||
| 52 | public const HEADERS_LEFT = 'hleft'; |
||
| 53 | public const HEADERS_TOP = 'htop'; |
||
| 54 | public const HEADERS_NONE = 'hnone'; |
||
| 55 | |||
| 56 | protected string $csv = ''; |
||
| 57 | protected string $headersPosition = self::HEADERS_NONE; |
||
| 58 | protected string $separator = ';'; |
||
| 59 | protected int $columnCount = 0; |
||
| 60 | protected int $rowCount = 0; |
||
| 61 | |||
| 62 | /** |
||
| 63 | * @var string[] |
||
| 64 | */ |
||
| 65 | protected array $errors = array(); |
||
| 66 | |||
| 67 | /** |
||
| 68 | * @var array<int,array<int,mixed>> |
||
| 69 | */ |
||
| 70 | protected array $data = array(); |
||
| 71 | |||
| 72 | /** |
||
| 73 | * @var string[] |
||
| 74 | */ |
||
| 75 | protected array $headers = array(); |
||
| 76 | |||
/**
 * Creates an empty helper instance. Nothing is parsed until
 * {@see loadString()} or {@see loadFile()} is called.
 */
public function __construct()
{
}
||
| 81 | |||
/**
 * Factory method: instantiates a fresh CSV builder, which is
 * used to assemble CSV content from scratch.
 *
 * @return CSVHelper_Builder
 */
public static function createBuilder() : CSVHelper_Builder
{
    $builder = new CSVHelper_Builder();

    return $builder;
}
||
| 92 | |||
| 93 | |||
| 94 | |||
/**
 * Loads and parses CSV content given as a string.
 *
 * The string is stripped of UTF byte order marks and converted
 * to UTF-8 before it is parsed. Parsing problems do not throw:
 * call {@link hasErrors()} afterwards to find out whether the
 * string could be parsed.
 *
 * @param string $string The raw CSV content.
 * @return $this
 */
public function loadString(string $string) : self
{
    // A leftover byte order mark has to go before the encoding is normalized.
    $withoutBom = ConvertHelper::stripUTFBom($string);

    $this->csv = ConvertHelper::string2utf8($withoutBom);

    $this->parse();

    return $this;
}
||
| 117 | |||
/**
 * Loads and parses CSV content from a file on disk.
 *
 * The file contents are read and handed to {@see loadString()}.
 * Call {@link hasErrors()} afterwards to find out whether the
 * file contents could be parsed.
 *
 * @param string $file Path to the CSV file.
 * @return $this
 * @throws FileHelper_Exception
 *
 * @see FileHelper::ERROR_FILE_DOES_NOT_EXIST
 * @see FileHelper::ERROR_CANNOT_READ_FILE_CONTENTS
 */
public function loadFile(string $file) : self
{
    return $this->loadString(FileHelper::readContents($file));
}
||
| 138 | |||
/**
 * Declares that the first row of the CSV holds the headers.
 *
 * Shorthand for {@see setHeadersPosition()} with
 * {@see CSVHelper::HEADERS_TOP}.
 *
 * @return $this
 * @throws CSVHelper_Exception
 */
public function setHeadersTop() : self
{
    return $this->setHeadersPosition(
        self::HEADERS_TOP
    );
}
||
| 149 | |||
/**
 * Declares that the first column of the CSV holds the headers.
 *
 * Shorthand for {@see setHeadersPosition()} with
 * {@see CSVHelper::HEADERS_LEFT}.
 *
 * @return $this
 * @throws CSVHelper_Exception
 */
public function setHeadersLeft() : self
{
    return $this->setHeadersPosition(
        self::HEADERS_LEFT
    );
}
||
| 160 | |||
/**
 * Declares that the CSV has no headers at all. This is the
 * default setting of a new instance.
 *
 * Shorthand for {@see setHeadersPosition()} with
 * {@see CSVHelper::HEADERS_NONE}.
 *
 * @return $this
 * @throws CSVHelper_Exception
 */
public function setHeadersNone() : self
{
    return $this->setHeadersPosition(
        self::HEADERS_NONE
    );
}
||
| 171 | |||
/**
 * Whether the headers are configured to be in the first column.
 *
 * @return bool
 */
public function isHeadersLeft() : bool
{
    return $this->headersPosition === self::HEADERS_LEFT;
}
||
| 176 | |||
/**
 * Whether the headers are configured to be in the first row.
 *
 * @return bool
 */
public function isHeadersTop() : bool
{
    return $this->headersPosition === self::HEADERS_TOP;
}
||
| 181 | |||
/**
 * Whether the CSV is configured as having no headers.
 *
 * @return bool
 */
public function isHeadersNone() : bool
{
    return $this->headersPosition === self::HEADERS_NONE;
}
||
| 186 | |||
/**
 * Checks whether the currently configured headers position
 * is exactly the specified one.
 *
 * @param string $position One of the HEADERS_* class constants.
 * @return bool
 */
public function isHeadersPosition(string $position) : bool
{
    return $position === $this->headersPosition;
}
||
| 191 | |||
/**
 * Specifies where the headers are positioned in the
 * CSV, or turns them off entirely. Use the class constants
 * to ensure the value is correct.
 *
 * The position is only stored here; it is read when CSV
 * content is parsed.
 *
 * @param string $position One of the HEADERS_* class constants.
 * @throws CSVHelper_Exception If the position is not one of the known values.
 * @return $this
 *
 * @see CSVHelper::HEADERS_LEFT
 * @see CSVHelper::HEADERS_TOP
 * @see CSVHelper::HEADERS_NONE
 * @see CSVHelper::ERROR_INVALID_HEADERS_POSITION
 */
public function setHeadersPosition(string $position) : self
{
    $validPositions = array(
        self::HEADERS_LEFT,
        self::HEADERS_NONE,
        self::HEADERS_TOP
    );

    // Strict mode: match on identical strings only, without PHP's loose comparison rules.
    if(!in_array($position, $validPositions, true)) {
        throw new CSVHelper_Exception(
            'Invalid headers position',
            sprintf(
                'The header position [%s] is invalid. Valid positions are [%s]. '.
                'It is recommended to use the class constants, for example [%s].',
                $position,
                implode(', ', $validPositions),
                'CSVHelper::HEADERS_LEFT'
            ),
            self::ERROR_INVALID_HEADERS_POSITION
        );
    }

    $this->headersPosition = $position;

    return $this;
}
||
| 230 | |||
/**
 * Clears everything produced by a previous parse: the data rows,
 * the headers, the collected errors and the row/column counters.
 *
 * The raw CSV string and the configured headers position are
 * left untouched.
 *
 * @return $this
 */
public function reset() : self
{
    $this->rowCount = $this->columnCount = 0;
    $this->errors = $this->headers = $this->data = array();

    return $this;
}
||
| 248 | |||
/**
 * Retrieves all parsed data rows, without the headers
 * when a headers position has been configured.
 *
 * @return array<int,array<int,mixed>>
 */
public function getData() : array
{
    return $this->data;
}
||
| 256 | |||
/**
 * Retrieves the row at the specified index.
 * If there is no data at the index, this will
 * return an array populated with empty strings
 * for all available columns.
 *
 * Note: Existing rows are returned as parsed; they are
 * not padded to the column count.
 *
 * Tip: Use the {@link rowExists()} method to check
 * whether the specified row exists.
 *
 * @param integer $index Zero-based row index.
 * @return array<int,mixed>
 * @see rowExists()
 */
public function getRow(int $index) : array
{
    if(isset($this->data[$index])) {
        return $this->data[$index];
    }

    // The placeholder row has one cell per column, so it must be
    // sized by the column count, not by the number of rows.
    return array_fill(0, $this->columnCount, '');
}
||
| 274 | |||
/**
 * Tells whether the parsed data contains a row at the given index.
 *
 * @param integer $index Zero-based row index.
 * @return boolean
 */
public function rowExists(int $index) : bool
{
    $exists = isset($this->data[$index]);

    return $exists;
}
||
| 284 | |||
/**
 * Returns the number of data rows found by the last parse.
 * When the headers are positioned on top, the header row
 * is not included in the count.
 *
 * @return integer
 */
public function countRows() : int
{
    return $this->rowCount;
}
||
| 296 | |||
/**
 * Returns the number of columns found by the last parse,
 * which is the cell count of the widest data row. When the
 * headers are positioned on the left, the header column is
 * not included in the count.
 *
 * @return integer
 */
public function countColumns() : int
{
    return $this->columnCount;
}
||
| 308 | |||
/**
 * Returns the headers extracted by the last parse. The headers
 * position must be configured before loading the CSV; without
 * it, no headers are extracted and the array is empty.
 *
 * @return string[] Indexed array with header names.
 */
public function getHeaders() : array
{
    return $this->headers;
}
||
| 319 | |||
/**
 * Collects the values of a single column, one entry per data row.
 * Rows that have no cell at the specified index contribute an
 * empty string, so a nonexistent column yields an array of
 * empty strings.
 *
 * Tip: Use the {@link columnExists()} method to check
 * whether a column exists.
 *
 * @param integer $index Zero-based column index.
 * @return string[]
 * @see columnExists()
 */
public function getColumn(int $index) : array
{
    $column = array();
    $rowIndex = 0;

    while($rowIndex < $this->rowCount)
    {
        $column[] = $this->data[$rowIndex][$index] ?? '';
        $rowIndex++;
    }

    return $column;
}
||
| 345 | |||
/**
 * Checks whether the specified column exists in the data set.
 *
 * A column exists when its zero-based index is smaller than the
 * column count of the parsed data. Negative indexes never exist.
 *
 * @param integer $index Zero-based column index.
 * @return boolean
 */
public function columnExists(int $index) : bool
{
    // Without the lower bound, any negative index would be reported as existing.
    return $index >= 0 && $index < $this->columnCount;
}
||
| 355 | |||
/**
 * Parses the loaded CSV string and fills the data rows, the
 * headers and the row/column counters.
 *
 * All results of a previous parse are discarded first. Failures
 * are not thrown: they are recorded as error messages, and the
 * method returns early with empty data.
 *
 * @return void
 * @see hasErrors()
 * @see getErrorMessages()
 */
protected function parse() : void
{
    $this->reset();

    // Compare against '' explicitly: empty() also treats the string "0"
    // as empty, which would reject a valid CSV consisting of that value.
    if(trim($this->csv) === '') {
        $this->addError('Tried to parse an empty CSV string.');
        return;
    }

    // ensure that the last line in the CSV has
    // a linebreak afterwards, otherwise the line
    // will not be parsed.
    $this->csv = rtrim($this->csv).PHP_EOL;

    $parser = self::createParser();

    if(!$parser->parse($this->csv)) {
        $this->addError('The CSV string could not be parsed.');
        return;
    }

    $result = $parser->data;

    switch($this->headersPosition)
    {
        case self::HEADERS_TOP:
            // array_shift() returns null when there are no rows; fall back
            // to an empty list, since the typed property only accepts arrays.
            $this->headers = array_shift($result) ?? array();
            break;

        case self::HEADERS_LEFT:
            $keep = array();

            // The first cell of every row is its header, the rest is data.
            foreach($result as $row)
            {
                $this->headers[] = array_shift($row);
                $keep[] = $row;
            }

            $result = $keep;
            break;
    }

    $this->data = $result;
    $this->rowCount = count($this->data);

    // The column count is the cell count of the widest row.
    foreach($this->data as $row)
    {
        $this->columnCount = max($this->columnCount, count($row));
    }
}
||
| 409 | |||
/**
 * Tells whether at least one error was recorded
 * while parsing the CSV.
 *
 * @return boolean
 * @see getErrorMessages()
 */
public function hasErrors() : bool
{
    return count($this->errors) > 0;
}
||
| 421 | |||
/**
 * Returns the error messages recorded since the last reset,
 * in the order in which they were added.
 *
 * @return string[]
 * @see hasErrors()
 */
public function getErrorMessages() : array
{
    return $this->errors;
}
||
| 430 | |||
/**
 * Appends a message to the list of recorded errors.
 *
 * @param string $error The error message.
 * @return $this
 */
protected function addError(string $error) : self
{
    $this->errors[] = $error;

    return $this;
}
||
| 436 | |||
/**
 * Guesses the separator character by searching the loaded CSV
 * string for typical character sequences. The first sequence
 * found decides; if none is found, the configured default
 * separator is returned.
 *
 * @return string
 */
protected function detectSeparator() : string
{
    // Checked in this order: separators between quoted values
    // first, then doubled bare separators (empty cells).
    $signatures = array(
        array("\"\t\"", "\t"),
        array('";"', ';'),
        array('","', ','),
        array(';;', ';'),
        array(',,', ',')
    );

    foreach($signatures as $signature)
    {
        list($needle, $separator) = $signature;

        if(strpos($this->csv, $needle) === false) {
            continue;
        }

        return $separator;
    }

    return $this->separator;
}
||
| 455 | |||
/**
 * Instantiates a new CSV parser.
 *
 * The parser's delimiter is only set when a specific one is
 * given; with {@see CSVHelper::DELIMITER_AUTO} (the default),
 * the parser keeps its own delimiter setting.
 *
 * @param string $delimiter The delimiter character, or {@see CSVHelper::DELIMITER_AUTO}.
 * @return Csv
 */
public static function createParser(string $delimiter=self::DELIMITER_AUTO) : Csv
{
    $parser = new Csv();

    if($delimiter === self::DELIMITER_AUTO) {
        return $parser;
    }

    $parser->delimiter = $delimiter;

    return $parser;
}
||
| 472 | |||
/**
 * Parses a CSV file using the parser's automatic mode (to detect
 * the delimiter and enclosure), and returns all rows. Header
 * handling is turned off in the parser, so a header row, if the
 * file has one, is returned as a regular row.
 *
 * @param string $path Path to the CSV file; it must be readable.
 * @return array<int,array<int,mixed>>
 *
 * @throws CSVHelper_Exception If the parser reports a failure.
 * @throws FileHelper_Exception
 *
 * @see CSVHelper::ERROR_CSV_FILE_NOT_READABLE
 * @see CSVHelper::ERROR_FILE_PARSING_FAILED
 */
public static function parseFile(string $path) : array
{
    $path = FileHelper::requireFileReadable($path, self::ERROR_CSV_FILE_NOT_READABLE);

    $parser = self::createParser();
    $parser->heading = false;

    // Only a string result from auto() counts as success.
    if(!is_string($parser->auto($path)))
    {
        throw new CSVHelper_Exception(
            'Cannot parse CSV file',
            sprintf(
                'The file [%s] could not be parsed.'.PHP_EOL.
                'Additional information: '.PHP_EOL.
                '%s',
                $path,
                JSONConverter::var2jsonSilent($parser->error_info)
            ),
            self::ERROR_FILE_PARSING_FAILED
        );
    }

    return $parser->data;
}
||
| 512 | |||
| 513 | /** |
||
| 514 | * Parses a CSV string in automatic mode (to detect the delimiter and |
||
| 515 | * enclosure), and returns the data rows, including the header row |
||
| 516 | * if any. |
||
| 517 | * |
||
| 518 | * @param string $string |
||
| 519 | * @return array<int,array<int,mixed>> |
||
| 520 | * @throws CSVHelper_Exception |
||
| 521 | * |
||
| 522 | * @see CSVHelper::ERROR_STRING_PARSING_FAILED |
||
| 523 | */ |
||
| 524 | public static function parseString(string $string) : array |
||
| 537 | ); |
||
| 538 | } |
||
| 539 | } |
||
| 540 |