CSVHelper::countColumns()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
/**
 * File containing the {@link CSVHelper} class.
 *
 * @package Application Utils
 * @subpackage CSVHelper
 * @see CSVHelper
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
use AppUtils\ConvertHelper\JSONConverter;
15
use JsonException;
16
use ParseCsv\Csv;
17
18
/**
 * Helper class to parse and create/modify csv files or strings.
 *
 * Usage:
 *
 * ```php
 * $csv = new CSVHelper();
 * $csv->setHeadersTop(); // has to be set before anything else.
 *
 * // parse a csv file
 * $csv->loadFile('path/to/file');
 *
 * // parse a csv string
 * $csv->loadString($csvString);
 *
 * // retrieve data
 * $headers = $csv->getHeaders();
 * $row = $csv->getRow(4);
 * ```
 *
 * @package Application Utils
 * @subpackage CSVHelper
 * @author Sebastian Mordziol <[email protected]>
 */
class CSVHelper
{
    public const ERROR_INVALID_HEADERS_POSITION = 561002;
    public const ERROR_INVALID_FILE_ENCODING = 561003;
    public const ERROR_FILE_PARSING_FAILED = 561004;
    public const ERROR_CSV_FILE_NOT_READABLE = 561005;
    public const ERROR_STRING_PARSING_FAILED = 561006;

    public const DELIMITER_AUTO = 'auto';

    public const HEADERS_LEFT = 'hleft';
    public const HEADERS_TOP = 'htop';
    public const HEADERS_NONE = 'hnone';

    /**
     * The CSV source string that was last handed to {@link loadString()}.
     */
    protected string $csv = '';

    /**
     * One of the HEADERS_* constants.
     */
    protected string $headersPosition = self::HEADERS_NONE;

    /**
     * Fallback separator returned by {@link detectSeparator()}
     * when none of its known patterns is found.
     */
    protected string $separator = ';';

    /**
     * Size of the widest parsed data row.
     */
    protected int $columnCount = 0;

    /**
     * Amount of parsed data rows.
     */
    protected int $rowCount = 0;

    /**
     * @var string[]
     */
    protected array $errors = array();

    /**
     * @var array<int,array<int,mixed>>
     */
    protected array $data = array();

    /**
     * @var string[]
     */
    protected array $headers = array();

    /**
     * Intentionally empty: kept so that subclasses calling
     * `parent::__construct()` keep working.
     */
    public function __construct()
    {

    }

    /**
     * Creates and returns a new instance of the CSV builder which
     * can be used to build CSV from scratch.
     *
     * @return CSVHelper_Builder
     */
    public static function createBuilder() : CSVHelper_Builder
    {
        return new CSVHelper_Builder();
    }

    /**
     * Loads CSV data from a string.
     *
     * Note: Use the {@link hasErrors()} method to
     * check if the string could be parsed correctly
     * afterwards.
     *
     * @param string $string
     * @return $this
     */
    public function loadString(string $string) : self
    {
        // Remove any UTF byte order marks that may still be present,
        // then ensure the string is valid UTF8 before parsing it.
        $withoutBom = ConvertHelper::stripUTFBom($string);
        $this->csv = ConvertHelper::string2utf8($withoutBom);

        $this->parse();

        return $this;
    }

    /**
     * Loads CSV data from a file: reads the file's contents and
     * passes them on to {@link loadString()}.
     *
     * Note: Use the {@link hasErrors()} method to
     * check if the file could be parsed correctly
     * afterwards.
     *
     * @param string $file
     * @throws FileHelper_Exception
     * @return $this
     *
     * @see FileHelper::ERROR_FILE_DOES_NOT_EXIST
     * @see FileHelper::ERROR_CANNOT_READ_FILE_CONTENTS
     */
    public function loadFile(string $file) : self
    {
        return $this->loadString(FileHelper::readContents($file));
    }

    /**
     * Specifies that headers are positioned on top, horizontally.
     * @return $this
     *
     * @throws CSVHelper_Exception
     */
    public function setHeadersTop() : self
    {
        return $this->setHeadersPosition(self::HEADERS_TOP);
    }

    /**
     * Specifies that headers are positioned on the left, vertically.
     * @return $this
     *
     * @throws CSVHelper_Exception
     */
    public function setHeadersLeft() : self
    {
        return $this->setHeadersPosition(self::HEADERS_LEFT);
    }

    /**
     * Specifies that there are no headers in the file (default).
     * @return $this
     *
     * @throws CSVHelper_Exception
     */
    public function setHeadersNone() : self
    {
        return $this->setHeadersPosition(self::HEADERS_NONE);
    }

    /**
     * Whether the headers are configured to be on the left.
     */
    public function isHeadersLeft() : bool
    {
        return $this->isHeadersPosition(self::HEADERS_LEFT);
    }

    /**
     * Whether the headers are configured to be on top.
     */
    public function isHeadersTop() : bool
    {
        return $this->isHeadersPosition(self::HEADERS_TOP);
    }

    /**
     * Whether the CSV is configured to have no headers.
     */
    public function isHeadersNone() : bool
    {
        return $this->isHeadersPosition(self::HEADERS_NONE);
    }

    /**
     * Checks whether the configured headers position is
     * exactly the specified one.
     *
     * @param string $position One of the HEADERS_* constants.
     * @return bool
     */
    public function isHeadersPosition(string $position) : bool
    {
        return $position === $this->headersPosition;
    }

    /**
     * Specifies where the headers are positioned in the
     * CSV, or turns them off entirely. Use the class constants
     * to ensure the value is correct.
     *
     * @param string $position
     * @throws CSVHelper_Exception
     * @return $this
     *
     * @see CSVHelper::HEADERS_LEFT
     * @see CSVHelper::HEADERS_TOP
     * @see CSVHelper::HEADERS_NONE
     * @see CSVHelper::ERROR_INVALID_HEADERS_POSITION
     */
    public function setHeadersPosition(string $position) : self
    {
        $validPositions = array(
            self::HEADERS_LEFT,
            self::HEADERS_NONE,
            self::HEADERS_TOP
        );

        // Strict lookup: only the exact constant values are accepted.
        if(in_array($position, $validPositions, true)) {
            $this->headersPosition = $position;
            return $this;
        }

        throw new CSVHelper_Exception(
            'Invalid headers position',
            sprintf(
                'The header position [%s] is invalid. Valid positions are [%s]. '.
                'It is recommended to use the class constants, for example [%s].',
                $position,
                implode(', ', $validPositions),
                'CSVHelper::HEADERS_LEFT'
            ),
            self::ERROR_INVALID_HEADERS_POSITION
        );
    }

    /**
     * Resets all internal data, allowing to start entirely anew
     * with a new file, or to start building a new CSV file from
     * scratch.
     *
     * Note: The configured headers position and the loaded
     * source string are left untouched.
     *
     * @return $this
     */
    public function reset() : self
    {
        $this->rowCount = 0;
        $this->columnCount = 0;
        $this->errors = array();
        $this->headers = array();
        $this->data = array();

        return $this;
    }

    /**
     * Retrieves all parsed data rows (without the headers).
     *
     * @return array<int,array<int,mixed>>
     */
    public function getData() : array
    {
        return $this->data;
    }

    /**
     * Retrieves the row at the specified index.
     * If there is no data at the index, this will
     * return an array populated with empty strings
     * for all available columns.
     *
     * Tip: Use the {@link rowExists()} method to check
     * whether the specified row exists.
     *
     * @param integer $index
     * @return array<int,mixed>
     * @see rowExists()
     */
    public function getRow(int $index) : array
    {
        if(isset($this->data[$index])) {
            return $this->data[$index];
        }

        // The placeholder row must be as wide as the data set
        // (one empty string per column), not as long as the
        // amount of rows.
        return array_fill(0, $this->columnCount, '');
    }

    /**
     * Checks whether the specified row exists in the data set.
     * @param integer $index
     * @return boolean
     */
    public function rowExists(int $index) : bool
    {
        return isset($this->data[$index]);
    }

    /**
     * Counts the amount of rows in the parsed CSV,
     * excluding the headers if any, depending on
     * their position.
     *
     * @return integer
     */
    public function countRows() : int
    {
        return $this->rowCount;
    }

    /**
     * Counts the amount of columns in the parsed CSV,
     * excluding the headers if any, depending on
     * their position. This is the size of the widest
     * data row.
     *
     * @return integer
     */
    public function countColumns() : int
    {
        return $this->columnCount;
    }

    /**
     * Retrieves the headers, if any. Specify the position of the
     * headers first to ensure this works correctly.
     *
     * @return string[] Indexed array with header names.
     */
    public function getHeaders() : array
    {
        return $this->headers;
    }

    /**
     * Retrieves the column at the specified index. If there
     * is no column at the index, this returns an array
     * populated with empty strings (one entry per row).
     *
     * Tip: Use the {@link columnExists()} method to check
     * whether a column exists.
     *
     * @param integer $index
     * @return string[]
     * @see columnExists()
     */
    public function getColumn(int $index) : array
    {
        $column = array();

        for($rowIndex = 0; $rowIndex < $this->rowCount; $rowIndex++)
        {
            // Rows shorter than the requested index contribute an empty string.
            $column[] = $this->data[$rowIndex][$index] ?? '';
        }

        return $column;
    }

    /**
     * Checks whether the specified column exists in the data set.
     * Negative indexes never exist.
     *
     * @param integer $index
     * @return boolean
     */
    public function columnExists(int $index) : bool
    {
        return $index >= 0 && $index < $this->columnCount;
    }

    /**
     * Parses the currently loaded CSV string, and populates the
     * data rows, the headers (according to the configured headers
     * position), and the row and column counts.
     *
     * Problems are not thrown, but collected as error messages:
     * see {@link hasErrors()} and {@link getErrorMessages()}.
     *
     * NOTE(review): unlike {@link parseFile()}, the parser's `heading`
     * option is not turned off here - confirm that the first CSV line
     * is really part of the parser's data rows with the library default.
     */
    protected function parse() : void
    {
        $this->reset();

        // Strict comparison instead of empty(): a CSV whose only
        // content is "0" is not empty, but empty('0') is true.
        if(trim($this->csv) === '') {
            $this->addError('Tried to parse an empty CSV string.');
            return;
        }

        // ensure that the last line in the CSV has
        // a linebreak afterwards, otherwise the line
        // will not be parsed.
        $this->csv = rtrim($this->csv).PHP_EOL;

        $parser = self::createParser();

        if(!$parser->parse($this->csv)) {
            $this->addError('The CSV string could not be parsed.');
            return;
        }

        $rows = $parser->data;

        if($this->headersPosition === self::HEADERS_TOP)
        {
            // array_shift() returns null when there are no rows at all,
            // which may not be assigned to the typed array property.
            $this->headers = array_shift($rows) ?? array();
        }
        else if($this->headersPosition === self::HEADERS_LEFT)
        {
            $remaining = array();

            foreach($rows as $row)
            {
                // The first cell of every row is its header; an empty
                // row yields an empty header name instead of null.
                $this->headers[] = array_shift($row) ?? '';
                $remaining[] = $row;
            }

            $rows = $remaining;
        }

        $this->data = $rows;
        $this->rowCount = count($rows);

        // The column count is the size of the widest row.
        foreach($rows as $row) {
            $this->columnCount = max($this->columnCount, count($row));
        }
    }

    /**
     * Checks whether any errors have been encountered
     * while parsing the CSV.
     *
     * @return boolean
     * @see getErrorMessages()
     */
    public function hasErrors() : bool
    {
        return count($this->errors) > 0;
    }

    /**
     * Retrieves all error messages.
     * @return string[]
     */
    public function getErrorMessages() : array
    {
        return $this->errors;
    }

    /**
     * Appends a message to the list of parsing errors.
     *
     * @param string $error
     * @return $this
     */
    protected function addError(string $error) : self
    {
        $this->errors[] = $error;
        return $this;
    }

    /**
     * Guesses the separator character by searching the loaded CSV
     * string for known character sequences, in order of priority.
     * Falls back to the default separator if none is found.
     *
     * @return string
     */
    protected function detectSeparator() : string
    {
        // Sequence to look for => separator it indicates.
        $patterns = array(
            "\"\t\"" => "\t",
            '";"' => ';',
            '","' => ',',
            ';;' => ';',
            ',,' => ','
        );

        foreach($patterns as $sequence => $separator) {
            if(strpos($this->csv, $sequence) !== false) {
                return $separator;
            }
        }

        return $this->separator;
    }

    /**
     * Creates a new CSV parser instance.
     *
     * @param string $delimiter The delimiter character to use. With
     *                          {@link CSVHelper::DELIMITER_AUTO}, the
     *                          parser's own delimiter setting is left as is.
     * @return Csv
     */
    public static function createParser(string $delimiter=self::DELIMITER_AUTO) : Csv
    {
        $parser = new Csv();

        if($delimiter !== self::DELIMITER_AUTO) {
            $parser->delimiter = $delimiter;
        }

        return $parser;
    }

    /**
     * Parses a CSV file in automatic mode (to detect the delimiter and
     * enclosure), and returns the data rows, including the header row
     * if any.
     *
     * @param string $path
     * @return array<int,array<int,mixed>>
     *
     * @throws CSVHelper_Exception
     * @throws FileHelper_Exception
     *
     * @see CSVHelper::ERROR_CSV_FILE_NOT_READABLE
     * @see CSVHelper::ERROR_FILE_PARSING_FAILED
     */
    public static function parseFile(string $path) : array
    {
        $path = FileHelper::requireFileReadable($path, self::ERROR_CSV_FILE_NOT_READABLE);

        $parser = self::createParser();

        // Headings are turned off so the first line stays in the data rows.
        $parser->heading = false;

        // A string result is treated as success; anything else as a failure.
        if(is_string($parser->auto($path))) {
            return $parser->data;
        }

        throw new CSVHelper_Exception(
            'Cannot parse CSV file',
            sprintf(
                'The file [%s] could not be parsed.'.PHP_EOL.
                'Additional information: '.PHP_EOL.
                '%s',
                $path,
                JSONConverter::var2jsonSilent($parser->error_info)
            ),
            self::ERROR_FILE_PARSING_FAILED
        );
    }

    /**
     * Parses a CSV string using a parser created with the default
     * settings of {@link createParser()}, and returns the data rows.
     *
     * NOTE(review): the intent is to return the header row as well,
     * but unlike {@link parseFile()} the parser's `heading` option is
     * not turned off here - confirm against the parser library's default.
     *
     * @param string $string
     * @return array<int,array<int,mixed>>
     * @throws CSVHelper_Exception
     *
     * @see CSVHelper::ERROR_STRING_PARSING_FAILED
     */
    public static function parseString(string $string) : array
    {
        $parser = self::createParser();

        if($parser->parse($string) === true) {
            return $parser->data;
        }

        throw new CSVHelper_Exception(
            'Cannot parse CSV string',
            'The string could not be parsed. No additional information is available.',
            self::ERROR_STRING_PARSING_FAILED
        );
    }
}
540