Passed
Push — master ( 68f91e...986f6c )
by Sebastian
04:32
created

CSVHelper::createParser()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
c 0
b 0
f 0
dl 0
loc 9
rs 10
cc 2
nc 2
nop 1
1
<?php
2
/**
3
 * File containing the {@link CSVHelper} class.
4
 * 
5
 * @package Application Utils
6
 * @subpackage CSVHelper
7
 * @see CSVHelper
8
 */
9
10
namespace AppUtils;
11
12
use ParseCsv\Csv;
13
14
/**
 * Helper class to parse and create/modify csv files or strings.
 *
 * Usage:
 *
 * ```php
 * $csv = new CSVHelper();
 * $csv->setHeadersTop(); // has to be set before anything else.
 *
 * // parse a csv file
 * $csv->loadFile('path/to/file');
 *
 * // parse a csv string
 * $csv->loadString($csvString);
 *
 * // retrieve data
 * $headers = $csv->getHeaders();
 * $row = $csv->getRow(4);
 * ```
 *
 * The headers position is read while the CSV is being parsed, which
 * happens as part of {@link loadString()} / {@link loadFile()}: changing
 * it afterwards has no effect on data that has already been loaded.
 *
 * @package Application Utils
 * @subpackage CSVHelper
 * @author Sebastian Mordziol <[email protected]>
 */
class CSVHelper
{
    const ERROR_INVALID_HEADERS_POSITION = 561002;
    const ERROR_INVALID_FILE_ENCODING = 561003;
    const ERROR_FILE_PARSING_FAILED = 561004;
    const ERROR_CSV_FILE_NOT_READABLE = 561005;
    const ERROR_STRING_PARSING_FAILED = 561006;

    const DELIMITER_AUTO = 'auto';

    const HEADERS_LEFT = 'hleft';
    const HEADERS_TOP = 'htop';
    const HEADERS_NONE = 'hnone';

    /**
     * The raw CSV string that was last handed to {@link loadString()},
     * after BOM stripping and UTF8 conversion.
     *
     * @var string
     */
    protected $csv = '';

    /**
     * The parsed rows, without the headers (if any).
     *
     * @var array
     */
    protected $data = array();

    /**
     * The header names extracted during parsing.
     *
     * @var array
     */
    protected $headers = array();

    /**
     * One of the HEADERS_* class constants.
     *
     * @var string
     */
    protected $headersPosition = self::HEADERS_NONE;

    /**
     * Messages collected while parsing.
     *
     * @var string[]
     */
    protected $errors = array();

    /**
     * Size of the widest parsed row.
     *
     * @var integer
     */
    protected $columnCount = 0;

    /**
     * Amount of parsed rows, headers excluded.
     *
     * @var integer
     */
    protected $rowCount = 0;

    /**
     * Fallback separator returned by {@link detectSeparator()} when
     * none of the known character sequences is found.
     *
     * @var string
     */
    protected $separator = ';';

    public function __construct()
    {

    }

    /**
     * Creates and returns a new instance of the CSV builder which
     * can be used to build CSV from scratch.
     *
     * @return CSVHelper_Builder
     */
    public static function createBuilder()
    {
        return new CSVHelper_Builder();
    }

    /**
     * Loads CSV data from a string.
     *
     * Note: Use the {@link hasErrors()} method to
     * check if the string could be parsed correctly
     * afterwards.
     *
     * @param string $string
     * @return CSVHelper
     */
    public function loadString($string)
    {
        // remove any UTF byte order marks that may still be present in the string
        $this->csv = ConvertHelper::stripUTFBom($string);

        // ensure the string is valid UTF8
        $this->csv = ConvertHelper::string2utf8($this->csv);

        $this->parse();

        return $this;
    }

    /**
     * Loads CSV data from a file.
     *
     * Note: Use the {@link hasErrors()} method to
     * check if the file could be parsed correctly
     * afterwards.
     *
     * @param string $file
     * @throws FileHelper_Exception
     * @return CSVHelper
     *
     * @see FileHelper::ERROR_FILE_DOES_NOT_EXIST
     * @see FileHelper::ERROR_CANNOT_READ_FILE_CONTENTS
     */
    public function loadFile(string $file) : CSVHelper
    {
        return $this->loadString(FileHelper::readContents($file));
    }

    /**
     * Specifies that headers are positioned on top, horizontally.
     * @return CSVHelper
     */
    public function setHeadersTop()
    {
        return $this->setHeadersPosition(self::HEADERS_TOP);
    }

    /**
     * Specifies that headers are positioned on the left, vertically.
     * @return CSVHelper
     */
    public function setHeadersLeft()
    {
        return $this->setHeadersPosition(self::HEADERS_LEFT);
    }

    /**
     * Specifies that there are no headers in the file (default).
     * @return CSVHelper
     */
    public function setHeadersNone()
    {
        return $this->setHeadersPosition(self::HEADERS_NONE);
    }

    /**
     * Whether the headers are configured to be on the left.
     * @return boolean
     */
    public function isHeadersLeft()
    {
        return $this->isHeadersPosition(self::HEADERS_LEFT);
    }

    /**
     * Whether the headers are configured to be on top.
     * @return boolean
     */
    public function isHeadersTop()
    {
        return $this->isHeadersPosition(self::HEADERS_TOP);
    }

    /**
     * Whether the CSV is configured to have no headers.
     * @return boolean
     */
    public function isHeadersNone()
    {
        return $this->isHeadersPosition(self::HEADERS_NONE);
    }

    /**
     * Checks whether the currently configured headers position
     * is identical (strict comparison) to the specified one.
     *
     * @param string $position
     * @return boolean
     */
    public function isHeadersPosition($position)
    {
        return $this->headersPosition === $position;
    }

    /**
     * Specifies where the headers are positioned in the
     * CSV, or turns them off entirely. Use the class constants
     * to ensure the value is correct.
     *
     * @param string $position
     * @throws CSVHelper_Exception
     * @return CSVHelper
     * @see CSVHelper::HEADERS_LEFT
     * @see CSVHelper::HEADERS_TOP
     * @see CSVHelper::HEADERS_NONE
     */
    public function setHeadersPosition($position)
    {
        $validPositions = array(
            self::HEADERS_LEFT,
            self::HEADERS_NONE,
            self::HEADERS_TOP
        );

        // Strict mode: a loose comparison lets values like boolean true
        // slip through, after which none of the isHeaders*() checks
        // (which compare strictly) could ever match.
        if(!in_array($position, $validPositions, true)) {
            throw new CSVHelper_Exception(
                'Invalid headers position',
                sprintf(
                    'The header position [%s] is invalid. Valid positions are [%s]. '.
                    'It is recommended to use the class constants, for example [%s].',
                    $position,
                    implode(', ', $validPositions),
                    'CSVHelper::HEADERS_LEFT'
                ),
                self::ERROR_INVALID_HEADERS_POSITION
            );
        }

        $this->headersPosition = $position;
        return $this;
    }

    /**
     * Resets all internal data, allowing to start entirely anew
     * with a new file, or to start building a new CSV file from
     * scratch.
     *
     * Clears the parsed rows, headers, errors and the row/column
     * counters. The raw CSV string and the configured headers
     * position are left untouched.
     *
     * @return CSVHelper
     */
    public function reset()
    {
        $this->data = array();
        $this->headers = array();
        $this->errors = array();
        $this->columnCount = 0;
        $this->rowCount = 0;

        return $this;
    }

    /**
     * Retrieves all parsed rows, without the headers.
     * @return array
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * Retrieves the row at the specified index.
     * If there is no data at the index, this will
     * return an array populated with empty strings
     * for all available columns.
     *
     * Tip: Use the {@link rowExists()} method to check
     * whether the specified row exists.
     *
     * @param integer $index
     * @return array
     * @see rowExists()
     */
    public function getRow($index)
    {
        if(isset($this->data[$index])) {
            return $this->data[$index];
        }

        // One empty string per column: the placeholder row must be
        // as wide as the data set, not as long as it has rows.
        return array_fill(0, $this->columnCount, '');
    }

    /**
     * Checks whether the specified row exists in the data set.
     * @param integer $index
     * @return boolean
     */
    public function rowExists($index)
    {
        return isset($this->data[$index]);
    }

    /**
     * Counts the amount of rows in the parsed CSV,
     * excluding the headers if any, depending on
     * their position.
     *
     * @return integer
     */
    public function countRows()
    {
        return $this->rowCount;
    }

    /**
     * Counts the amount of columns in the parsed CSV,
     * excluding the headers if any, depending on
     * their position. This is the size of the widest row.
     *
     * @return integer
     */
    public function countColumns()
    {
        return $this->columnCount;
    }

    /**
     * Retrieves the headers, if any. Specify the position of the
     * headers first to ensure this works correctly.
     *
     * @return array Indexed array with header names.
     */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * Retrieves the column at the specified index. If there
     * is no column at the index, this returns an array
     * populated with empty strings (one per row).
     *
     * Tip: Use the {@link columnExists()} method to check
     * whether a column exists.
     *
     * @param integer $index
     * @return string[]
     * @see columnExists()
     */
    public function getColumn($index)
    {
        $column = array();

        for($rowIndex = 0; $rowIndex < $this->rowCount; $rowIndex++) {
            // rows may be shorter than the widest one: pad with an empty string
            $column[] = $this->data[$rowIndex][$index] ?? '';
        }

        return $column;
    }

    /**
     * Checks whether the specified column exists in the data set.
     * Negative indexes never exist.
     *
     * @param integer $index
     * @return boolean
     */
    public function columnExists($index)
    {
        return $index >= 0 && $index < $this->columnCount;
    }

    /**
     * Parses the CSV string currently stored in the helper.
     *
     * Resets all previously parsed data first, then splits off the
     * headers according to the configured headers position, and
     * finally stores the remaining rows along with the row and
     * column counts. Problems are not thrown, but registered as
     * error messages (see {@link hasErrors()}).
     */
    protected function parse()
    {
        $this->reset();

        // Compare against an empty string instead of using empty(),
        // which would also reject a CSV that consists only of "0".
        if(trim($this->csv) === '') {
            $this->addError('Tried to parse an empty CSV string.');
            return;
        }

        // ensure that the last line in the CSV has
        // a linebreak afterwards, otherwise the line
        // will not be parsed.
        $this->csv = rtrim($this->csv).PHP_EOL;

        $parser = self::createParser();

        if(!$parser->parse($this->csv)) {
            $this->addError('The CSV string could not be parsed.');
            return;
        }

        $rows = $parser->data;

        switch($this->headersPosition)
        {
            case self::HEADERS_TOP:
                // array_shift() returns null when there are no rows at all:
                // keep the empty headers array from reset() in that case.
                $firstRow = array_shift($rows);
                if($firstRow !== null) {
                    $this->headers = $firstRow;
                }
                break;

            case self::HEADERS_LEFT:
                // the first cell of every row is its header
                $remaining = array();
                foreach($rows as $row) {
                    $this->headers[] = array_shift($row);
                    $remaining[] = $row;
                }

                $rows = $remaining;
                break;
        }

        $this->data = $rows;
        $this->rowCount = count($this->data);

        // the column count is the size of the widest row
        foreach($this->data as $row) {
            $this->columnCount = max($this->columnCount, count($row));
        }
    }

    /**
     * Checks whether any errors have been encountered
     * while parsing the CSV.
     *
     * @return boolean
     * @see getErrorMessages()
     */
    public function hasErrors()
    {
        return !empty($this->errors);
    }

    /**
     * Retrieves all error messages.
     * @return array
     */
    public function getErrorMessages()
    {
        return $this->errors;
    }

    /**
     * Registers an error message.
     * @param string $error
     */
    protected function addError($error)
    {
        $this->errors[] = $error;
    }

    /**
     * Guesses the separator character by looking for typical
     * character sequences in the CSV string, in order of priority.
     * Falls back to the default separator if none is found.
     *
     * @return string
     */
    protected function detectSeparator()
    {
        $sequences = array(
            "\"\t\"" => "\t",
            '";"' => ';',
            '","' => ',',
            ';;' => ';',
            ',,' => ','
        );

        foreach($sequences as $sequence => $separator) {
            if(strpos($this->csv, $sequence) !== false) {
                return $separator;
            }
        }

        return $this->separator;
    }

    /**
     * Creates a new CSV parser instance.
     *
     * The delimiter is only set on the parser when it is not
     * {@link CSVHelper::DELIMITER_AUTO}: in that case the parser
     * instance is returned with its own default configuration.
     *
     * @param string $delimiter
     * @return Csv
     */
    public static function createParser(string $delimiter=self::DELIMITER_AUTO) : Csv
    {
        $csv = new Csv();

        if($delimiter !== self::DELIMITER_AUTO) {
            $csv->delimiter = $delimiter;
        }

        return $csv;
    }

    /**
     * Parses a CSV file with a parser created in automatic
     * delimiter mode, and returns the data rows as provided
     * by the parser.
     *
     * @param string $path
     * @return array
     * @throws CSVHelper_Exception|FileHelper_Exception
     *
     * @see CSVHelper::ERROR_CSV_FILE_NOT_READABLE
     * @see CSVHelper::ERROR_FILE_PARSING_FAILED
     */
    public static function parseFile(string $path) : array
    {
        $path = FileHelper::requireFileReadable($path, self::ERROR_CSV_FILE_NOT_READABLE);

        $parser = self::createParser();

        // only a strict boolean true counts as a successful parse
        if($parser->parse($path) === true) {
            return $parser->data;
        }

        throw new CSVHelper_Exception(
            'Cannot parse CSV file',
            sprintf(
                'The file [%s] could not be parsed. No additional information is available.',
                $path
            ),
            self::ERROR_FILE_PARSING_FAILED
        );
    }

    /**
     * Parses a CSV string with a parser created in automatic
     * delimiter mode, and returns the data rows as provided
     * by the parser.
     *
     * @param string $string
     * @return array
     * @throws CSVHelper_Exception
     *
     * @see CSVHelper::ERROR_STRING_PARSING_FAILED
     */
    public static function parseString(string $string) : array
    {
        $parser = self::createParser();

        // only a strict boolean true counts as a successful parse
        if($parser->parse($string) === true) {
            return $parser->data;
        }

        throw new CSVHelper_Exception(
            'Cannot parse CSV string',
            'The string could not be parsed. No additional information is available.',
            self::ERROR_STRING_PARSING_FAILED
        );
    }
}
526