Completed
Push — master ( c23027...27a0dd )
by ignace nyamagana
02:26
created

EmptyEscapeParser::extractRecord()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 2
nop 0
dl 0
loc 17
ccs 11
cts 11
cp 1
crap 3
rs 9.7
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv\Polyfill;
18
19
use Generator;
20
use League\Csv\Stream;
21
use SplFileObject;
22
use TypeError;
23
use function explode;
24
use function get_class;
25
use function gettype;
26
use function in_array;
27
use function is_object;
28
use function ltrim;
29
use function rtrim;
30
use function sprintf;
31
use function str_replace;
32
use function substr;
33
34
/**
35
 * A Polyfill to PHP's SplFileObject behavior when reading a CSV document
36
 * with the SplFileObject::READ_CSV and SplFileObject::SKIP_EMPTY flags on
37
 * and the empty string as the escape parameter.
38
 *
39
 * <code>
40
 * $file = new SplFileObject('/path/to/file.csv', 'r');
41
 * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
42
 * $file->setCsvControl($delimiter, $enclosure, ''); //this does not currently work in any PHP stable release
43
 * </code>
44
 *
45
 * instead you can do this
46
 *
47
 * <code>
48
 * $file = new SplFileObject('/path/to/file.csv', 'r');
49
 * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
50
 * $file->setCsvControl($delimiter, $enclosure, $escape);
51
 * EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
52
 * </code>
53
 *
54
 * @see https://php.net/manual/en/function.fgetcsv.php
55
 * @see https://php.net/manual/en/function.fgets.php
56
 * @see https://tools.ietf.org/html/rfc4180
57
 * @see http://edoceo.com/utilitas/csv-file-format
58
 *
59
 * @internal used internally to parse a CSV document without using the escape character
60
 */
61
final class EmptyEscapeParser
{
    /**
     * Values of the line buffer that signal the end of the current record:
     * a failed read (false), an exhausted line or a lone line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The field delimiter read from the document CSV control.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The field enclosure read from the document CSV control.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Whitespace characters skipped in front of a field when looking for
     * an opening enclosure (delimiter and enclosure excluded).
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The part of the current document line that still has to be parsed,
     * or false once the current record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * Converts the document into a CSV record iterator.
     *
     * Each record is an array of string fields; a trailing empty field is
     * reported as null and records made of a single null field (empty lines)
     * are skipped.
     *
     * Notes:
     * - The delimiter and enclosure are read from the document CSV control;
     *   the escape character is ignored.
     * - The document flags are reset to 0 and the document is rewound.
     * - This method is a generator: nothing, including the document type
     *   check, is executed before the iteration starts.
     * - The parsing state is kept in static properties, therefore two
     *   iterations must not be consumed in an interleaved fashion.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return Generator|array[]
     */
    public static function parse($document): Generator
    {
        self::$document = self::filterDocument($document);
        list(self::$delimiter, self::$enclosure, ) = self::$document->getCsvControl();
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = self::extractRecord();
            if ([null] !== $record) {
                yield $record;
            }
        }
    }

    /**
     * Filters the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            'Expected a %s or an SplFileObject object, %s given',
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Extracts a record from the CSV document.
     *
     * Reads the next document line and consumes it field by field until the
     * line buffer is flagged as exhausted (false).
     */
    private static function extractRecord(): array
    {
        $record = [];
        self::$line = self::$document->fgets();
        do {
            // A failed read yields false: ltrim() would reject it under strict
            // types, so it is handled as an empty buffer which ends the record.
            $buffer = false === self::$line ? '' : ltrim(self::$line, self::$trim_mask);

            // Leading whitespace is only dropped when it precedes an enclosure.
            $is_enclosed = ($buffer[0] ?? '') === self::$enclosure;
            if ($is_enclosed) {
                self::$line = $buffer;
            }

            $record[] = $is_enclosed ? self::extractEnclosedFieldContent() : self::extractFieldContent();
        } while (false !== self::$line);

        return $record;
    }

    /**
     * Extracts the content from a field without enclosure.
     *
     * - The content is returned as found, up to the next delimiter.
     * - Trailing line-breaks are removed from the last field of the line.
     * - null is returned when the remaining line is empty or a lone line break.
     *
     * @return null|string
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // Without any delimiter left, the remainder defaults to false (end of record).
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extracts the content from a field with enclosure.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field content are treated as per fgetcsv behavior.
     */
    private static function extractEnclosedFieldContent(): string
    {
        // Drop the opening enclosure.
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        // Accumulate content, reading more lines if needed, up to the next enclosure.
        $content = '';
        while (false !== self::$line) {
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $remainder) {
                self::$line = $remainder;
                break;
            }
            self::$line = self::$document->fgets();
        }

        // Nothing meaningful follows: the field is the last one of the record.
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        // A delimiter right after the closing enclosure ends the field.
        if (self::$delimiter === $char) {
            self::$line = substr(self::$line, 1);

            return $content;
        }

        // A second enclosure is an escaped enclosure: keep one and carry on.
        if (self::$enclosure === $char) {
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Anything else following the enclosure is appended as unenclosed content.
        return $content.self::extractFieldContent();
    }
}
228