Completed
Push — master ( 63a6cf...ea30a2 )
by ignace nyamagana
12:10
created

EmptyEscapeParser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 3
ccs 0
cts 0
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv\Polyfill;
15
16
use Generator;
17
use League\Csv\Stream;
18
use SplFileObject;
19
use TypeError;
20
use function explode;
21
use function get_class;
22
use function gettype;
23
use function in_array;
24
use function is_object;
25
use function ltrim;
26
use function rtrim;
27
use function sprintf;
28
use function str_replace;
29
use function substr;
30
31
/**
 * A Polyfill to PHP's SplFileObject to enable parsing the CSV document
 * without taking into account the escape character.
 *
 * The parser keeps its working state (document, CSV controls and the line
 * being consumed) in static properties which are overwritten whenever a
 * generator returned by parse() starts running. Iterating two such generators
 * in an interleaved fashion therefore makes them share that state.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @internal used internally to parse a CSV document without using the escape character
 */
final class EmptyEscapeParser
{
    /**
     * Values of the unparsed line remainder that mark the end of a record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The field delimiter as reported by the document CSV controls.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The field enclosure as reported by the document CSV controls.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Characters stripped from the start of a field when looking for an opening enclosure.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The part of the current document line which is still to be parsed,
     * or false once the current record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * The class only exposes static methods and must not be instantiated.
     *
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Converts the document into a CSV record iterator.
     *
     * In PHP7.4+ you'll be able to do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
     * $file->setCsvControl($delimiter, $enclosure, '');
     * foreach ($file as $record) {
     *    //$record escape mechanism is blocked by the empty string
     * }
     * </code>
     *
     * In PHP7.3- you can do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $it = EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
     * foreach ($it as $record) {
     *    //fgetcsv is not directly used hence the escape char is not taken into account
     * }
     * </code>
     *
     * Each record array contains strings elements. Records for which at least
     * one field was extracted as null are skipped.
     *
     * The document flags are reset to 0 and the document is rewound once the
     * returned generator starts running.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     *                   (raised when the generator starts running)
     *
     * @return Generator|array[]
     */
    public static function parse($document): Generator
    {
        self::$document = self::filterDocument($document);
        list(self::$delimiter, self::$enclosure, ) = self::$document->getCsvControl();
        // the delimiter and the enclosure are never trimmed, even when they are whitespace characters
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = self::extractRecord();
            if (!in_array(null, $record, true)) {
                yield $record;
            }
        }
    }

    /**
     * Filters the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            '%s::parse expects parameter 1 to be a %s or a SplFileObject object, %s given',
            self::class,
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Extracts a record from the CSV document.
     *
     * Reads one line from the document, then extracts fields one after the
     * other until the unparsed remainder of the line is set to false.
     */
    private static function extractRecord(): array
    {
        $record = [];
        self::$line = self::$document->fgets();
        do {
            // A failed read leaves false in self::$line: ltrim() rejects it when
            // strict types are enabled, so only a string line is inspected.
            $buffer = '';
            if (false !== self::$line) {
                $buffer = ltrim(self::$line, self::$trim_mask);
            }

            $is_field_enclosed = ($buffer[0] ?? '') === self::$enclosure;
            if ($is_field_enclosed) {
                // drop what precedes the opening enclosure
                self::$line = $buffer;
            }

            $record[] = $is_field_enclosed
                ? self::extractEnclosedFieldContent()
                : self::extractFieldContent();
        } while (false !== self::$line);

        return $record;
    }

    /**
     * Extracts the content from a field without enclosure.
     *
     * - Field content can not spread on multiple document lines.
     * - Content must be preserved.
     * - Trailing line-breaks must be removed.
     *
     * Returns null, and marks the record as complete, when the unparsed
     * remainder of the line is one of the FIELD_BREAKS values.
     *
     * @return string|null
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // when no delimiter is found the missing remainder defaults to false
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            // last field of the line
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extracts the content from a field with enclosure.
     *
     * - Field content can spread on multiple document lines.
     * - Content between consecutive enclosure characters must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field content is treated as per fgetcsv behavior.
     *
     * @return string|null
     */
    private static function extractEnclosedFieldContent()
    {
        // skip the opening enclosure
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        $content = '';
        while (false !== self::$line) {
            // when no enclosure is found the missing remainder defaults to false
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;
            self::$line = $remainder;
            if (false !== self::$line) {
                // an enclosure character was found on the current line
                break;
            }

            // no enclosure on this line: the field continues on the next document line, if any
            if (self::$document->valid()) {
                self::$line = self::$document->fgets();
                continue;
            }

            // the document is exhausted and no enclosure character was found
            if ($buffer === rtrim($content, "\r\n")) {
                return null;
            }
        }

        // nothing meaningful follows on the line: the record is complete
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;
            if (!self::$document->valid()) {
                return $content;
            }

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        if ($char === self::$delimiter) {
            // end of the field: skip the delimiter, the rest of the line holds the next fields
            self::$line = substr(self::$line, 1);

            return $content;
        }

        // doubled enclosure: keep a single enclosure character and carry on with the field
        if ($char === self::$enclosure) {
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // unexpected characters follow the enclosure: they are parsed as unenclosed content
        return $content.self::extractFieldContent();
    }
}
252