Completed
Pull Request — master (#376)
by ignace nyamagana
11:37 queued 10:22
created

EmptyEscapeParser   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 220
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 92.86%

Importance

Changes 0
Metric Value
dl 0
loc 220
ccs 65
cts 70
cp 0.9286
rs 10
c 0
b 0
f 0
wmc 27
lcom 1
cbo 0

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A parse() 0 14 4
A filterDocument() 0 13 3
A extractRecord() 0 21 5
A extractFieldContent() 0 21 3
B extractEnclosedFieldContent() 0 49 11
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv\Polyfill;
15
16
use Generator;
17
use League\Csv\Stream;
18
use SplFileObject;
19
use TypeError;
20
use function explode;
21
use function get_class;
22
use function in_array;
23
use function ltrim;
24
use function rtrim;
25
use function sprintf;
26
use function str_replace;
27
use function substr;
28
29
/**
30
 * A Polyfill to PHP's SplFileObject to enable parsing the CSV document
31
 * without taking into account the escape character.
32
 *
33
 * @see https://php.net/manual/en/function.fgetcsv.php
34
 * @see https://php.net/manual/en/function.fgets.php
35
 * @see https://tools.ietf.org/html/rfc4180
36
 * @see http://edoceo.com/utilitas/csv-file-format
37
 *
38
 * @internal used internally to parse a CSV document without using the escape character
39
 */
40
final class EmptyEscapeParser
{
    /**
     * Line values that end a record: a failed read (false), an empty
     * string, or a bare line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The field delimiter reported by the document's CSV control.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The field enclosure reported by the document's CSV control.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Characters stripped from the start of a field before looking for an
     * opening enclosure: space, tab, NUL and vertical tab, minus the
     * delimiter and the enclosure.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The unparsed remainder of the current line, or false once the
     * current record is complete.
     *
     * @var string|false
     */
    private static $line;

    /**
     * The class only exposes static behavior and is never instantiated.
     *
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Converts the document into a CSV record iterator.
     *
     * In PHP7.4+ you'll be able to do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
     * $file->setCsvControl($delimiter, $enclosure, '');
     * foreach ($file as $record) {
     *    //$record escape mechanism is blocked by the empty string
     * }
     * </code>
     *
     * In PHP7.3- you can do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $it = EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
     * foreach ($it as $record) {
     *    //fgetcsv is not used directly hence the escape char is not taken into account
     * }
     * </code>
     *
     * Each record array contains strings elements. A line holding nothing
     * but a line break is yielded as `[null]`; any other record in which a
     * field could not be extracted (null field) is discarded.
     *
     * The document flags are reset to 0 and the document is rewound before
     * parsing starts.
     *
     * The parser state is held in static properties: starting a second
     * generator before the first one is exhausted overwrites that state, so
     * generators returned by this method must not be consumed interleaved.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     *                   (raised when the generator is first advanced)
     *
     * @return Generator|array[]
     */
    public static function parse($document): Generator
    {
        self::$document = self::filterDocument($document);
        // Only delimiter and enclosure are needed; the escape character is ignored on purpose.
        [self::$delimiter, self::$enclosure] = self::$document->getCsvControl();
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = self::extractRecord();
            if ([null] === $record || !in_array(null, $record, true)) {
                yield $record;
            }
        }
    }

    /**
     * Filters the submitted document.
     *
     * The parameter is deliberately left without a native type: the
     * `object` type only exists since PHP 7.2 (older runtimes resolve it as
     * a class name), and a native type would make the engine reject scalars
     * before the descriptive error below could be raised.
     *
     * @param mixed $document the value submitted to self::parse
     *
     * @throws TypeError if the value is neither a Stream nor a SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            '%s::parse expects parameter 1 to be a %s or a SplFileObject object, %s given',
            self::class,
            Stream::class,
            \is_object($document) ? get_class($document) : \gettype($document)
        ));
    }

    /**
     * Extracts a record from the CSV document.
     *
     * Reads one line from the document, then extracts fields one after the
     * other until the field extractors flag the record as complete by
     * setting self::$line to false.
     *
     * @return array the record fields; a field is null when it could not be extracted
     */
    private static function extractRecord(): array
    {
        $record = [];
        self::$line = self::$document->fgets();
        do {
            $is_field_enclosed = false;
            $buffer = '';
            if (false !== self::$line) {
                $buffer = ltrim(self::$line, self::$trim_mask);
            }

            // Leading blanks are only dropped when they precede an opening
            // enclosure; an unenclosed field keeps its content untouched.
            if (($buffer[0] ?? '') === self::$enclosure) {
                $is_field_enclosed = true;
                self::$line = $buffer;
            }

            $record[] = $is_field_enclosed ? self::extractEnclosedFieldContent() : self::extractFieldContent();
        } while (false !== self::$line);

        return $record;
    }

    /**
     * Extracts the content from a field without enclosure.
     *
     * - Field content can not spread on multiple document lines.
     * - Content must be preserved.
     * - Trailing line-breaks must be removed.
     *
     * @return string|null null when the remaining line is one of self::FIELD_BREAKS
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        /** @var array<string> $result */
        $result = explode(self::$delimiter, self::$line, 2);
        // Without a delimiter explode() returns a single element: default the
        // remainder to false so that the record is flagged as complete.
        /** @var string $content */
        [$content, $remainder] = $result + [1 => false];

        /* @var string|false line */
        self::$line = $remainder;
        if (false === self::$line) {
            // Last field of the line: its line break is not part of the content.
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extracts the content from a field with enclosure.
     *
     * - Field content can spread on multiple document lines.
     * - Content between consecutive enclosure characters must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field content is treated as per fgetcsv behavior.
     *
     * @return string|null null when the document ends before a closing enclosure is found
     *                     and the accumulated content, stripped of trailing line breaks,
     *                     equals the last chunk read
     */
    private static function extractEnclosedFieldContent()
    {
        // Drop the opening enclosure.
        if (false !== self::$line && self::$line[0] === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        $content = '';
        while (false !== self::$line) {
            /** @var array $result */
            $result = explode(self::$enclosure, self::$line, 2);
            [$buffer, $remainder] = $result + [1 => false];
            $content .= $buffer;
            self::$line = $remainder;
            if (false !== self::$line) {
                // An enclosure was found on this line: stop accumulating.
                break;
            }

            // No enclosure on this line: the field continues on the next line, if any.
            if (self::$document->valid()) {
                self::$line = self::$document->fgets();
                continue;
            }

            if ($buffer === rtrim($content, "\r\n")) {
                return null;
            }
        }

        // Nothing (or only a line break) follows: the record is complete.
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;
            if (!self::$document->valid()) {
                return $content;
            }

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        // A delimiter right after the enclosure ends the field.
        if ($char === self::$delimiter) {
            self::$line = substr(self::$line, 1);

            return $content;
        }

        // Two consecutive enclosures stand for one literal enclosure
        // character; the field goes on after it.
        if ($char === self::$enclosure) {
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Any other character: the rest is read as unenclosed content.
        return $content.self::extractFieldContent();
    }
}
260