Completed
Pull Request — master (#342)
by ignace nyamagana
14:03
created

EmptyEscapeParser::extractEnclosedFieldContent()   C

Complexity

Conditions 12
Paths 32

Size

Total Lines 49

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 12

Importance

Changes 0
Metric Value
cc 12
nc 32
nop 0
dl 0
loc 49
ccs 24
cts 24
cp 1
crap 12
rs 6.9666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv\Polyfill;
15
16
use Generator;
17
use League\Csv\Stream;
18
use SplFileObject;
19
use TypeError;
20
use function explode;
21
use function get_class;
22
use function gettype;
23
use function in_array;
24
use function is_object;
25
use function ltrim;
26
use function rtrim;
27
use function sprintf;
28
use function str_replace;
29
use function substr;
30
31
/**
32
 * A Polyfill to PHP's SplFileObject to enable parsing the CSV document
33
 * without taking into account the escape character.
34
 *
35
 * @see https://php.net/manual/en/function.fgetcsv.php
36
 * @see https://php.net/manual/en/function.fgets.php
37
 * @see https://tools.ietf.org/html/rfc4180
38
 * @see http://edoceo.com/utilitas/csv-file-format
39
 *
40
 * @internal used internally to parse a CSV document without using the escape character
41
 */
42
final class EmptyEscapeParser
{
    /**
     * Line values that terminate a record when met where a field should start.
     *
     * @internal
     */
    public const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document being parsed.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The field delimiter, as reported by the document CSV controls.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The field enclosure, as reported by the document CSV controls.
     *
     * @var string
     */
    private $enclosure;

    /**
     * Characters stripped in front of a field before looking for an enclosure.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * The part of the current document line that is still to be parsed;
     * false once the current record is complete.
     *
     * @var string|false
     */
    private $line = false;

    /**
     * Binds the parsing state to a single document.
     *
     * The state lives on a private instance (one per parse() call) instead of
     * static properties: parse() returns a lazy generator, so two generators
     * iterated alternately would otherwise overwrite each other's state.
     *
     * @param SplFileObject|Stream $document an already filtered document
     */
    private function __construct($document)
    {
        $this->document = $document;
        [$this->delimiter, $this->enclosure, ] = $document->getCsvControl();
        // the delimiter and the enclosure are never trimmed, even if they are whitespace characters
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
    }

    /**
     * Converts the document into a CSV record iterator.
     *
     * In PHP7.4+ you'll be able to do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
     * $file->setCsvControl($delimiter, $enclosure, '');
     * foreach ($file as $record) {
     *    //$record escape mechanism is blocked by the empty string
     * }
     * </code>
     *
     * In PHP7.3- you can do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $it = EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
     * foreach ($it as $record) {
     *    //fgetcsv is not directly used hence the escape char is not taken into account
     * }
     * </code>
     *
     * Each record array contains strings elements.
     *
     * The method is a generator: nothing is executed (not even the document
     * validation) before the first iteration. The document flags are reset
     * to 0 and the document is rewound when the iteration starts. Records
     * containing a null field are skipped.
     *
     * Generators created for distinct documents can be iterated alternately.
     * Generators created for the same document still share its file pointer.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a SplFileObject nor a Stream object
     *
     * @return Generator|array[]
     */
    public static function parse($document): Generator
    {
        $parser = new self(self::filterDocument($document));
        $parser->document->setFlags(0);
        $parser->document->rewind();
        while ($parser->document->valid()) {
            $record = $parser->extractRecord();
            if (!in_array(null, $record, true)) {
                yield $record;
            }
        }
    }

    /**
     * Filters the submitted document.
     *
     * @param mixed $document the submitted object
     *
     * @throws TypeError if the document is neither a SplFileObject nor a Stream object
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            '%s::parse expects parameter 1 to be a %s or a SplFileObject object, %s given',
            self::class,
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Extracts a record from the CSV document.
     *
     * Reads the next document line, then extracts fields until the line is
     * marked as consumed (false). An enclosed field may pull additional
     * lines from the document while it is being extracted.
     *
     * @return array the fields; a null entry marks a position where no field was found
     */
    private function extractRecord(): array
    {
        $record = [];
        $this->line = $this->document->fgets();
        do {
            $record[] = $this->addFieldContent();
        } while (false !== $this->line);

        return $record;
    }

    /**
     * Extracts a field from the record.
     *
     * The field is parsed as an enclosed field when, leading trimmable
     * characters aside, it starts with the enclosure character; otherwise
     * it is parsed as-is.
     */
    private function addFieldContent(): ?string
    {
        // FIELD_BREAKS lists false as a possible line value; ltrim() would
        // throw a TypeError on it under strict types, so it is skipped here
        // and reported as a field break by extractFieldContent().
        if (false !== $this->line) {
            $buffer = ltrim($this->line, $this->trim_mask);
            if (($buffer[0] ?? '') === $this->enclosure) {
                $this->line = $buffer;

                return $this->extractEnclosedFieldContent();
            }
        }

        return $this->extractFieldContent();
    }

    /**
     * Extracts the content from a field without enclosure.
     *
     * - Field content can not spread on multiple document lines.
     * - Content must be preserved.
     * - Trailing line-breaks must be removed.
     *
     * @return string|null null if the line holds a field break instead of a field
     */
    private function extractFieldContent(): ?string
    {
        if (in_array($this->line, self::FIELD_BREAKS, true)) {
            $this->line = false;

            return null;
        }

        // without a delimiter, explode() returns a single chunk: the line is then fully consumed
        [$content, $this->line] = explode($this->delimiter, $this->line, 2) + [1 => false];
        if (false === $this->line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extracts the content from a field with enclosure.
     *
     * - Field content can spread on multiple document lines.
     * - Content between consecutive enclosure characters must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field content is treated as per fgetcsv behavior.
     *
     * @return string|null null if the enclosure is never closed and the content
     *                     only spans the last chunk read from the document
     */
    private function extractEnclosedFieldContent(): ?string
    {
        // drop the opening enclosure
        if (($this->line[0] ?? '') === $this->enclosure) {
            $this->line = substr($this->line, 1);
        }

        $content = '';
        while (false !== $this->line) {
            [$buffer, $remainder] = explode($this->enclosure, $this->line, 2) + [1 => false];
            $content .= $buffer;
            $this->line = $remainder;
            if (false !== $this->line) {
                // an enclosure character was found on the current line
                break;
            }

            if ($this->document->valid()) {
                // no enclosure on this line: the field goes on with the next document line
                $this->line = $this->document->fgets();
                continue;
            }

            if ($buffer === rtrim($content, "\r\n")) {
                return null;
            }
        }

        if (in_array($this->line, self::FIELD_BREAKS, true)) {
            $this->line = false;
            if (!$this->document->valid()) {
                return $content;
            }

            return rtrim($content, "\r\n");
        }

        $char = $this->line[0] ?? '';
        if ($char === $this->delimiter) {
            // regular end of field: skip the delimiter
            $this->line = substr($this->line, 1);

            return $content;
        }

        if ($char === $this->enclosure) {
            // double enclosure sequence: keep one enclosure character and go on
            return $content.$this->enclosure.$this->extractEnclosedFieldContent();
        }

        // characters following the closing enclosure are appended as-is up to the next delimiter
        return $content.$this->extractFieldContent();
    }
}
258