Completed
Push — master ( 1751a4...082053 )
by ignace nyamagana
02:55 queued 01:46
created

EmptyEscapeParser::extractEnclosedFieldContent()   C

Complexity

Conditions 11
Paths 30

Size

Total Lines 51

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 27
CRAP Score 11.0055

Importance

Changes 0
Metric Value
cc 11
nc 30
nop 0
dl 0
loc 51
ccs 27
cts 28
cp 0.9643
crap 11.0055
rs 6.9224
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the long method into its own well-named method.

1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv\Polyfill;
18
19
use Generator;
20
use League\Csv\Stream;
21
use SplFileObject;
22
use TypeError;
23
use function explode;
24
use function get_class;
25
use function gettype;
26
use function in_array;
27
use function is_object;
28
use function ltrim;
29
use function rtrim;
30
use function sprintf;
31
use function str_replace;
32
use function substr;
33
34
/**
35
 * A Polyfill to PHP's SplFileObject to enable parsing the CSV document
36
 * without taking into account the escape character.
37
 *
38
 * @see https://php.net/manual/en/function.fgetcsv.php
39
 * @see https://php.net/manual/en/function.fgets.php
40
 * @see https://tools.ietf.org/html/rfc4180
41
 * @see http://edoceo.com/utilitas/csv-file-format
42
 *
43
 * @internal used internally to parse a CSV document without using the escape character
44
 */
45
final class EmptyEscapeParser
{
    /**
     * Values of the line buffer that mark the end of the current record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The field delimiter as reported by the document CSV control.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The field enclosure as reported by the document CSV control.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Characters stripped in front of a field when looking for an opening enclosure.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The not yet consumed part of the current line, false once the record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * Converts the document into a CSV record iterator.
     *
     * In PHP7.4+ you'll be able to do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
     * $file->setCsvControl($delimiter, $enclosure, '');
     * foreach ($file as $record) {
     *    //$record escape mechanism is blocked by the empty string
     * }
     * </code>
     *
     * In PHP7.3- you can do
     *
     * <code>
     * $file = new SplFileObject('/path/to/file.csv', 'r');
     * $it = EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
     * foreach ($it as $record) {
     *    //fgetcsv is not directly used hence the escape char is not taken into account
     * }
     * </code>
     *
     * Each record array contains strings elements.
     *
     * This method is a generator: nothing below runs (including the document
     * type check) until the returned iterator is first advanced. The parser
     * state lives in static properties, so iterating two generators returned
     * by this method in an interleaved fashion makes them share that state.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     *
     * @return Generator|array[]
     */
    public static function parse($document): Generator
    {
        self::$document = self::filterDocument($document);
        list(self::$delimiter, self::$enclosure) = self::$document->getCsvControl();
        // The delimiter and the enclosure must never be trimmed away, even
        // when they happen to be one of the default whitespace characters.
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = self::extractRecord();
            // A null field flags a record that must not be exposed.
            if (!in_array(null, $record, true)) {
                yield $record;
            }
        }
    }

    /**
     * Filters the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            '%s::parse expects parameter 1 to be a %s or a SplFileObject object, %s given',
            self::class,
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Extracts a record from the CSV document.
     *
     * Reads one line from the document and keeps extracting fields until the
     * line buffer is flagged as consumed (set to false) by a field extractor.
     */
    private static function extractRecord(): array
    {
        $record = [];
        self::$line = self::$document->fgets();
        do {
            // fgets() may return false; ltrim() would reject that value under
            // strict types, so a failed read is handled as an empty buffer and
            // left to extractFieldContent() which turns it into a null field.
            $buffer = false === self::$line ? '' : ltrim(self::$line, self::$trim_mask);
            if (($buffer[0] ?? '') === self::$enclosure) {
                // Leading whitespace before an opening enclosure is dropped.
                self::$line = $buffer;
                $record[] = self::extractEnclosedFieldContent();
            } else {
                $record[] = self::extractFieldContent();
            }
        } while (false !== self::$line);

        return $record;
    }

    /**
     * Extracts the content from a field without enclosure.
     *
     * - Field content can not spread on multiple document lines.
     * - Content must be preserved.
     * - Trailing line-breaks must be removed.
     *
     * @return string|null the field content or null if the line buffer holds no field
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // Without a delimiter explode() yields a single chunk: the union then
        // provides false as the remaining line, flagging the record as complete.
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extracts the content from a field with enclosure.
     *
     * - Field content can spread on multiple document lines.
     * - Content between consecutive enclosure characters must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field content is treated as per fgetcsv behavior.
     *
     * @return string|null the field content or null if no field could be extracted
     */
    private static function extractEnclosedFieldContent()
    {
        // Drop the opening enclosure when the buffer still starts with it.
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        $content = '';
        while (false !== self::$line) {
            // Accumulate everything up to the next enclosure character.
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;

            if (false !== $remainder) {
                self::$line = $remainder;
                break;
            }

            // No enclosure found, the document is exhausted and nothing was
            // accumulated before the current chunk.
            if (!self::$document->valid() && $content === $buffer) {
                if ($content !== rtrim($content, "\r\n")) {
                    self::$line = false;
                    // The trailing line break is only kept from PHP7.2 onwards.
                    if (PHP_VERSION_ID < 70200) {
                        return rtrim($content, "\r\n");
                    }

                    return $content;
                }

                return null;
            }

            // The field continues on the next document line.
            self::$line = self::$document->fgets();
        }

        // Nothing meaningful is left on the line: the record ends with this field.
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        if (self::$delimiter === $char) {
            // Consume the delimiter so the next field starts right after it.
            self::$line = substr(self::$line, 1);

            return $content;
        }

        if (self::$enclosure === $char) {
            // Double enclosure sequence: keep a single enclosure character and
            // carry on parsing what follows as enclosed content.
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Any other character following the closing enclosure is appended as
        // un-enclosed content.
        return $content.self::extractFieldContent();
    }
}
252