Completed
Push — master ( e48390...c23027 )
by ignace nyamagana
03:49 queued 02:40
created

EmptyEscapeParser::parse()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
nc 5
nop 1
dl 0
loc 26
ccs 19
cts 19
cp 1
crap 5
rs 9.1928
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv\Polyfill;
18
19
use Generator;
20
use League\Csv\Stream;
21
use SplFileObject;
22
use TypeError;
23
use function explode;
24
use function get_class;
25
use function gettype;
26
use function in_array;
27
use function is_object;
28
use function ltrim;
29
use function rtrim;
30
use function sprintf;
31
use function str_replace;
32
use function substr;
33
34
/**
 * A Polyfill to PHP's SplFileObject behavior when reading a CSV document
 * with the SplFileObject::READ_CSV and SplFileObject::SKIP_EMPTY flags on
 * and the empty string as the escape parameter.
 *
 * The parsing state (document, CSV controls, current line) is kept in static
 * properties that every call to parse() overwrites, so only one generator
 * returned by parse() should be consumed at a time.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @internal used internally to parse a CSV document without using the escape character
 */
final class EmptyEscapeParser
{
    /**
     * Values of the current line which mark the end of a record.
     *
     * `false` stands for "nothing left to read", the other entries are an
     * empty remainder or a bare line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The field delimiter read from the document CSV controls.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The field enclosure read from the document CSV controls.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Characters skipped when looking for the enclosure at the start of a field.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The part of the current line which is still to be parsed;
     * false once the current record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * Converts the document into a CSV record iterator.
     *
     * The returned record array is similar to the returned value of fgetcsv
     *
     * - If the line is empty the record is skipped
     * - Otherwise the array contains strings.
     *
     * The document flags are reset to 0 and the document is rewound before
     * the first record is read.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject;
     *                   the error is raised when the iteration starts
     */
    public static function parse($document): Generator
    {
        self::$document = self::filterDocument($document);
        list(self::$delimiter, self::$enclosure, ) = self::$document->getCsvControl();
        // The delimiter and the enclosure are significant characters: they must never be skipped.
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = self::extractRecord();
            // A record made of a single null field is an empty line.
            if ([null] !== $record) {
                yield $record;
            }
        }
    }

    /**
     * Filter the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            'Expected a %s or an SplFileObject object, %s given',
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Reads the next line of the document and splits it into fields.
     *
     * An enclosed field may consume additional lines of the document.
     *
     * @return array the fields of the record; [null] when the line is empty
     */
    private static function extractRecord(): array
    {
        $record = [];
        self::$line = self::$document->fgets();
        do {
            // The document may hand back false instead of a string once it is
            // exhausted; ltrim() would reject it under strict typing.
            $buffer = false === self::$line ? '' : ltrim(self::$line, self::$trim_mask);
            $is_field_enclosed = ($buffer[0] ?? '') === self::$enclosure;
            if ($is_field_enclosed) {
                // Whitespace found before the opening enclosure is discarded.
                self::$line = $buffer;
            }

            $record[] = $is_field_enclosed
                ? self::extractEnclosedFieldContent()
                : self::extractFieldContent();
        } while (false !== self::$line);

        return $record;
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * - The content is returned as found up to the next delimiter,
     *   surrounding whitespace included.
     * - Trailing line-breaks are removed from the last field of the line.
     *
     * @return null|string null when there is nothing left to read on the line
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // Without any delimiter left, the remainder defaults to false: the record is complete.
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid fields content are treated as per fgetcsv behavior.
     * - If the document ends before the closing enclosure is found, the
     *   content gathered so far is returned.
     */
    private static function extractEnclosedFieldContent(): string
    {
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        $content = '';
        while (false !== self::$line) {
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $remainder) {
                self::$line = $remainder;
                break;
            }

            // The enclosure is still open: carry on with the next line. Reading
            // past the end of an SplFileObject throws a RuntimeException, so an
            // exhausted document is handled like a field break instead.
            self::$line = self::$document->valid() ? self::$document->fgets() : false;
        }

        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        if (self::$delimiter === $char) {
            self::$line = substr(self::$line, 1);

            return $content;
        }

        if (self::$enclosure === $char) {
            // Doubled enclosure: keep a single enclosure character and resume the field.
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Characters found after the closing enclosure are appended unenclosed.
        return $content.self::extractFieldContent();
    }
}
205