Completed
Push — master ( 401201...7506b8 )
by ignace nyamagana
05:44 queued 04:08
created

EmptyEscapeParser::filterDocument()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
nc 2
nop 1
dl 0
loc 12
ccs 7
cts 7
cp 1
crap 4
rs 9.8666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv\Polyfill;
18
19
use Generator;
20
use League\Csv\Exception;
21
use League\Csv\Stream;
22
use SplFileObject;
23
use TypeError;
24
use function explode;
25
use function get_class;
26
use function gettype;
27
use function in_array;
28
use function is_object;
29
use function ltrim;
30
use function rtrim;
31
use function sprintf;
32
use function str_replace;
33
use function strlen;
34
use function substr;
35
36
/**
37
 * A Polyfill to PHP's fgetcsv behavior with the empty string as the escape parameter.
38
 *
39
 * @see https://php.net/manual/en/function.fgetcsv.php
40
 * @see https://php.net/manual/en/function.fgets.php
41
 * @see https://tools.ietf.org/html/rfc4180
42
 * @see http://edoceo.com/utilitas/csv-file-format
43
 *
44
 * @internal used internally to parse document without using the escape character
45
 */
46
final class EmptyEscapeParser
{
    /**
     * Values of the current line buffer that mark the end of a record:
     * an exhausted read (false), an empty buffer, or a lone line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The single-byte field delimiter.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The single-byte field enclosure.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Whitespace characters skipped before testing for an opening enclosure.
     *
     * The delimiter and the enclosure are removed from this mask so that they
     * are never trimmed away when one of them happens to be a whitespace character.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The unparsed remainder of the current line, or false once the record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * Converts the document into a CSV record iterator.
     *
     * The returned record array is similar to the returned value of fgetcsv
     *
     * - If the line is empty the record will be an array with a single value equals to null
     * - Otherwise the array contains strings.
     *
     * The parser state is kept in static properties, therefore two generators
     * returned by this method must not be consumed in an interleaved manner.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     * @throws Exception if the delimiter or the enclosure is not a single byte character
     */
    public static function parse($document, string $delimiter = ',', string $enclosure = '"'): Generator
    {
        self::$document = self::filterDocument($document);
        self::$delimiter = self::filterControl($delimiter, 'delimiter');
        self::$enclosure = self::filterControl($enclosure, 'enclosure');
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = [];
            self::$line = self::$document->fgets();
            do {
                $is_enclosed = false;
                // fgets() may report an exhausted document with false; under strict
                // types ltrim() rejects a boolean, so only a real string is trimmed.
                // A false line falls through to extractFieldContent() which turns
                // it into the null field of an empty record.
                if (false !== self::$line) {
                    $buffer = ltrim(self::$line, self::$trim_mask);
                    if (($buffer[0] ?? '') === self::$enclosure) {
                        // Leading whitespace before an opening enclosure is dropped.
                        $is_enclosed = true;
                        self::$line = $buffer;
                    }
                }

                $record[] = $is_enclosed ? self::extractEnclosedFieldContent() : self::extractFieldContent();
            } while (false !== self::$line);

            yield $record;
        }
    }

    /**
     * Filter the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            'Expected a %s or an SplFileObject object, %s given',
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Filter a control character.
     *
     * @param string $value the submitted control character
     * @param string $name  the control name used in the exception message
     *
     * @throws Exception if the string is not a single byte character
     */
    private static function filterControl(string $value, string $name): string
    {
        if (1 === strlen($value)) {
            return $value;
        }

        throw new Exception(sprintf('Expected %s to be a single character %s given', $name, $value));
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * - The content is returned up to the next delimiter, the delimiter itself is consumed.
     * - Trailing line-breaks are removed from the last field of the record.
     * - A line buffer that is a record break yields null and ends the record.
     *
     * @return null|string
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // When no delimiter is left, the union with [1 => false] marks the record as complete.
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid fields content are treated as per fgetcsv behavior.
     * - An enclosure left open at the end of the document ends the field
     *   with whatever content has been collected so far.
     */
    private static function extractEnclosedFieldContent(): string
    {
        // Drop the opening enclosure (or the second half of a doubled enclosure on recursion).
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        $content = '';
        while (false !== self::$line) {
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $remainder) {
                // Closing enclosure found: what follows it still has to be inspected.
                self::$line = $remainder;
                break;
            }

            // The enclosure is still open, the field continues on the next line.
            // The document is only read while it is valid because
            // SplFileObject::fgets() throws once the end of the file is reached.
            self::$line = self::$document->valid() ? self::$document->fgets() : false;
        }

        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        if (self::$delimiter === $char) {
            // Consume the delimiter so that the next field starts right after it.
            self::$line = substr(self::$line, 1);

            return $content;
        }

        if (self::$enclosure === $char) {
            // Doubled enclosure: keep a single one and continue within the same field.
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Unexpected content after the closing enclosure is appended as an unenclosed field.
        return $content.self::extractFieldContent();
    }
}
218