Completed
Push — master ( 40dc42...b54772 )
by ignace nyamagana
03:34 queued 02:16
created

Parser::extractFieldContent()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 3
nop 0
dl 0
loc 15
ccs 8
cts 8
cp 1
crap 3
rs 9.7666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use Generator;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function ltrim;
28
use function rtrim;
29
use function sprintf;
30
use function str_replace;
31
use function strlen;
32
use function substr;
33
34
/**
 * A Polyfill to PHP's fgetcsv behavior with the empty string as the escape parameter.
 *
 * The parser keeps its working state (document, control characters, current
 * line) in static properties. The generator returned by {@see Parser::parse()}
 * reads and writes that shared state on every iteration, so two generators
 * must not be consumed in an interleaved fashion.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @internal used internally to parse document without using the escape character
 */
final class Parser
{
    /**
     * Values of the remaining line that mark the end of the current record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document currently being parsed.
     *
     * @var SplFileObject|Stream
     */
    private static $document;

    /**
     * The single-byte field delimiter.
     *
     * @var string
     */
    private static $delimiter;

    /**
     * The single-byte field enclosure.
     *
     * @var string
     */
    private static $enclosure;

    /**
     * Characters skipped in front of a field when looking for an opening enclosure.
     *
     * @var string
     */
    private static $trim_mask;

    /**
     * The not yet consumed part of the current line, false once the record is complete.
     *
     * @var string|bool
     */
    private static $line;

    /**
     * Converts the document into a CSV record iterator.
     *
     * The returned record array is similar to the returned value of fgetcsv
     *
     * - If the line is empty the record will be an array with a single value equals to null
     * - Otherwise the array contains strings.
     *
     * The document flags are reset to 0 and the document is rewound before reading.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     * @throws Exception if the delimiter or the enclosure is not a single byte character
     */
    public static function parse($document, string $delimiter = ',', string $enclosure = '"'): Generator
    {
        self::$document = self::filterDocument($document);
        self::$delimiter = self::filterControl($delimiter, 'delimiter');
        self::$enclosure = self::filterControl($enclosure, 'enclosure');
        // The control characters are removed from the mask so that a delimiter
        // or an enclosure made of a whitespace character is never skipped.
        self::$trim_mask = str_replace([self::$delimiter, self::$enclosure], '', " \t\0\x0B");
        self::$document->setFlags(0);
        self::$document->rewind();
        while (self::$document->valid()) {
            $record = [];
            self::$line = self::$document->fgets();
            do {
                // The line is documented as string|bool: a failed read must not
                // reach ltrim(), which rejects non-string input under strict types.
                $buffer = false === self::$line ? '' : ltrim(self::$line, self::$trim_mask);
                $is_field_enclosed = ($buffer[0] ?? '') === self::$enclosure;
                if ($is_field_enclosed) {
                    // Only enclosed fields drop the skipped leading characters.
                    self::$line = $buffer;
                }

                $record[] = $is_field_enclosed
                    ? self::extractEnclosedFieldContent()
                    : self::extractFieldContent();
            } while (false !== self::$line);

            yield $record;
        }
    }

    /**
     * Filter the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private static function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        throw new TypeError(sprintf(
            'Expected a %s or an SplFileObject object, %s given',
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Filter a control character.
     *
     * @param string $value the submitted control character
     * @param string $name  the control name used in the exception message
     *
     * @throws Exception if the string is not a single byte character
     */
    private static function filterControl(string $value, string $name): string
    {
        if (1 === strlen($value)) {
            return $value;
        }

        throw new Exception(sprintf('Expected %s to be a single character %s given', $name, $value));
    }

    /**
     * Extract a field without enclosure from the current line.
     *
     * - The content runs up to the next delimiter; the rest of the line is kept for the next field.
     * - Trailing line-breaks are removed from the last field of the line.
     * - The content is otherwise returned untouched, no whitespace is trimmed.
     *
     * @return null|string null when the remaining line is one of the FIELD_BREAKS values
     */
    private static function extractFieldContent()
    {
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            return null;
        }

        // Without any delimiter left, the array union fills the remainder with false.
        list($content, self::$line) = explode(self::$delimiter, self::$line, 2) + [1 => false];
        if (false === self::$line) {
            return rtrim($content, "\r\n");
        }

        return $content;
    }

    /**
     * Extract a field with enclosure from the current line.
     *
     * - Field content can spread on multiple document lines.
     * - Double enclosure sequence is replaced by a single enclosure character.
     * - Invalid fields content are treated as per fgetcsv behavior: whatever follows
     *   the closing enclosure up to the next delimiter is appended to the content.
     */
    private static function extractEnclosedFieldContent(): string
    {
        // Skip the opening enclosure.
        if ((self::$line[0] ?? '') === self::$enclosure) {
            self::$line = substr(self::$line, 1);
        }

        // Accumulate content, reading further document lines, until an enclosure is met.
        $content = '';
        while (false !== self::$line) {
            list($buffer, $remainder) = explode(self::$enclosure, self::$line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $remainder) {
                self::$line = $remainder;
                break;
            }
            self::$line = self::$document->fgets();
        }

        // Nothing but a line break (or nothing at all) is left: the record is complete.
        if (in_array(self::$line, self::FIELD_BREAKS, true)) {
            self::$line = false;

            // NOTE(review): this also strips line breaks located inside the enclosure
            // when they directly precede the closing enclosure - confirm against fgetcsv.
            return rtrim($content, "\r\n");
        }

        $char = self::$line[0] ?? '';
        if (self::$delimiter === $char) {
            // Consume the delimiter so the next field starts right after it.
            self::$line = substr(self::$line, 1);

            return $content;
        }

        if (self::$enclosure === $char) {
            // Doubled enclosure: keep one enclosure character and resume the enclosed content.
            return $content.self::$enclosure.self::extractEnclosedFieldContent();
        }

        // Malformed field: the trailing characters are read as an unenclosed field.
        return $content.self::extractFieldContent();
    }
}
217