Completed
Push — master ( 7e8982...eec61a )
by ignace nyamagana
03:31 queued 02:15
created

RFC4180Parser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 3
dl 0
loc 7
ccs 6
cts 6
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function strlen;
31
use function substr;
32
use function trim;
33
34
/**
35
 * An RFC4180 compliant parser in pure PHP.
36
 *
37
 * @see https://php.net/manual/en/function.fgetcsv.php
38
 * @see https://php.net/manual/en/function.fgets.php
39
 * @see https://tools.ietf.org/html/rfc4180
40
 * @see http://edoceo.com/utilitas/csv-file-format
41
 *
42
 * @internal used internally to produce RFC4180 compliant records
43
 */
44
final class RFC4180Parser implements IteratorAggregate
{
    /**
     * Values of the remaining line buffer that mark the end of a record:
     * a failed read, an empty string, or a lone line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The CSV document records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The single-byte field delimiter.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The single-byte field enclosure.
     *
     * @var string
     */
    private $enclosure;

    /**
     * Characters stripped from both ends of a field that is not enclosed.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document  the CSV document to parse
     * @param string               $delimiter the field delimiter (exactly one byte)
     * @param string               $enclosure the field enclosure (exactly one byte)
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     * @throws Exception if the delimiter or the enclosure is not exactly one byte long
     */
    public function __construct($document, string $delimiter = ',', string $enclosure = '"')
    {
        $this->document = $this->filterDocument($document);
        $this->delimiter = $this->filterControl($delimiter, 'delimiter');
        $this->enclosure = $this->filterControl($enclosure, 'enclosure');
        // The control characters are removed from the mask so that, for instance,
        // a tab used as delimiter is never trimmed away from a field.
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
    }

    /**
     * Filter the submitted document.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     *
     * @return SplFileObject|Stream
     */
    private function filterDocument($document)
    {
        if ($document instanceof Stream || $document instanceof SplFileObject) {
            return $document;
        }

        // Both placeholders must be "%s": the previous "% given" was parsed by
        // sprintf() as a space-padded float conversion and garbled the message.
        throw new TypeError(sprintf(
            'Expected a %s or an SplFileObject object, %s given',
            Stream::class,
            is_object($document) ? get_class($document) : gettype($document)
        ));
    }

    /**
     * Filter the control characters.
     *
     * @param string $control the submitted control character
     * @param string $name    the control name used in the error message
     *
     * @throws Exception if the control is not exactly one byte long
     */
    private function filterControl(string $control, string $name): string
    {
        if (1 === strlen($control)) {
            return $control;
        }

        throw new Exception(sprintf('Expected %s to be a single character %s given', $name, $control));
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator by extracting records one by one
     *
     * The returned record array is similar to the returned value of fgetcsv
     *
     * - If the line is empty the record will be an array with a single value equals to null
     * - Otherwise the array contains strings.
     */
    public function getIterator()
    {
        // Every document flag is cleared and the document is rewound before
        // lines are pulled one at a time with fgets().
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            $record = [];
            $line = $this->document->fgets();
            // Both extractors consume one field from $line (passed by reference)
            // and set it to false once the record is complete.
            do {
                $record[] = ($line[0] ?? '') === $this->enclosure
                    ? $this->extractEnclosedFieldContent($line)
                    : $this->extractFieldContent($line);
            } while (false !== $line);

            yield $record;
        } while ($this->document->valid());
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * - Leading and trailing whitespaces must be removed.
     * - trailing line-breaks must be removed.
     *
     * On return $line holds the text following the consumed field, or false
     * when the field was the last one of the record.
     *
     * @param bool|string $line
     *
     * @return null|string null when the remaining buffer is a record break
     */
    private function extractFieldContent(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        // When no delimiter is left explode() yields a single item and the
        // array union supplies false as the "rest of the line".
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];
        if (false === $line) {
            // Last field of the record: drop the trailing line break first.
            return trim(rtrim($content, "\r\n"), $this->trim_mask);
        }

        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field do not throw as per fgetcsv behavior.
     *
     * On return $line holds the text following the consumed field, or false
     * when the field was the last one of the record.
     *
     * @param bool|string $line
     */
    private function extractEnclosedFieldContent(&$line): string
    {
        // Skip the opening enclosure. The check is repeated here because the
        // method also calls itself on an escaped (doubled) enclosure.
        if (($line[0] ?? '') === $this->enclosure) {
            $line = substr($line, 1);
        }

        // Accumulate content up to the next enclosure character, pulling further
        // lines from the document while the field stays open.
        $content = '';
        while (false !== $line) {
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $line) {
                break;
            }
            $line = $this->document->fgets();
        }

        // The enclosure was closed at the end of the record, or the document
        // ran out of lines before the field was closed.
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        $char = $line[0] ?? '';
        if ($char === $this->delimiter) {
            // Closing enclosure directly followed by a delimiter: field complete.
            $line = substr($line, 1);

            return $content;
        }

        if ($char === $this->enclosure) {
            // Doubled enclosure: keep a single one and carry on inside the field.
            return $content.$this->enclosure.$this->extractEnclosedFieldContent($line);
        }

        // Stray characters after the closing enclosure are appended as an
        // unenclosed field, without raising an error.
        return $content.$this->extractFieldContent($line);
    }
}
218