Completed
Pull Request — master (#309)
by ignace nyamagana
01:30
created

RFC4180Iterator::__construct()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
nc 2
nop 1
dl 0
loc 12
ccs 8
cts 8
cp 1
crap 4
rs 9.8666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function get_class;
23
use function gettype;
24
use function is_object;
25
use function sprintf;
26
use function str_split;
27
use function substr;
28
use function trim;
29
30
/**
31
 * An RFC4180 compliant parser in pure PHP.
32
 *
33
 * @see https://php.net/manual/en/function.fgetcsv.php
34
 * @see https://php.net/manual/en/function.fgets.php
35
 * @see https://tools.ietf.org/html/rfc4180
36
 * @see http://edoceo.com/utilitas/csv-file-format
37
 *
38
 * @package League.csv
39
 * @since   9.2.0
40
 * @author  Ignace Nyamagana Butera <[email protected]>
41
 * @internal used internally to produce RFC4180 compliant records
42
 */
43
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * The CSV document being parsed.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The field delimiter, read from the document CSV control on each iteration.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The field enclosure, read from the document CSV control on each iteration.
     *
     * @var string
     */
    private $enclosure;

    /**
     * The content of the field currently being parsed; null when no
     * character has been collected for the field yet.
     *
     * @var string|null
     */
    private $buffer;

    /**
     * The last character handled for the current field; reset to an empty
     * string when a doubled enclosure has just been collapsed.
     *
     * @var string
     */
    private $previous_char;

    /**
     * Tells whether the current field started with the enclosure character.
     *
     * @var bool
     */
    private $enclosed_field;

    /**
     * The characters trimmed from both ends of a non-enclosed field.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            // every placeholder must be a complete "%s" conversion: a bare "% g"
            // is parsed by sprintf as a space-padded float conversion
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * The document is rewound and read line by line; each line is then
     * processed one byte at a time. A record is yielded every time a
     * non-enclosed "\n" or "\r" is met, and whatever is left once the
     * document is exhausted is always yielded as a last record.
     *
     * @return \Generator
     */
    public function getIterator(): \Generator
    {
        //initialisation
        $this->init();
        list($this->delimiter, $this->enclosure) = $this->document->getCsvControl();
        //the delimiter and the enclosure must never be trimmed from a field
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();

        //characters which may end a field; built once as it never changes while iterating
        $breaks = [$this->delimiter, "\n", "\r"];
        $record = [];
        do {
            $line = (string) $this->document->fgets();
            foreach (str_split($line) as $char) {
                if (!in_array($char, $breaks, true)) {
                    $this->processEnclosure($char);
                    continue;
                }

                $field = $this->processBreaks($char);
                if (null !== $this->buffer) {
                    //the break character belongs to an enclosed field which is still open
                    continue;
                }

                $record[] = $field;
                if ($char === $this->delimiter) {
                    //end of a field but not of the record
                    continue;
                }

                yield $record;

                $record = [];
            }
        } while ($this->document->valid());

        $record[] = $this->clean();

        yield $record;
    }

    /**
     * Flushes and returns the last field content.
     *
     * @return string|null
     */
    private function clean()
    {
        //yield the remaining buffer
        if ($this->enclosed_field && $this->enclosure === $this->previous_char) {
            //strip the enclosure character present at the
            //end of the buffer; this is the end of an enclosed field
            $this->buffer = substr($this->buffer, 0, -1);
        }

        return $this->flush();
    }

    /**
     * Flushes and returns the field content.
     *
     * If the field is not enclosed we trim white spaces cf RFC4180.
     * The internal state is reset so that the next field starts clean.
     *
     * @return string|null
     */
    private function flush()
    {
        if (null !== $this->buffer && !$this->enclosed_field) {
            $this->buffer = trim($this->buffer, $this->trim_mask);
        }

        $field = $this->buffer;
        $this->init();

        return $field;
    }

    /**
     * Initialize the internal properties used to parse a single field.
     */
    private function init()
    {
        $this->buffer = null;
        $this->previous_char = '';
        $this->enclosed_field = false;
    }

    /**
     * Handles enclosure presence according to RFC4180.
     *
     * - detect enclosed field
     * - convert the double enclosure to one enclosure
     *
     * Any character which is not the enclosure is appended to the buffer.
     *
     * @param string $char the character being processed; never the delimiter nor a line break
     */
    private function processEnclosure(string $char)
    {
        if ($char !== $this->enclosure) {
            $this->previous_char = $char;
            $this->buffer .= $char;
            return;
        }

        if (!$this->enclosed_field) {
            if (null === $this->buffer) {
                //the enclosure is the first character of the field
                $this->enclosed_field = true;
                return;
            }

            //invalid CSV content: enclosure found inside a non-enclosed field, kept as is
            $this->previous_char = $char;
            $this->buffer .= $char;
            return;
        }

        //double enclosure
        if ($this->previous_char === $char) {
            //safe check to only strip double enclosure characters:
            //the first one is already in the buffer, the second one is dropped
            $this->previous_char = '';
            return;
        }

        //kept in the buffer for now; removed later if it turns out to close the field
        $this->previous_char = $char;
        $this->buffer .= $char;
    }

    /**
     * Handles delimiter and line breaks according to RFC4180.
     *
     * @param string $char the delimiter, "\n" or "\r"
     *
     * @return null|string the completed field, or null when the character
     *                     is part of an enclosed field which is still open
     *                     or when a line break ends an empty field
     */
    private function processBreaks(string $char)
    {
        if ($char === $this->delimiter) {
            //a delimiter always ends a field which exists, even when empty
            $this->buffer = (string) $this->buffer;
        }

        if (!$this->enclosed_field) {
            return $this->flush();
        }

        //the delimiter or the line break is enclosed
        if ($this->previous_char !== $this->enclosure) {
            $this->previous_char = $char;
            $this->buffer .= $char;
            return null;
        }

        //strip the enclosure character present at the
        //end of the buffer; this is the end of a field
        $this->buffer = substr($this->buffer, 0, -1);

        return $this->flush();
    }
}
252