Completed
Pull Request — master (#309)
by ignace nyamagana
02:32
created

RFC4180Iterator::getIterator()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
nc 1
nop 0
dl 0
loc 12
ccs 9
cts 9
cp 1
crap 2
rs 9.8666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function substr;
31
use function trim;
32
33
/**
34
 * A RFC4180 Compliant Parser in Pure PHP.
35
 *
36
 * @see https://php.net/manual/en/function.fgetcsv.php
37
 * @see https://php.net/manual/en/function.fgets.php
38
 * @see https://tools.ietf.org/html/rfc4180
39
 * @see http://edoceo.com/utilitas/csv-file-format
40
 *
41
 * @package League.csv
42
 * @since   9.2.0
43
 * @author  Ignace Nyamagana Butera <[email protected]>
44
 * @internal used internally to produce RFC4180 compliant records
45
 */
46
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * Values of the not-yet-parsed line remainder that mark the end of a record:
     * nothing left to read (false or an empty string) or only a line terminator.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, "\r", "\r\n", "\n", ''];

    /**
     * The document the records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * Field delimiter, read from the document CSV controls on each iteration.
     *
     * @var string
     */
    private $delimiter;

    /**
     * Field enclosure, read from the document CSV controls on each iteration.
     *
     * @var string
     */
    private $enclosure;

    /**
     * Two consecutive enclosure characters, i.e. the escaped form of one
     * enclosure character inside an enclosed field.
     *
     * @var string
     */
    private $double_enclosure;

    /**
     * Characters trimmed around a non-enclosed field.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * The delimiter and the enclosure are re-read from the document every time
     * the iteration starts. The document flags are reset to 0 and the document
     * is rewound before the first line is read. At least one record is always
     * yielded because the loop body runs before the validity check.
     */
    public function getIterator()
    {
        //initialisation
        list($this->delimiter, $this->enclosure, ) = $this->document->getCsvControl();
        $this->double_enclosure = $this->enclosure.$this->enclosure;
        //the delimiter and the enclosure must never be trimmed away, even when they are whitespace
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            yield $this->extractRecord($this->document->fgets());
        } while ($this->document->valid());
    }

    /**
     * Extract a record from the Stream document.
     *
     * Fields are consumed one after the other from the start of $line; each
     * extractor receives $line by reference and sets it to false once the
     * record is complete.
     *
     * @param string|bool $line the first raw line of the record
     */
    private function extractRecord($line): array
    {
        $record = [];
        do {
            if (($line[0] ?? '') === $this->enclosure) {
                $record[] = $this->extractFieldEnclosed($line);
            } else {
                $record[] = $this->extractField($line);
            }
        } while (false !== $line);

        return $record;
    }

    /**
     * Extract field without enclosure.
     *
     * @param bool|string $line remainder of the current line; updated to what follows
     *                          the field, or to false when the record is complete
     *
     * @return null|string null when nothing but a line break (or nothing at all) is left
     */
    private function extractField(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        //explode the line on the next delimiter character if any
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];

        //remove line breaks characters as per RFC4180
        if (false === $line) {
            $content = rtrim($content, "\r\n");
        }

        //remove whitespaces as per RFC4180
        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure.
     *
     * The first character of $line is always consumed on entry: it is either
     * the opening enclosure detected by extractRecord() or a character the
     * recursive call below has already appended to the field content.
     *
     * @param bool|string $line remainder of the current line; updated to what follows
     *                          the field, or to false when the record is complete
     *
     * @return null|string
     */
    private function extractFieldEnclosed(&$line)
    {
        //remove the leading character to ease explode usage. The check must not
        //depend on the character truthiness otherwise a leading '0' is kept and
        //ends up duplicated in the field content.
        if (isset($line[0])) {
            $line = substr($line, 1);
        }

        $content = '';
        //cover multiline field
        do {
            //explode the line on the next enclosure character if any
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
        } while (false === $line && $this->document->valid() && false !== ($line = $this->document->fgets()));

        //decode the field content as per RFC4180
        $content = str_replace($this->double_enclosure, $this->enclosure, $content);

        //remove line breaks characters as per RFC4180
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        //the field data is extracted since we have a delimiter
        $char = $line[0] ?? '';
        if ($char === $this->delimiter) {
            $line = substr($line, 1);

            return $content;
        }

        //handles enclosure as per RFC4180 or malformed CSV like fgetcsv:
        //the character following the closing enclosure belongs to the field,
        //the recursive call consumes it and keeps on reading the field
        return $content.$char.$this->extractFieldEnclosed($line);
    }
}
203