Completed
Pull Request — master (#309)
by ignace nyamagana
02:44
created

RFC4180Iterator::extractRecord()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 2
nop 1
dl 0
loc 13
ccs 8
cts 8
cp 1
crap 3
rs 9.8333
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function substr;
31
use function trim;
32
33
/**
 * A RFC4180 Compliant Parser in Pure PHP.
 *
 * The document is read line by line and each line is split into fields
 * using the delimiter and enclosure characters configured on the document.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @package League.csv
 * @since   9.2.0
 * @author  Ignace Nyamagana Butera <[email protected]>
 * @internal used internally to produce RFC4180 compliant records
 */
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * Values of the remaining line which mark the end of the current record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document being parsed.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The field delimiter, read from the document CSV controls.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The field enclosure, read from the document CSV controls.
     *
     * @var string
     */
    private $enclosure;

    /**
     * Characters trimmed around non-enclosed fields.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            // Both placeholders must be "%s": "% g" would be parsed as a
            // space-padded float conversion and garble the reported type.
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * The delimiter and the enclosure are read from the document on every
     * call, the document flags are reset to 0 and the document is rewound
     * before the first line is read.
     */
    public function getIterator()
    {
        //initialisation
        list($this->delimiter, $this->enclosure) = $this->document->getCsvControl();
        //the delimiter and the enclosure are never trimmed, even if they are whitespace characters
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            yield $this->extractRecord($this->document->fgets());
        } while ($this->document->valid());
    }

    /**
     * Extract a record from the Stream document.
     *
     * The returned array is similar to the value returned by fgetcsv.
     * If the line is empty the record is an array with a single null
     * value, otherwise the array contains string data.
     *
     * @param string|bool $line the line as returned by the document fgets method
     *
     * @return array
     */
    private function extractRecord($line): array
    {
        $record = [];
        do {
            //the extractors consume the current field from $line (passed by reference)
            //and set it to false once the end of the record is reached
            if (($line[0] ?? '') === $this->enclosure) {
                $record[] = $this->extractFieldEnclosed($line);
            } else {
                $record[] = $this->extractField($line);
            }
        } while (false !== $line);

        return $record;
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * Leading and trailing whitespaces are trimmed because the field
     * is not enclosed. Trailing line-breaks are also removed.
     *
     * @param bool|string $line the remaining line content; it is updated to the
     *                          content following the field, or to false when the
     *                          field is the last one of the record
     *
     * @return null|string null if the remaining line is a field break
     */
    private function extractField(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        //when no delimiter is found the remaining line is set to false
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];
        if (false === $line) {
            //last field of the record: the line-break is removed before trimming
            return trim(rtrim($content, "\r\n"), $this->trim_mask);
        }

        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Leading and trailing whitespaces are preserved because the field
     * is enclosed.
     * - The field content can spread over multiple document lines.
     * - Double enclosure characters must be replaced by a single enclosure character.
     * - Trailing line breaks are removed if they are not part of the field content.
     * - Invalid fields do not throw as per fgetcsv behavior.
     *
     * @param bool|string $line the remaining line content; it is updated to the
     *                          content following the field, or to false when the
     *                          field is the last one of the record
     *
     * @return null|string
     */
    private function extractFieldEnclosed(&$line)
    {
        //remove the starting enclosure character if present
        if (($line[0] ?? '') === $this->enclosure) {
            $line = substr($line, 1);
        }

        //read up to the next enclosure character, fetching more
        //document lines while none is found on the current one
        $content = '';
        do {
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
        } while (false === $line && $this->document->valid() && false !== ($line = $this->document->fgets()));

        //handle end of content by end of record
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        $char = $line[0] ?? '';
        //handle end of content by delimiter
        if ($char === $this->delimiter) {
            $line = substr($line, 1);

            return $content;
        }

        //handles double quoted data
        if ($char === $this->enclosure) {
            return $content.$char.$this->extractFieldEnclosed($line);
        }

        //handles malformed CSV like fgetcsv by skipping the enclosure character
        return $content.$this->extractFieldEnclosed($line);
    }
}
207