Completed
Pull Request — master (#309)
by ignace nyamagana
03:49 queued 01:21
created

RFC4180Iterator::extractFieldEnclosed()   B

Complexity

Conditions 7
Paths 6

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
nc 6
nop 1
dl 0
loc 30
ccs 16
cts 16
cp 1
crap 7
rs 8.5066
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function substr;
31
use function trim;
32
33
/**
34
 * A RFC4180 Compliant Parser in Pure PHP.
35
 *
36
 * @see https://php.net/manual/en/function.fgetcsv.php
37
 * @see https://php.net/manual/en/function.fgets.php
38
 * @see https://tools.ietf.org/html/rfc4180
39
 * @see http://edoceo.com/utilitas/csv-file-format
40
 *
41
 * @package League.csv
42
 * @since   9.2.0
43
 * @author  Ignace Nyamagana Butera <[email protected]>
44
 * @internal used internally to produce RFC4180 compliant records
45
 */
46
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * Values of the remaining line that mark the end of the current record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, "\r", "\r\n", "\n", ''];

    /**
     * The document the records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * Field delimiter, taken from the document CSV control in getIterator().
     *
     * @var string
     */
    private $delimiter;

    /**
     * Field enclosure, taken from the document CSV control in getIterator().
     *
     * @var string
     */
    private $enclosure;

    /**
     * The enclosure character repeated twice.
     *
     * @var string
     */
    private $double_enclosure;

    /**
     * Characters trimmed around non-enclosed fields: " \t\0\x0B" minus
     * the delimiter and the enclosure characters.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            // Both placeholders must be %s: a bare "% g" is read by sprintf
            // as a space-padded float conversion and garbles the message.
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * The delimiter and enclosure are read from the document on every call,
     * the document flags are reset to 0 and the document is rewound before
     * the first record is produced.
     */
    public function getIterator()
    {
        //initialisation
        list($this->delimiter, $this->enclosure, ) = $this->document->getCsvControl();
        $this->double_enclosure = $this->enclosure.$this->enclosure;
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            yield $this->extractRecord($this->document->fgets());
        } while ($this->document->valid());
    }

    /**
     * Extract a record from the Stream document.
     *
     * The returned array is similar to the value returned by fgetcsv.
     * If this is an empty line the record will be an array with a single
     * value equal to null, otherwise the array contains string data.
     *
     * @param string|bool $line
     */
    private function extractRecord($line): array
    {
        $record = [];
        do {
            // a field starting with the enclosure character is parsed as an
            // enclosed field, anything else as a plain field
            $method = 'extractField';
            if (($line[0] ?? '') === $this->enclosure) {
                $method = 'extractFieldEnclosed';
            }
            // the extractors receive $line by reference and set it to false
            // once the end of the record is reached
            $record[] = $this->$method($line);
        } while (false !== $line);

        return $record;
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * Leading and trailing whitespaces are trimmed because the field
     * is not enclosed. Trailing line-breaks are also removed.
     *
     * @param bool|string $line the unparsed remainder of the record; updated
     *                          in place, false once the record is exhausted
     *
     * @return null|string null when the remainder is only a field break
     */
    private function extractField(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        // split off the first field; when no delimiter is left the remainder
        // becomes false which marks the last field of the record
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];
        if (false === $line) {
            return trim(rtrim($content, "\r\n"), $this->trim_mask);
        }

        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Leading and trailing whitespaces are preserved because the field
     * is enclosed.
     * - The field content can spread on multiple document lines.
     * - Double enclosure characters must be replaced by a single enclosure character.
     * - Trailing line breaks are removed if they are not part of the field content.
     * - Invalid fields do not throw as per fgetcsv behavior.
     *
     * @param bool|string $line the unparsed remainder of the record; updated
     *                          in place, false once the record is exhausted
     *
     * @return null|string
     */
    private function extractFieldEnclosed(&$line)
    {
        // Every call starts on a character that has to be dropped: the opening
        // enclosure on the first call or, on a recursive call, the character
        // the caller has already appended to the field content. The check is
        // deliberately "is there a character" and not a truthiness test,
        // otherwise a leading '0' would be kept and emitted twice.
        if ('' !== ($line[0] ?? '')) {
            $line = substr($line, 1);
        }

        $content = '';
        do {
            // read up to the next enclosure; keep pulling document lines while
            // none is found so a field can span several lines
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
        } while (false === $line && $this->document->valid() && false !== ($line = $this->document->fgets()));

        // collapse any doubled enclosure present in the accumulated content
        $content = str_replace($this->double_enclosure, $this->enclosure, $content);
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        $char = $line[0] ?? '';
        if ($char === $this->delimiter) {
            // regular end of field: skip the delimiter for the next field
            $line = substr($line, 1);

            return $content;
        }

        //handles enclosure as per RFC4180 or malformed CSV like fgetcsv
        //the recursive call drops $char from $line before parsing further
        return $content.$char.$this->extractFieldEnclosed($line);
    }
}
208