Completed
Pull Request — master (#309)
by ignace nyamagana
01:52
created

RFC4180Iterator::addCharacter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 5
ccs 4
cts 4
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function get_class;
23
use function gettype;
24
use function is_object;
25
use function sprintf;
26
use function str_split;
27
use function substr;
28
use function trim;
29
30
/**
 * An RFC4180 compliant parser in pure PHP.
 *
 * The document is read line by line and each line is walked character by
 * character; fields are accumulated in an internal buffer and emitted when
 * a delimiter or a line break is met outside of an enclosed field.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @package League.csv
 * @since   9.2.0
 * @author  Ignace Nyamagana Butera <[email protected]>
 * @internal used internally to produce RFC4180 compliant records
 */
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * The CSV document.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The field delimiter, read from the document CSV controls
     * each time the iterator is created.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The field enclosure, read from the document CSV controls
     * each time the iterator is created.
     *
     * @var string
     */
    private $enclosure;

    /**
     * The content of the field currently being parsed.
     *
     * Null means that no character has been collected yet for the field.
     *
     * @var string|null
     */
    private $buffer;

    /**
     * The last character appended to the buffer.
     *
     * Reset to the empty string when a field is flushed and when a doubled
     * enclosure has been collapsed.
     *
     * @var string
     */
    private $previous_char = '';

    /**
     * Tells whether the field currently being parsed started with an enclosure.
     *
     * @var bool
     */
    private $enclosed_field = false;

    /**
     * The characters trimmed from both ends of a non-enclosed field.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            // every placeholder must be a complete "%s" conversion: a lone "% "
            // followed by "given" is parsed by sprintf as a padded "%g" conversion
            // and the received type is never displayed
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * Each yielded record is a list of fields; a field is a string or null
     * when no character was collected for it. The record being built when
     * the end of the document is reached is always yielded last.
     */
    public function getIterator()
    {
        //initialisation
        list($this->delimiter, $this->enclosure, ) = $this->document->getCsvControl();
        //the delimiter and the enclosure must never be trimmed away
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        $this->flush();

        //maps each special character to the method handling it;
        //any other character is appended to the buffer as is
        $methodList = [
            $this->enclosure => 'processEnclosure',
            $this->delimiter => 'processBreaks',
            "\n" => 'processBreaks',
            "\r" => 'processBreaks',
        ];

        $record = [];
        while ($this->document->valid()) {
            //let's walk through the stream char by char
            foreach (str_split((string) $this->document->fgets()) as $char) {
                $method = $methodList[$char] ?? 'addCharacter';
                if ('processBreaks' !== $method) {
                    $this->$method($char);
                    continue;
                }

                $field = $this->$method($char);
                if (null !== $this->buffer) {
                    //the break belongs to an enclosed field:
                    //it was added to the buffer and the field is not complete
                    continue;
                }

                $record[] = $field;
                if ($char !== $this->delimiter) {
                    //a line break outside of an enclosed field ends the record
                    yield $record;

                    $record = [];
                }
            }
        }

        //yield the remaining buffer as the last field of the last record
        $record[] = $this->clean();

        yield $record;
    }

    /**
     * Format and return the content of the field still present
     * in the buffer once the whole document has been read.
     *
     * @return string|null
     */
    private function clean()
    {
        if ($this->enclosed_field && $this->enclosure === $this->previous_char) {
            //strip the enclosure character present at the
            //end of the buffer; this is the end of an enclosed field
            $this->buffer = substr($this->buffer, 0, -1);
        }

        return $this->flush();
    }

    /**
     * Format and return the field content, then reset the parser state.
     *
     * @return string|null
     */
    private function flush()
    {
        //if the field is not enclosed we trim white spaces
        if (null !== $this->buffer && !$this->enclosed_field) {
            $this->buffer = trim($this->buffer, $this->trim_mask);
        }

        //adding field content to the record
        $field = $this->buffer;

        //reset parameters
        $this->buffer = null;
        $this->previous_char = '';
        $this->enclosed_field = false;

        return $field;
    }

    /**
     * Append a character to the buffer and remember it as the previous character.
     */
    private function addCharacter(string $char)
    {
        $this->previous_char = $char;
        $this->buffer .= $char;
    }

    /**
     * Handle enclosure presence.
     */
    private function processEnclosure(string $char)
    {
        if (!$this->enclosed_field) {
            //the enclosure is at the start of the field
            //so we have an enclosed field
            if (null === $this->buffer) {
                $this->enclosed_field = true;
                return;
            }

            //invalid CSV content let's deal with it like fgetcsv
            //we add the character to the buffer and we move on
            return $this->addCharacter($char);
        }

        //double enclosure let's skip the character and move on
        if ($this->previous_char === $char) {
            //we reset the previous character to the empty string
            //to only strip double enclosure characters
            $this->previous_char = '';
            return;
        }

        return $this->addCharacter($char);
    }

    /**
     * Handle delimiter and line breaks.
     *
     * Returns the completed field content when the break ends the field.
     * When the break is part of an enclosed field it is appended to the
     * buffer, which therefore stays non-null, and null is returned.
     *
     * @return null|string
     */
    private function processBreaks(string $char)
    {
        if ($char === $this->delimiter) {
            //a delimiter guarantees the presence of a field:
            //an empty field becomes the empty string instead of null
            $this->buffer = (string) $this->buffer;
        }

        if (!$this->enclosed_field) {
            return $this->flush();
        }

        //the break is enclosed let's add it to the buffer and move on
        if ($this->previous_char !== $this->enclosure) {
            return $this->addCharacter($char);
        }

        //strip the enclosure character present at the
        //end of the buffer; this is the end of the enclosed field
        $this->buffer = substr($this->buffer, 0, -1);

        return $this->flush();
    }
}
249