Completed
Pull Request — master (#309)
by ignace nyamagana
03:03
created

RFC4180Iterator::extractField()   B

Complexity

Conditions 7
Paths 3

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
nc 3
nop 1
dl 0
loc 20
ccs 8
cts 8
cp 1
crap 7
rs 8.6666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.1.5
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function is_object;
26
use function rtrim;
27
use function sprintf;
28
use function str_replace;
29
use function substr;
30
use function trim;
31
32
/**
33
 * An RFC4180 compliant parser in pure PHP.
34
 *
35
 * @see https://php.net/manual/en/function.fgetcsv.php
36
 * @see https://php.net/manual/en/function.fgets.php
37
 * @see https://tools.ietf.org/html/rfc4180
38
 * @see http://edoceo.com/utilitas/csv-file-format
39
 *
40
 * @package League.csv
41
 * @since   9.2.0
42
 * @author  Ignace Nyamagana Butera <[email protected]>
43
 * @internal used internally to produce RFC4180 compliant records
44
 */
45
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * The CSV document the records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * Field delimiter, read from the document's CSV control in getIterator().
     *
     * @var string
     */
    private $delimiter;

    /**
     * Field enclosure, read from the document's CSV control in getIterator().
     *
     * @var string
     */
    private $enclosure;

    /**
     * Characters stripped from both ends of an unenclosed field: space, tab,
     * NUL and vertical tab, minus the delimiter and the enclosure.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator.
     *
     * The delimiter and enclosure are (re)read from the document on every call,
     * the document flags are reset to 0 and the document is rewound before the
     * first line is read. One record is yielded per line read at this level;
     * enclosed fields may pull additional lines from the document.
     */
    public function getIterator(): \Generator
    {
        //initialisation
        list($this->delimiter, $this->enclosure, ) = $this->document->getCsvControl();
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            $line = $this->document->fgets();
            yield $this->extractRecord($line);
        } while ($this->document->valid());
    }

    /**
     * Extract a record from the Stream document.
     *
     * Fields are consumed from the start of the line one at a time; each
     * extractor receives the line by reference and replaces it with the
     * unparsed remainder, or with false once the record is complete.
     *
     * @param string|bool $line
     */
    private function extractRecord($line): array
    {
        $record = [];
        do {
            //a field starting with the enclosure character is parsed as an enclosed field
            $record[] = ($line[0] ?? '') === $this->enclosure
                ? $this->extractEnclosedField($line)
                : $this->extractField($line);
        } while (false !== $line);

        return $record;
    }

    /**
     * Extract field without enclosure.
     *
     * @param bool|string $line the unparsed part of the line; updated by reference to
     *                          the remainder after the field, or false at the end of the record
     *
     * @return null|string null when the line is false, empty or only a line-break
     */
    private function extractField(& $line)
    {
        //process the line if it is only a line-break or the empty string
        if ($line === false || $line === "\r" || $line === "\r\n" || $line === "\n" || $line === '') {
            $line = false;

            return null;
        }

        //explode the line on the next delimiter character
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];

        //if this is the end of line remove line breaks
        if (false === $line) {
            $content = rtrim($content, "\r\n");
        }

        //remove whitespaces
        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure.
     *
     * @param bool|string $line the unparsed part of the line; updated by reference to
     *                          the remainder after the field, or false at the end of the record
     *
     * @return string
     */
    private function extractEnclosedField(& $line)
    {
        //remove the first enclosure from the line if present to easily use explode
        //the parentheses are required: === binds tighter than ??
        if (($line[0] ?? '') === $this->enclosure) {
            $line = substr($line, 1);
        }

        //covers multiline fields: keep reading lines until an enclosure character is found
        $content = '';
        do {
            //explode the line on the next enclosure character found
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
        } while (false === $line && $this->document->valid() && false !== ($line = $this->document->fgets()));

        //format the field content by removing double quoting if present
        $content = str_replace($this->enclosure.$this->enclosure, $this->enclosure, $content);

        //process the line if it is only a line-break or the empty string
        if ($line === false || $line === "\r" || $line === "\r\n" || $line === "\n" || $line === '') {
            $line = false;

            return rtrim($content, "\r\n");
        }

        //the field data is extracted since we have a delimiter
        if (($line[0] ?? '') === $this->delimiter) {
            $line = substr($line, 1);

            return $content;
        }

        //doubled enclosure found: keep one literal enclosure character
        //(the configured one, not a hard-coded double quote) and continue parsing
        if (($line[0] ?? '') === $this->enclosure) {
            $content .= $this->enclosure.$this->extractEnclosedField($line);
        }

        return $content;
    }
}
193