Completed
Push — master ( 39bb04...677cc4 )
by ignace nyamagana
02:22
created

RFC4180Iterator::extractEnclosedFieldContent()   B

Complexity

Conditions 6
Paths 18

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 6

Importance

Changes 0
Metric Value
cc 6
nc 18
nop 1
dl 0
loc 31
ccs 18
cts 18
cp 1
crap 6
rs 8.8017
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function substr;
31
use function trim;
32
33
/**
34
 * An RFC4180 compliant parser written in pure PHP.
35
 *
36
 * @see https://php.net/manual/en/function.fgetcsv.php
37
 * @see https://php.net/manual/en/function.fgets.php
38
 * @see https://tools.ietf.org/html/rfc4180
39
 * @see http://edoceo.com/utilitas/csv-file-format
40
 *
41
 * @package League.csv
42
 * @since   9.2.0
43
 * @author  Ignace Nyamagana Butera <[email protected]>
44
 * @internal used internally to produce RFC4180 compliant records
45
 */
46
final class RFC4180Iterator implements IteratorAggregate
{
    /**
     * Values of the remaining line buffer that mark the end of the current
     * record: a missing buffer (false), an empty string, or a lone line break.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document the records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * Field delimiter, read from the document CSV controls on each iteration.
     *
     * @var string
     */
    private $delimiter;

    /**
     * Field enclosure, read from the document CSV controls on each iteration.
     *
     * @var string
     */
    private $enclosure;

    /**
     * Characters stripped from both ends of non-enclosed fields: space, tab,
     * NUL and vertical tab, minus the delimiter and the enclosure.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document
     *
     * @throws TypeError if the document is neither a Stream nor an SplFileObject
     */
    public function __construct($document)
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            // Both placeholders must be "%s": the previous "% given" was read by
            // sprintf as a "% g" conversion and never printed the offending type.
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        $this->document = $document;
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator by extracting records one by one.
     *
     * The returned record array is similar to the returned value of fgetcsv:
     *
     * - If the line is empty the record will be an array with a single value equal to null
     * - Otherwise the array contains strings.
     *
     * The document flags are reset to 0 and the document is rewound before reading.
     *
     * @return \Traversable a generator yielding one array per record
     */
    public function getIterator(): \Traversable
    {
        //initialisation
        list($this->delimiter, $this->enclosure, ) = $this->document->getCsvControl();
        // The delimiter and the enclosure are significant characters, so they
        // must never be trimmed away even when they are whitespace (e.g. a tab).
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            $record = [];
            $line = $this->document->fgets();
            do {
                // A field starting with the enclosure is parsed as an enclosed field;
                // the extractors consume their field from $line (passed by reference)
                // and set it to false once the record is complete.
                $method = 'extractFieldContent';
                if (($line[0] ?? '') === $this->enclosure) {
                    $method = 'extractEnclosedFieldContent';
                }
                $record[] = $this->$method($line);
            } while (false !== $line);

            yield $record;
        } while ($this->document->valid());
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * - Leading and trailing whitespaces must be removed.
     * - trailing line-breaks must be removed.
     *
     * @param bool|string $line the remaining line buffer, updated in place:
     *                          the text following the delimiter, or false
     *                          when the record is complete
     *
     * @return null|string the field content, or null when the buffer only
     *                     holds a field break
     */
    private function extractFieldContent(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        // When no delimiter is found explode() returns a single item; the array
        // union then supplies false as the remainder, flagging the last field.
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];
        if (false === $line) {
            return trim(rtrim($content, "\r\n"), $this->trim_mask);
        }

        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field do not throw as per fgetcsv behavior.
     *
     * @param bool|string $line the remaining line buffer, updated in place:
     *                          the text following the field, or false when
     *                          the record is complete
     *
     * @return null|string
     */
    private function extractEnclosedFieldContent(&$line)
    {
        // Drop the opening enclosure (or the second half of a doubled one on recursion).
        if (($line[0] ?? '') === $this->enclosure) {
            $line = substr($line, 1);
        }

        // Accumulate content until the next enclosure character, pulling extra
        // lines from the document while the field is still open.
        $content = '';
        while (false !== $line) {
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $line) {
                break;
            }
            $line = $this->document->fgets();
        }

        // Nothing but a field break follows: this was the last field of the record.
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        // A delimiter right after the closing enclosure ends the field.
        $char = $line[0] ?? '';
        if ($char === $this->delimiter) {
            $line = substr($line, 1);

            return $content;
        }

        // Otherwise the enclosure just consumed is kept as a literal character
        // (typically the first half of a doubled enclosure) and parsing resumes.
        return $content.$this->enclosure.$this->extractEnclosedFieldContent($line);
    }
}
194