Completed
Push — master ( 778a24...7e8982 )
by ignace nyamagana
03:09
created

RFC4180Parser::extractEnclosedFieldContent()   B

Complexity

Conditions 7
Paths 24

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
nc 24
nop 1
dl 0
loc 35
ccs 20
cts 20
cp 1
crap 7
rs 8.4266
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use IteratorAggregate;
20
use SplFileObject;
21
use TypeError;
22
use function explode;
23
use function get_class;
24
use function gettype;
25
use function in_array;
26
use function is_object;
27
use function rtrim;
28
use function sprintf;
29
use function str_replace;
30
use function strlen;
31
use function substr;
32
use function trim;
33
34
/**
 * A RFC4180 Compliant Parser in Pure PHP.
 *
 * The document is read line by line with fgets() and every line is split
 * into fields by the parser itself, so that enclosed fields spanning several
 * lines and doubled enclosure characters are handled as described by RFC4180.
 *
 * @see https://php.net/manual/en/function.fgetcsv.php
 * @see https://php.net/manual/en/function.fgets.php
 * @see https://tools.ietf.org/html/rfc4180
 * @see http://edoceo.com/utilitas/csv-file-format
 *
 * @internal used internally to produce RFC4180 compliant records
 */
final class RFC4180Parser implements IteratorAggregate
{
    /**
     * Values of the remaining line buffer which mark the end of the current record.
     *
     * @internal
     */
    const FIELD_BREAKS = [false, '', "\r\n", "\n", "\r"];

    /**
     * The document the records are read from.
     *
     * @var SplFileObject|Stream
     */
    private $document;

    /**
     * The single character separating two fields.
     *
     * @var string
     */
    private $delimiter;

    /**
     * The single character enclosing a field.
     *
     * @var string
     */
    private $enclosure;

    /**
     * The characters stripped from both ends of a field without enclosure.
     *
     * @var string
     */
    private $trim_mask;

    /**
     * New instance.
     *
     * @param SplFileObject|Stream $document  the document to parse
     * @param string               $delimiter a single character
     * @param string               $enclosure a single character
     *
     * @throws TypeError if the document is neither a Stream nor a SplFileObject
     * @throws Exception if the delimiter or the enclosure is not a single character
     */
    public function __construct($document, string $delimiter = ',', string $enclosure = '"')
    {
        if (!$document instanceof Stream && !$document instanceof SplFileObject) {
            throw new TypeError(sprintf(
                'Expected a %s or an SplFileObject object, %s given',
                Stream::class,
                is_object($document) ? get_class($document) : gettype($document)
            ));
        }

        if (1 !== strlen($delimiter)) {
            throw new Exception(sprintf('%s() expects delimiter to be a single character %s given', __METHOD__, $delimiter));
        }

        if (1 !== strlen($enclosure)) {
            throw new Exception(sprintf('%s() expects enclosure to be a single character %s given', __METHOD__, $enclosure));
        }

        $this->document = $document;
        $this->delimiter = $delimiter;
        $this->enclosure = $enclosure;
        // A whitespace character used as delimiter or enclosure must never be trimmed away.
        $this->trim_mask = str_replace([$this->delimiter, $this->enclosure], '', " \t\0\x0B");
    }

    /**
     * @inheritdoc
     *
     * Converts the stream into a CSV record iterator by extracting records one by one
     *
     * The returned record array is similar to the returned value of fgetcsv
     *
     * - If the line is empty the record will be an array with a single value equals to null
     * - Otherwise the array contains strings.
     */
    public function getIterator(): \Traversable
    {
        // Flags are reset so that fgets() returns the raw lines, line breaks included.
        $this->document->setFlags(0);
        $this->document->rewind();
        do {
            $record = [];
            $line = $this->document->fgets();
            do {
                // Both extractors consume the field from $line (passed by reference)
                // and set it to false once the record is complete.
                $record[] = ($line[0] ?? '') === $this->enclosure
                    ? $this->extractEnclosedFieldContent($line)
                    : $this->extractFieldContent($line);
            } while (false !== $line);

            yield $record;
        } while ($this->document->valid());
    }

    /**
     * Extract field without enclosure as per RFC4180.
     *
     * - Leading and trailing whitespaces must be removed.
     * - trailing line-breaks must be removed.
     *
     * On return $line holds the part of the line located after the delimiter
     * or false when the field was the last one of the record.
     *
     * @param bool|string $line the unparsed part of the current line
     *
     * @return null|string null if there is nothing left to extract
     */
    private function extractFieldContent(&$line)
    {
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return null;
        }

        // When no delimiter is found the missing second part defaults to false.
        list($content, $line) = explode($this->delimiter, $line, 2) + [1 => false];
        if (false === $line) {
            return trim(rtrim($content, "\r\n"), $this->trim_mask);
        }

        return trim($content, $this->trim_mask);
    }

    /**
     * Extract field with enclosure as per RFC4180.
     *
     * - Field content can spread on multiple document lines.
     * - Content inside enclosure must be preserved.
     * - Double enclosure sequence must be replaced by single enclosure character.
     * - Trailing line break must be removed if they are not part of the field content.
     * - Invalid field do not throw as per fgetcsv behavior.
     *
     * On return $line holds the part of the line located after the field
     * or false when the field was the last one of the record.
     *
     * @param bool|string $line the unparsed part of the current line
     */
    private function extractEnclosedFieldContent(&$line): string
    {
        // Drop the opening enclosure character.
        if (($line[0] ?? '') === $this->enclosure) {
            $line = substr($line, 1);
        }

        // Accumulate content, reading further document lines if needed,
        // until the next enclosure character is found.
        $content = '';
        while (false !== $line) {
            list($buffer, $line) = explode($this->enclosure, $line, 2) + [1 => false];
            $content .= $buffer;
            if (false !== $line) {
                break;
            }
            $line = $this->document->fgets();
        }

        // Nothing but a line break (or nothing at all) is left: the record ends here.
        if (in_array($line, self::FIELD_BREAKS, true)) {
            $line = false;

            return rtrim($content, "\r\n");
        }

        $char = $line[0] ?? '';
        // The closing enclosure is followed by a delimiter: the field is complete.
        if ($char === $this->delimiter) {
            $line = substr($line, 1);

            return $content;
        }

        // A doubled enclosure stands for a literal enclosure character,
        // the field content continues after it.
        if ($char === $this->enclosure) {
            return $content.$this->enclosure.$this->extractEnclosedFieldContent($line);
        }

        // Any other trailing data is parsed like a field without enclosure.
        return $content.$this->extractFieldContent($line);
    }
}
202