StreamTokeniserTest::testTokeniser()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 22
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 1 Features 1
Metric Value
c 4
b 1
f 1
dl 0
loc 22
rs 9.2
cc 2
eloc 13
nc 2
nop 3
1
<?php
2
/**
3
 * This file is part of graze/csv-token
4
 *
5
 * Copyright (c) 2016 Nature Delivered Ltd. <https://www.graze.com>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 *
10
 * @license https://github.com/graze/csv-token/blob/master/LICENSE.md
11
 * @link    https://github.com/graze/csv-token
12
 */
13
14
namespace Graze\CsvToken\Test\Unit;
15
16
use Graze\CsvToken\Csv\Bom;
17
use Graze\CsvToken\Csv\CsvConfiguration;
18
use Graze\CsvToken\Csv\CsvConfigurationInterface;
19
use Graze\CsvToken\Test\TestCase;
20
use Graze\CsvToken\Tokeniser\StreamTokeniser;
21
use Graze\CsvToken\Tokeniser\Token\Token;
22
use GuzzleHttp\Psr7\Stream;
23
24
class StreamTokeniserTest extends TestCase
25
{
26
    /**
     * Tokenise a CSV string and check both the emitted tokens and their contiguity.
     *
     * Two things are verified for every data set:
     *  - the first two elements of each emitted token match the expected list, and
     *  - for every token after the first, element 2 equals element 2 plus element 3
     *    of the token before it, i.e. no part of the input is skipped between tokens
     *    (elements 2 and 3 are presumably position and length - confirm against the
     *    tokeniser).
     *
     * @dataProvider tokeniserTestData
     *
     * @param CsvConfigurationInterface $config Configuration handed to the tokeniser
     * @param string                    $csv    Raw input to tokenise
     * @param array                     $tokens Expected list of two-element tokens
     */
    public function testTokeniser(CsvConfigurationInterface $config, $csv, array $tokens)
    {
        $tokeniser = new StreamTokeniser($config, $this->getStream($csv));

        /** @var Token[] $actual */
        $actual = iterator_to_array($tokeniser->getTokens());

        // Only the first two elements of each token are part of the expected data.
        $tokensOnly = array_map(function ($token) {
            return [$token[0], $token[1]];
        }, $actual);

        static::assertEquals($tokens, $tokensOnly);

        $count = count($actual);
        for ($i = 1; $i < $count; $i++) {
            $previous = $actual[$i - 1];

            // Expected value first, observed value second, so that a failure
            // reports the computed offset as "expected" and the token's own
            // offset as "actual".
            static::assertEquals(
                $previous[2] + $previous[3],
                $actual[$i][2],
                "There should be no missing gaps in the data"
            );
        }
    }
55
56
    /**
     * Data sets for testTokeniser().
     *
     * Every entry is [configuration, raw csv input, expected tokens], where each
     * expected token is a [token type, token content] pair. Entries are keyed by a
     * short description so a failing data set can be identified by name.
     *
     * The provider is static because it does not rely on instance state; PHPUnit 10
     * and later require data providers to be static.
     *
     * @return array
     */
    public static function tokeniserTestData()
    {
        return [
            'quoted fields including an empty one' => [
                new CsvConfiguration(),
                '"some","test","","data"',
                [
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'test'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'data'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'single unquoted field' => [
                new CsvConfiguration(),
                'some',
                [
                    [Token::T_CONTENT, 'some'],
                ],
            ],
            'double quotes inside quoted fields' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_DOUBLE_QUOTE => true,
                ]),
                '"end""","""start","""both""","","""",""""""""',
                [
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'end'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_CONTENT, 'start'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_CONTENT, 'both'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_DOUBLE_QUOTE, '""'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'escaped quote and escaped escape character' => [
                new CsvConfiguration(),
                '"some",test,"with \" escape","\\\\"',
                [
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, 'test'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'with '],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, '" escape'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, '\\'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'custom delimiter, quote, escape, new line and null' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_DELIMITER    => '|',
                    CsvConfiguration::OPTION_QUOTE        => "'",
                    CsvConfiguration::OPTION_ESCAPE       => '\\',
                    CsvConfiguration::OPTION_DOUBLE_QUOTE => true,
                    CsvConfiguration::OPTION_NEW_LINES    => ['---'],
                    CsvConfiguration::OPTION_NULL         => '\\N',
                ]),
                "'some'|text|'\\'here'|\\N|'with''quotes'---'another'|'line'",
                [
                    [Token::T_QUOTE, "'"],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_DELIMITER, '|'],
                    [Token::T_CONTENT, 'text'],
                    [Token::T_DELIMITER, '|'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, "'here"],
                    [Token::T_QUOTE, "'"],
                    [Token::T_DELIMITER, '|'],
                    [Token::T_NULL, '\\N'],
                    [Token::T_DELIMITER, '|'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_CONTENT, 'with'],
                    [Token::T_DOUBLE_QUOTE, "''"],
                    [Token::T_CONTENT, 'quotes'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_NEW_LINE, '---'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_CONTENT, 'another'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_DELIMITER, '|'],
                    [Token::T_QUOTE, "'"],
                    [Token::T_CONTENT, 'line'],
                    [Token::T_QUOTE, "'"],
                ],
            ],
            'new lines inside quotes and after an escape' => [
                new CsvConfiguration(),
                '"some","new' . "\n" . 'line",with\\' . "\n" . 'escaped,"in\\' . "\n" . 'quotes"',
                [
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'new' . "\n" . 'line'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, 'with'],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, "\n" . 'escaped'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'in'],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, "\n" . 'quotes'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'empty input' => [
                new CsvConfiguration(),
                '',
                [],
            ],
            'multibyte content' => [
                new CsvConfiguration(),
                "한국말\n조선말,한국말",
                [
                    [Token::T_CONTENT, '한국말'],
                    [Token::T_NEW_LINE, "\n"],
                    [Token::T_CONTENT, '조선말'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, '한국말'],
                ],
            ],
            'null sequences in unquoted and quoted fields' => [
                new CsvConfiguration([]),
                'text\\Nthing,\\Nstart,end\\N,\\N,"\\N"',
                [
                    [Token::T_CONTENT, 'text'],
                    [Token::T_NULL, '\\N'],
                    [Token::T_CONTENT, 'thing'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_NULL, '\\N'],
                    [Token::T_CONTENT, 'start'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, 'end'],
                    [Token::T_NULL, '\\N'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_NULL, '\\N'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_ESCAPE, '\\'],
                    [Token::T_CONTENT, 'N'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'empty quote option' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_QUOTE => '',
                ]),
                'text,stuff"and,things',
                [
                    [Token::T_CONTENT, 'text'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, 'stuff"and'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_CONTENT, 'things'],
                ],
            ],
            'empty escape option' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_ESCAPE => '',
                ]),
                '"some","text,","here\\"',
                [
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'text,'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'here\\'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'UTF-8 byte order mark' => [
                new CsvConfiguration(),
                "\xEF\xBB\xBF" . mb_convert_encoding('"some","text","here"', 'utf8'),
                [
                    [Token::T_BOM, "\xEF\xBB\xBF"],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'some'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'text'],
                    [Token::T_QUOTE, '"'],
                    [Token::T_DELIMITER, ','],
                    [Token::T_QUOTE, '"'],
                    [Token::T_CONTENT, 'here'],
                    [Token::T_QUOTE, '"'],
                ],
            ],
            'UTF-32BE byte order mark' => [
                new CsvConfiguration(),
                Bom::BOM_UTF32_BE . mb_convert_encoding('"some","text","here"', 'UTF-32BE'),
                [
                    [Token::T_BOM, "\x00\x00\xFE\xFF"],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                    [Token::T_CONTENT, mb_convert_encoding('some', 'UTF-32BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-32BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                    [Token::T_CONTENT, mb_convert_encoding('text', 'UTF-32BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-32BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                    [Token::T_CONTENT, mb_convert_encoding('here', 'UTF-32BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-32BE')],
                ],
            ],
            'UTF-16 encoding option' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_ENCODING => 'UTF-16',
                ]),
                mb_convert_encoding('"sõme","tēxt","hêre"', 'UTF-16'),
                [
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                    [Token::T_CONTENT, mb_convert_encoding('sõme', 'UTF-16')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-16')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                    [Token::T_CONTENT, mb_convert_encoding('tēxt', 'UTF-16')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-16')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                    [Token::T_CONTENT, mb_convert_encoding('hêre', 'UTF-16')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16')],
                ],
            ],
            'configured UTF-16BE byte order mark' => [
                new CsvConfiguration([
                    CsvConfiguration::OPTION_BOMS => [Bom::BOM_UTF16_BE],
                ]),
                Bom::BOM_UTF16_BE . mb_convert_encoding('"sõme","tēxt","hêre"', 'UTF-16BE'),
                [
                    [Token::T_BOM, Bom::BOM_UTF16_BE],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                    [Token::T_CONTENT, mb_convert_encoding('sõme', 'UTF-16BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-16BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                    [Token::T_CONTENT, mb_convert_encoding('tēxt', 'UTF-16BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                    [Token::T_DELIMITER, mb_convert_encoding(',', 'UTF-16BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                    [Token::T_CONTENT, mb_convert_encoding('hêre', 'UTF-16BE')],
                    [Token::T_QUOTE, mb_convert_encoding('"', 'UTF-16BE')],
                ],
            ],
        ];
    }
356
}
357