FullTest::testTokenize() (rated A)

Complexity
    Conditions: 1
    Paths: 1

Size
    Total Lines: 8
    Code Lines: 6

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 1
    Bugs: 0, Features: 0

Metric  Value
cc      1    (cyclomatic complexity / conditions)
eloc    6    (executable lines of code)
c       1    (changes)
b       0    (bugs)
f       0    (features)
nc      1    (paths)
nop     2    (number of parameters)
dl      0    (duplicated lines)
loc     8    (total lines)
rs      10

testTokenize() is a straight-line method with no branching, which is why both the condition and path counts are 1; nop 2 reflects its two parameters, $string and $expectedTokens.

<?php declare(strict_types = 1);

namespace Apicart\FQL\Tests\Tokenizer;

use Apicart\FQL\Token\Token\GroupBegin;
use Apicart\FQL\Token\Token\GroupBegin as GroupBeginToken;
use Apicart\FQL\Token\Token\Phrase as PhraseToken;
use Apicart\FQL\Token\Token\Range as RangeToken;
use Apicart\FQL\Token\Token\Tag as TagToken;
use Apicart\FQL\Token\Token\User as UserToken;
use Apicart\FQL\Token\Token\Word as WordToken;
use Apicart\FQL\Tokenizer\AbstractTokenExtractor;
use Apicart\FQL\Tokenizer\Full;
use Apicart\FQL\Tokenizer\Tokenizer;
use Apicart\FQL\Value\Token;
use Apicart\FQL\Value\TokenSequence;
use PHPUnit\Framework\TestCase;

class FullTest extends TestCase
{

    public function providerForTestTokenize(): array
    {
        return [
            [" \n", [new Token(Tokenizer::TOKEN_WHITESPACE, " \n", 0)]],
            ['word', [new WordToken('word', 0, '', 'word')]],
            ["word\n", [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['word ', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['šđčćž', [new WordToken('šđčćž', 0, '', 'šđčćž')]],
            [
                $jajeNaOko = mb_convert_encoding('&#x1F373;', 'UTF-8', 'HTML-ENTITIES'),
                // Scrutinizer (2 issues): $jajeNaOko can also be of type array, but the
                // $lexeme and $word parameters of Word::__construct() only accept string.
                // Add a type check, or mark the arguments with /** @scrutinizer ignore-type */
                // if this is a false positive.
                [new WordToken($jajeNaOko, 0, '', $jajeNaOko)],
            ],
            [
                $blah = mb_convert_encoding(
                    '&#x1F469;&#x200D;&#x1F469;&#x200D;&#x1F467;&#x200D;&#x1F467;',
                    'UTF-8',
                    'HTML-ENTITIES'
                ),
                [new WordToken($blah, 0, '', $blah)],
            ],
            ['word-word', [new WordToken('word-word', 0, '', 'word-word')]],
            [
                "word\nword",
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            [
                'word word',
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            ['word\\ word', [new WordToken('word\\ word', 0, '', 'word word')]],
            ['[a  TO b]', [new RangeToken('[a  TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive')]],
            ['[a TO b}', [new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive')]],
            ['{a TO b}', [new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive')]],
            ['{a TO b]', [new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive')]],
            [
                '[2017-01-01 TO 2017-01-05]',
                [
                    new RangeToken(
                        '[2017-01-01 TO 2017-01-05]',
                        0,
                        '',
                        '2017-01-01',
                        '2017-01-05',
                        'inclusive',
                        'inclusive'
                    ),
                ],
            ],
            ['[20    TO   *]', [new RangeToken('[20    TO   *]', 0, '', '20', '*', 'inclusive', 'inclusive')]],
            ['[* TO 20]', [new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive')]],
            ['"phrase"', [new PhraseToken('"phrase"', 0, '', '"', 'phrase')]],
            [
                '"phrase" "phrase"',
                [
                    new PhraseToken('"phrase"', 0, '', '"', 'phrase'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new PhraseToken('"phrase"', 9, '', '"', 'phrase'),
                ],
            ],
            ["\"phrase\nphrase\"", [new PhraseToken("\"phrase\nphrase\"", 0, '', '"', "phrase\nphrase")]],
            ["'phrase'", [new WordToken("'phrase'", 0, '', "'phrase'")]],
            [
                "'phrase' 'phrase'",
                [
                    new WordToken("'phrase'", 0, '', "'phrase'"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken("'phrase'", 9, '', "'phrase'"),
                ],
            ],
            [
                "'phrase\nphrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['"phrase\"phrase"', [new PhraseToken('"phrase\"phrase"', 0, '', '"', 'phrase"phrase')]],
            ["'phrase\\'phrase'", [new WordToken("'phrase\\'phrase'", 0, '', "'phrase\\'phrase'")]],
            ['"phrase\'phrase"', [new PhraseToken('"phrase\'phrase"', 0, '', '"', 'phrase\'phrase')]],
            [
                "'phrase\"phrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['\"not_phrase\"', [new WordToken('\"not_phrase\"', 0, '', '"not_phrase"')]],
            ["\\'not_phrase\\'", [new WordToken("\\'not_phrase\\'", 0, '', "\\'not_phrase\\'")]],
            [
                '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                [
                    new PhraseToken(
                        '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase + - ! ( ) AND OR NOT \\ phrase'
                    ),
                ],
            ],
            [
                "'word + - ! ( ) AND OR NOT \\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_MANDATORY, '+', 6),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new Token(Tokenizer::TOKEN_PROHIBITED, '-', 8),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 9),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 10),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new GroupBegin('(', 12, '(', ''),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 13),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 14),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 15),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 16),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 19),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 20),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 22),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 23),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 26),
                    new WordToken("\\ word'", 27, '', " word'"),
                ],
            ],
            [
                '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                [
                    new PhraseToken(
                        '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase'
                    ),
                ],
            ],
            [
                "'word \\+ \\- \\! \\( \\) \\AND \\OR \\NOT \\\\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new WordToken('\\+', 6, '', '+'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken('\\-', 9, '', '-'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new WordToken('\\!', 12, '', '!'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14),
                    new WordToken('\\(', 15, '', '('),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 17),
                    new WordToken('\\)', 18, '', ')'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 20),
                    new WordToken('\\AND', 21, '', '\AND'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 25),
                    new WordToken('\\OR', 26, '', '\OR'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 29),
                    new WordToken('\\NOT', 30, '', '\NOT'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 34),
                    new WordToken('\\\\', 35, '', '\\'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 37),
                    new WordToken("word'", 38, '', "word'"),
                ],
            ],
            ['#tag', [new TagToken('#tag', 0, '#', 'tag')]],
            ['\#tag', [new WordToken('\#tag', 0, '', '#tag')]],
            ['#tagšđčćž', [new WordToken('#tagšđčćž', 0, '', '#tagšđčćž')]],
            ['#_tag-tag', [new TagToken('#_tag-tag', 0, '#', '_tag-tag')]],
            ['#-not-tag', [new WordToken('#-not-tag', 0, '', '#-not-tag')]],
            ['#tag+', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 4)]],
            ['#tag-', [new TagToken('#tag-', 0, '#', 'tag-')]],
            ['#tag!', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 4)]],
            ["#tag\n", [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['#tag ', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['#tag(', [new TagToken('#tag', 0, '#', 'tag'), new GroupBeginToken('(', 4, '(', null)]],
            ['#tag)', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['@user', [new UserToken('@user', 0, '@', 'user')]],
            ['@user.user', [new UserToken('@user.user', 0, '@', 'user.user')]],
            ['\@user', [new WordToken('\@user', 0, '', '@user')]],
            ['@useršđčćž', [new WordToken('@useršđčćž', 0, '', '@useršđčćž')]],
            ['@_user-user', [new UserToken('@_user-user', 0, '@', '_user-user')]],
            ['@-not-user', [new WordToken('@-not-user', 0, '', '@-not-user')]],
            ['@user+', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 5)]],
            ['@user-', [new UserToken('@user-', 0, '@', 'user-')]],
            ['@user!', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 5)]],
            ["@user\n", [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 5)]],
            ['@user ', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5)]],
            ['@user(', [new UserToken('@user', 0, '@', 'user'), new GroupBeginToken('(', 5, '(', null)]],
            ['@user)', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5)]],
            ['domain:', [new WordToken('domain:', 0, '', 'domain:')]],
            ['some.domain:', [new WordToken('some.domain:', 0, '', 'some.domain:')]],
            ['domain:domain:', [new WordToken('domain:domain:', 0, 'domain', 'domain:')]],
            ['some.domain:some.domain:', [new WordToken('some.domain:some.domain:', 0, 'some.domain', 'some.domain:')]],
            [
                'domain:domain:domain:domain',
                [new WordToken('domain:domain:domain:domain', 0, 'domain', 'domain:domain:domain')],
            ],
            ['domain\:', [new WordToken('domain\:', 0, '', 'domain:')]],
            ['domain\::', [new WordToken('domain\::', 0, '', 'domain::')]],
            ['domain:word', [new WordToken('domain:word', 0, 'domain', 'word')]],
            ['domain\:word', [new WordToken('domain\:word', 0, '', 'domain:word')]],
            ['domain:"phrase"', [new PhraseToken('domain:"phrase"', 0, 'domain', '"', 'phrase')]],
            ['some.domain:"phrase"', [new PhraseToken('some.domain:"phrase"', 0, 'some.domain', '"', 'phrase')]],
            [
                'domain\:"phrase"',
                [new WordToken('domain\:', 0, '', 'domain:'), new PhraseToken('"phrase"', 8, '', '"', 'phrase')],
            ],
            [
                'domain:(one)',
                [
                    new GroupBeginToken('domain:(', 0, '(', 'domain'),
                    new WordToken('one', 8, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 11),
                ],
            ],
            [
                'some.domain:(one)',
                [
                    new GroupBeginToken('some.domain:(', 0, '(', 'some.domain'),
                    new WordToken('one', 13, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 16),
                ],
            ],
            [
                'one AND two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            [
                'one && two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, '&&', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one OR two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one || two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, '||', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one NOT two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            ['AND', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0)]],
            ['ANDword', [new WordToken('ANDword', 0, '', 'ANDword')]],
            ['wordAND', [new WordToken('wordAND', 0, '', 'wordAND')]],
            [
                'AND+',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            ['AND\+', [new WordToken('AND\+', 0, '', 'AND+')]],
            [
                '+AND',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND-',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            ['AND\-', [new WordToken('AND\-', 0, '', 'AND-')]],
            [
                '-AND',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND!',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            ['AND\!', [new WordToken('AND\!', 0, '', 'AND!')]],
            [
                '!AND',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                "AND\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'AND ',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['AND(', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'AND)',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
            ['ORword', [new WordToken('ORword', 0, '', 'ORword')]],
            ['ORword', [new WordToken('ORword', 0, '', 'ORword')]],
            ['OR', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0)]],
            ['OR+', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 2)]],
            ['OR\+', [new WordToken('OR\+', 0, '', 'OR+')]],
            ['+OR', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            ['OR-', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 2)]],
            ['OR\+', [new WordToken('OR\+', 0, '', 'OR+')]],
            ['-OR', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            [
                'OR!',
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 2)],
            ],
            ['OR\!', [new WordToken('OR\!', 0, '', 'OR!')]],
            [
                '!OR',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)],
            ],
            [
                "OR\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 2)],
            ],
            ['OR ', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2)]],
            ['OR(', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new GroupBeginToken('(', 2, '(', null)]],
            ['OR)', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 2)]],
            ['NOT', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0)]],
            ['NOTword', [new WordToken('NOTword', 0, '', 'NOTword')]],
            ['wordNOT', [new WordToken('wordNOT', 0, '', 'wordNOT')]],
            [
                'NOT+',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            [
                '+NOT',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT-',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            [
                '-NOT',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            [
                '!NOT',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                "NOT\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'NOT ',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['NOT(', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'NOT)',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
            ['+', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0)]],
            ['++', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 1)]],
            ['-', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0)]],
            ['--', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 1)]],
            ['!', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0)]],
            [
                '!!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 1)],
            ],
            ['+word', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new WordToken('word', 1, '', 'word')]],
            ['-word', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new WordToken('word', 1, '', 'word')]],
            ['!word', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new WordToken('word', 1, '', 'word')]],
            ['(word', [new GroupBeginToken('(', 0, '(', null), new WordToken('word', 1, '', 'word')]],
            [')word', [new Token(Tokenizer::TOKEN_GROUP_END, ')', 0), new WordToken('word', 1, '', 'word')]],
            ['word+', [new WordToken('word+', 0, '', 'word+')]],
            ['word-', [new WordToken('word-', 0, '', 'word-')]],
            ['word!', [new WordToken('word!', 0, '', 'word!')]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['one+two+', [new WordToken('one+two+', 0, '', 'one+two+')]],
            ['one-two-', [new WordToken('one-two-', 0, '', 'one-two-')]],
            ['one!two!', [new WordToken('one!two!', 0, '', 'one!two!')]],
            [
                'one(two(',
                [
                    new WordToken('one', 0, '', 'one'),
                    new GroupBeginToken('(', 3, '(', null),
                    new WordToken('two', 4, '', 'two'),
                    new GroupBeginToken('(', 7, '(', null),
                ],
            ],
            [
                'one)two)',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 3),
                    new WordToken('two', 4, '', 'two'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 7),
                ],
            ],
            ['word\+', [new WordToken('word\+', 0, '', 'word+')]],
            ['word\-', [new WordToken('word\-', 0, '', 'word-')]],
            ['word\!', [new WordToken('word\!', 0, '', 'word!')]],
            ['word\(', [new WordToken('word\(', 0, '', 'word(')]],
            ['word\)', [new WordToken('word\)', 0, '', 'word)')]],
            ['\+word', [new WordToken('\+word', 0, '', '+word')]],
            ['\-word', [new WordToken('\-word', 0, '', '-word')]],
            ['\!word', [new WordToken('\!word', 0, '', '!word')]],
            ['\(word', [new WordToken('\(word', 0, '', '(word')]],
            ['\)word', [new WordToken('\)word', 0, '', ')word')]],
            ['one\+two\+', [new WordToken('one\+two\+', 0, '', 'one+two+')]],
            ['one\-two\-', [new WordToken('one\-two\-', 0, '', 'one-two-')]],
            ['one\!two\!', [new WordToken('one\!two\!', 0, '', 'one!two!')]],
            ['one\(two\(', [new WordToken('one\(two\(', 0, '', 'one(two(')]],
            ['one\)two\)', [new WordToken('one\)two\)', 0, '', 'one)two)')]],
            [
                'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                [
                    new WordToken(
                        'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                        0,
                        '',
                        'one\)two\(one\+two\-one\!two'
                    ),
                ],
            ],
            [
                'one\\\\)two\\\\(one\\\\+two\\\\-one\\\\!two',
                [
                    new WordToken('one\\\\', 0, '', 'one\\'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
                    new WordToken('two\\\\', 6, '', 'two\\'),
                    new GroupBeginToken('(', 11, '(', null),
                    new WordToken('one\\\\+two\\\\-one\\\\!two', 12, '', 'one\+two\-one\!two'),
                ],
            ],
            ['one+two-one!two', [new WordToken('one+two-one!two', 0, '', 'one+two-one!two')]],
            ['one\\\'two', [new WordToken('one\\\'two', 0, '', "one\\'two")]],
            ['one\\"two', [new WordToken('one\\"two', 0, '', 'one"two')]],
            ['\\', [new WordToken('\\', 0, '', '\\')]],
            ['one\\two', [new WordToken('one\\two', 0, '', 'one\\two')]],
            ['one\\\\+\\-\\!\\(\\)two', [new WordToken('one\\\\+\\-\\!\\(\\)two', 0, '', 'one\\+-!()two')]],
            ['\\\\', [new WordToken('\\\\', 0, '', '\\')]],
            [
                '(type:)',
                [
                    new GroupBeginToken('(', 0, '(', null),
                    new WordToken('type:', 1, '', 'type:'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 6),
                ],
            ],
            [
                'type: AND',
                [
                    new WordToken('type:', 0, '', 'type:'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 6),
                ],
            ],
            ["word'", [new WordToken("word'", 0, '', "word'")]],
            ['one\'two', [new WordToken("one'two", 0, '', "one'two")]],
            ["AND'", [new WordToken("AND'", 0, '', "AND'")]],
            ["OR'", [new WordToken("OR'", 0, '', "OR'")]],
            ["NOT'", [new WordToken("NOT'", 0, '', "NOT'")]],
        ];
    }


    /**
     * @dataProvider providerForTestTokenize
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenize($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }


    public function providerForTestTokenizeNotRecognized(): array
    {
        return [
            [
                (
                // Scrutinizer: $blah is of type array|string here, which is flagged
                // when the value is used in concatenation below; mark it with
                // /** @scrutinizer ignore-type */ if this is a false positive.
                $blah = mb_convert_encoding(
                    '&#x1F469;&#x200D;&#x1F469;&#x200D;&#x1F467;&#x200D;&#x1F467;',
                    'UTF-8',
                    'HTML-ENTITIES'
                )
                ) . '"',
                // Scrutinizer (2 issues): $blah can also be of type array, but the
                // $lexeme and $word parameters of Word::__construct() only accept string.
                [new WordToken($blah, 0, '', $blah), new Token(Tokenizer::TOKEN_BAILOUT, '"', 7)],
            ],
            // Scrutinizer: concatenating $blah (array|string) is flagged here as well.
            ['"' . $blah, [new Token(Tokenizer::TOKEN_BAILOUT, '"', 0), new WordToken($blah, 1, '', $blah)]],
            ['word"', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_BAILOUT, '"', 4)]],
            [
                'one"two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 3),
                    new WordToken('two', 4, '', 'two'),
                ],
            ],
            [
                'šđ"čćž',
                [
                    new WordToken('šđ', 0, '', 'šđ'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 2),
                    new WordToken('čćž', 3, '', 'čćž'),
                ],
            ],
            ['AND"', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
            ['OR"', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 2)]],
            ['NOT"', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
        ];
    }


    /**
     * @dataProvider providerForTestTokenizeNotRecognized
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenizeNotRecognized($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }


    protected function getTokenExtractor(): AbstractTokenExtractor
    {
        return new Full();
    }

}
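
Note on the Scrutinizer findings above: all four trace back to the same root cause. mb_convert_encoding() is declared as returning array|string (it returns an array when passed an array), so the analyzer cannot prove that the value handed to Word::__construct() or used in concatenation is a string. Besides the suggested @scrutinizer ignore-type annotations, an explicit guard would also satisfy it. A minimal sketch; the toUtf8String() helper is hypothetical and not part of the FQL codebase:

<?php declare(strict_types = 1);

// Hypothetical helper that narrows mb_convert_encoding()'s array|string
// return type down to string, which is all Word::__construct() accepts.
function toUtf8String(string $htmlEntities): string
{
    $converted = mb_convert_encoding($htmlEntities, 'UTF-8', 'HTML-ENTITIES');
    if (! is_string($converted)) {
        throw new RuntimeException('mb_convert_encoding() did not return a string');
    }

    return $converted;
}

// The provider entries would then type-check cleanly:
// $jajeNaOko = toUtf8String('&#x1F373;');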
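
For orientation, a minimal sketch of the API these tests exercise, using only calls that appear in the test itself (Tokenizer::tokenize() with the Full extractor, plus getTokens() and getSource() on the returned TokenSequence); the query string is illustrative:

<?php declare(strict_types = 1);

use Apicart\FQL\Tokenizer\Full;
use Apicart\FQL\Tokenizer\Tokenizer;

// Tokenize a query the same way testTokenize() does.
$tokenizer = new Tokenizer(new Full());
$sequence = $tokenizer->tokenize('domain:"phrase" AND #tag');

// getSource() echoes the original input; getTokens() yields the recognized
// tokens, each constructed with a type, lexeme and offset, as asserted
// throughout the providers above.
assert($sequence->getSource() === 'domain:"phrase" AND #tag');
foreach ($sequence->getTokens() as $token) {
    var_dump($token);
}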