<?php declare(strict_types = 1);

namespace Apicart\FQL\Tests\Tokenizer;

use Apicart\FQL\Token\Token\GroupBegin as GroupBeginToken;
use Apicart\FQL\Token\Token\Phrase as PhraseToken;
use Apicart\FQL\Token\Token\Range as RangeToken;
use Apicart\FQL\Token\Token\Tag as TagToken;
use Apicart\FQL\Token\Token\User as UserToken;
use Apicart\FQL\Token\Token\Word as WordToken;
use Apicart\FQL\Tokenizer\AbstractTokenExtractor;
use Apicart\FQL\Tokenizer\Full;
use Apicart\FQL\Tokenizer\Tokenizer;
use Apicart\FQL\Value\Token;
use Apicart\FQL\Value\TokenSequence;
use PHPUnit\Framework\TestCase;

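/**
 * Tokenization tests for the Full token extractor.
 */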
class FullTest extends TestCase
{

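    /**
     * Data provider for testTokenize().
     *
     * Each data set pairs an input string with the exact token sequence the
     * Full token extractor is expected to produce for it.
     */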
    public function providerForTestTokenize(): array
    {
        return [
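            // Whitespace, plain words, grouping parentheses, and multibyte/emoji input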
            [" \n", [new Token(Tokenizer::TOKEN_WHITESPACE, " \n", 0)]],
            ['word', [new WordToken('word', 0, '', 'word')]],
            ["word\n", [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['word ', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['šđčćž', [new WordToken('šđčćž', 0, '', 'šđčćž')]],
            [
                $jajeNaOko = mb_convert_encoding('🍳', 'UTF-8', 'HTML-ENTITIES'),
                [new WordToken($jajeNaOko, 0, '', $jajeNaOko)],
            ],
            [
                $blah = mb_convert_encoding(
                    '👩‍👩‍👧‍👧',
                    'UTF-8',
                    'HTML-ENTITIES'
                ),
                [new WordToken($blah, 0, '', $blah)],
            ],
            ['word-word', [new WordToken('word-word', 0, '', 'word-word')]],
            [
                "word\nword",
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            [
                'word word',
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            ['word\\ word', [new WordToken('word\\ word', 0, '', 'word word')]],
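            // Range tokens with inclusive ([, ]) and exclusive ({, }) bounds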
            ['[a TO b]', [new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive')]],
            ['[a TO b}', [new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive')]],
            ['{a TO b}', [new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive')]],
            ['{a TO b]', [new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive')]],
            [
                '[2017-01-01 TO 2017-01-05]',
                [
                    new RangeToken(
                        '[2017-01-01 TO 2017-01-05]',
                        0,
                        '',
                        '2017-01-01',
                        '2017-01-05',
                        'inclusive',
                        'inclusive'
                    ),
                ],
            ],
            ['[20 TO *]', [new RangeToken('[20 TO *]', 0, '', '20', '*', 'inclusive', 'inclusive')]],
            ['[* TO 20]', [new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive')]],
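            // Phrases are delimited by double quotes; single quotes are ordinary word characters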
            ['"phrase"', [new PhraseToken('"phrase"', 0, '', '"', 'phrase')]],
            [
                '"phrase" "phrase"',
                [
                    new PhraseToken('"phrase"', 0, '', '"', 'phrase'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new PhraseToken('"phrase"', 9, '', '"', 'phrase'),
                ],
            ],
            ["\"phrase\nphrase\"", [new PhraseToken("\"phrase\nphrase\"", 0, '', '"', "phrase\nphrase")]],
            ["'phrase'", [new WordToken("'phrase'", 0, '', "'phrase'")]],
            [
                "'phrase' 'phrase'",
                [
                    new WordToken("'phrase'", 0, '', "'phrase'"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken("'phrase'", 9, '', "'phrase'"),
                ],
            ],
            [
                "'phrase\nphrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['"phrase\"phrase"', [new PhraseToken('"phrase\"phrase"', 0, '', '"', 'phrase"phrase')]],
            ["'phrase\\'phrase'", [new WordToken("'phrase\\'phrase'", 0, '', "'phrase\\'phrase'")]],
            ['"phrase\'phrase"', [new PhraseToken('"phrase\'phrase"', 0, '', '"', 'phrase\'phrase')]],
            [
                "'phrase\"phrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['\"not_phrase\"', [new WordToken('\"not_phrase\"', 0, '', '"not_phrase"')]],
            ["\\'not_phrase\\'", [new WordToken("\\'not_phrase\\'", 0, '', "\\'not_phrase\\'")]],
            [
                '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                [
                    new PhraseToken(
                        '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase + - ! ( ) AND OR NOT \\ phrase'
                    ),
                ],
            ],
            [
                "'word + - ! ( ) AND OR NOT \\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_MANDATORY, '+', 6),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new Token(Tokenizer::TOKEN_PROHIBITED, '-', 8),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 9),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 10),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new GroupBeginToken('(', 12, '(', ''),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 13),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 14),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 15),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 16),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 19),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 20),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 22),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 23),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 26),
                    new WordToken("\\ word'", 27, '', " word'"),
                ],
            ],
            [
                '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                [
                    new PhraseToken(
                        '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase'
                    ),
                ],
            ],
            [
                "'word \\+ \\- \\! \\( \\) \\AND \\OR \\NOT \\\\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new WordToken('\\+', 6, '', '+'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken('\\-', 9, '', '-'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new WordToken('\\!', 12, '', '!'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14),
                    new WordToken('\\(', 15, '', '('),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 17),
                    new WordToken('\\)', 18, '', ')'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 20),
                    new WordToken('\\AND', 21, '', '\AND'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 25),
                    new WordToken('\\OR', 26, '', '\OR'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 29),
                    new WordToken('\\NOT', 30, '', '\NOT'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 34),
                    new WordToken('\\\\', 35, '', '\\'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 37),
                    new WordToken("word'", 38, '', "word'"),
                ],
            ],
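            // Tag tokens (#tag)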
            ['#tag', [new TagToken('#tag', 0, '#', 'tag')]],
            ['\#tag', [new WordToken('\#tag', 0, '', '#tag')]],
            ['#tagšđčćž', [new WordToken('#tagšđčćž', 0, '', '#tagšđčćž')]],
            ['#_tag-tag', [new TagToken('#_tag-tag', 0, '#', '_tag-tag')]],
            ['#-not-tag', [new WordToken('#-not-tag', 0, '', '#-not-tag')]],
            ['#tag+', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 4)]],
            ['#tag-', [new TagToken('#tag-', 0, '#', 'tag-')]],
            ['#tag!', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 4)]],
            ["#tag\n", [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['#tag ', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['#tag(', [new TagToken('#tag', 0, '#', 'tag'), new GroupBeginToken('(', 4, '(', null)]],
            ['#tag)', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
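            // User tokens (@user)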
            ['@user', [new UserToken('@user', 0, '@', 'user')]],
            ['@user.user', [new UserToken('@user.user', 0, '@', 'user.user')]],
            ['\@user', [new WordToken('\@user', 0, '', '@user')]],
            ['@useršđčćž', [new WordToken('@useršđčćž', 0, '', '@useršđčćž')]],
            ['@_user-user', [new UserToken('@_user-user', 0, '@', '_user-user')]],
            ['@-not-user', [new WordToken('@-not-user', 0, '', '@-not-user')]],
            ['@user+', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 5)]],
            ['@user-', [new UserToken('@user-', 0, '@', 'user-')]],
            ['@user!', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 5)]],
            ["@user\n", [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 5)]],
            ['@user ', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5)]],
            ['@user(', [new UserToken('@user', 0, '@', 'user'), new GroupBeginToken('(', 5, '(', null)]],
            ['@user)', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5)]],
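            // Domain-prefixed words, phrases, and groups (domain:...)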
            ['domain:', [new WordToken('domain:', 0, '', 'domain:')]],
            ['some.domain:', [new WordToken('some.domain:', 0, '', 'some.domain:')]],
            ['domain:domain:', [new WordToken('domain:domain:', 0, 'domain', 'domain:')]],
            ['some.domain:some.domain:', [new WordToken('some.domain:some.domain:', 0, 'some.domain', 'some.domain:')]],
            [
                'domain:domain:domain:domain',
                [new WordToken('domain:domain:domain:domain', 0, 'domain', 'domain:domain:domain')],
            ],
            ['domain\:', [new WordToken('domain\:', 0, '', 'domain:')]],
            ['domain\::', [new WordToken('domain\::', 0, '', 'domain::')]],
            ['domain:word', [new WordToken('domain:word', 0, 'domain', 'word')]],
            ['domain\:word', [new WordToken('domain\:word', 0, '', 'domain:word')]],
            ['domain:"phrase"', [new PhraseToken('domain:"phrase"', 0, 'domain', '"', 'phrase')]],
            ['some.domain:"phrase"', [new PhraseToken('some.domain:"phrase"', 0, 'some.domain', '"', 'phrase')]],
            [
                'domain\:"phrase"',
                [new WordToken('domain\:', 0, '', 'domain:'), new PhraseToken('"phrase"', 8, '', '"', 'phrase')],
            ],
            [
                'domain:(one)',
                [
                    new GroupBeginToken('domain:(', 0, '(', 'domain'),
                    new WordToken('one', 8, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 11),
                ],
            ],
            [
                'some.domain:(one)',
                [
                    new GroupBeginToken('some.domain:(', 0, '(', 'some.domain'),
                    new WordToken('one', 13, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 16),
                ],
            ],
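            // Binary logical operators (AND, &&, OR, ||, NOT) and how they attach to adjacent characters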
            [
                'one AND two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            [
                'one && two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, '&&', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one OR two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one || two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, '||', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one NOT two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            ['AND', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0)]],
            ['ANDword', [new WordToken('ANDword', 0, '', 'ANDword')]],
            ['wordAND', [new WordToken('wordAND', 0, '', 'wordAND')]],
            [
                'AND+',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            ['AND\+', [new WordToken('AND\+', 0, '', 'AND+')]],
            [
                '+AND',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND-',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            ['AND\-', [new WordToken('AND\-', 0, '', 'AND-')]],
            [
                '-AND',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND!',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            ['AND\!', [new WordToken('AND\!', 0, '', 'AND!')]],
            [
                '!AND',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                "AND\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'AND ',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['AND(', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'AND)',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
            ['ORword', [new WordToken('ORword', 0, '', 'ORword')]],
            ['wordOR', [new WordToken('wordOR', 0, '', 'wordOR')]],
            ['OR', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0)]],
            ['OR+', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 2)]],
            ['OR\+', [new WordToken('OR\+', 0, '', 'OR+')]],
            ['+OR', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            ['OR-', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 2)]],
            ['OR\-', [new WordToken('OR\-', 0, '', 'OR-')]],
            ['-OR', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            [
                'OR!',
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 2)],
            ],
            ['OR\!', [new WordToken('OR\!', 0, '', 'OR!')]],
            [
                '!OR',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)],
            ],
            [
                "OR\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 2)],
            ],
            ['OR ', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2)]],
            ['OR(', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new GroupBeginToken('(', 2, '(', null)]],
            ['OR)', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 2)]],
            ['NOT', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0)]],
            ['NOTword', [new WordToken('NOTword', 0, '', 'NOTword')]],
            ['wordNOT', [new WordToken('wordNOT', 0, '', 'wordNOT')]],
            [
                'NOT+',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            [
                '+NOT',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT-',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            [
                '-NOT',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            [
                '!NOT',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                "NOT\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'NOT ',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['NOT(', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'NOT)',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
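            // Unary operators, grouping parentheses, backslash escaping, and remaining edge cases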
            ['+', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0)]],
            ['++', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 1)]],
            ['-', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0)]],
            ['--', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 1)]],
            ['!', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0)]],
            [
                '!!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 1)],
            ],
            ['+word', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new WordToken('word', 1, '', 'word')]],
            ['-word', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new WordToken('word', 1, '', 'word')]],
            ['!word', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new WordToken('word', 1, '', 'word')]],
            ['(word', [new GroupBeginToken('(', 0, '(', null), new WordToken('word', 1, '', 'word')]],
            [')word', [new Token(Tokenizer::TOKEN_GROUP_END, ')', 0), new WordToken('word', 1, '', 'word')]],
            ['word+', [new WordToken('word+', 0, '', 'word+')]],
            ['word-', [new WordToken('word-', 0, '', 'word-')]],
            ['word!', [new WordToken('word!', 0, '', 'word!')]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['one+two+', [new WordToken('one+two+', 0, '', 'one+two+')]],
            ['one-two-', [new WordToken('one-two-', 0, '', 'one-two-')]],
            ['one!two!', [new WordToken('one!two!', 0, '', 'one!two!')]],
            [
                'one(two(',
                [
                    new WordToken('one', 0, '', 'one'),
                    new GroupBeginToken('(', 3, '(', null),
                    new WordToken('two', 4, '', 'two'),
                    new GroupBeginToken('(', 7, '(', null),
                ],
            ],
            [
                'one)two)',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 3),
                    new WordToken('two', 4, '', 'two'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 7),
                ],
            ],
            ['word\+', [new WordToken('word\+', 0, '', 'word+')]],
            ['word\-', [new WordToken('word\-', 0, '', 'word-')]],
            ['word\!', [new WordToken('word\!', 0, '', 'word!')]],
            ['word\(', [new WordToken('word\(', 0, '', 'word(')]],
            ['word\)', [new WordToken('word\)', 0, '', 'word)')]],
            ['\+word', [new WordToken('\+word', 0, '', '+word')]],
            ['\-word', [new WordToken('\-word', 0, '', '-word')]],
            ['\!word', [new WordToken('\!word', 0, '', '!word')]],
            ['\(word', [new WordToken('\(word', 0, '', '(word')]],
            ['\)word', [new WordToken('\)word', 0, '', ')word')]],
            ['one\+two\+', [new WordToken('one\+two\+', 0, '', 'one+two+')]],
            ['one\-two\-', [new WordToken('one\-two\-', 0, '', 'one-two-')]],
            ['one\!two\!', [new WordToken('one\!two\!', 0, '', 'one!two!')]],
            ['one\(two\(', [new WordToken('one\(two\(', 0, '', 'one(two(')]],
            ['one\)two\)', [new WordToken('one\)two\)', 0, '', 'one)two)')]],
            [
                'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                [
                    new WordToken(
                        'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                        0,
                        '',
                        'one\)two\(one\+two\-one\!two'
                    ),
                ],
            ],
            [
                'one\\\\)two\\\\(one\\\\+two\\\\-one\\\\!two',
                [
                    new WordToken('one\\\\', 0, '', 'one\\'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
                    new WordToken('two\\\\', 6, '', 'two\\'),
                    new GroupBeginToken('(', 11, '(', null),
                    new WordToken('one\\\\+two\\\\-one\\\\!two', 12, '', 'one\+two\-one\!two'),
                ],
            ],
            ['one+two-one!two', [new WordToken('one+two-one!two', 0, '', 'one+two-one!two')]],
            ['one\\\'two', [new WordToken('one\\\'two', 0, '', "one\\'two")]],
            ['one\\"two', [new WordToken('one\\"two', 0, '', 'one"two')]],
            ['\\', [new WordToken('\\', 0, '', '\\')]],
            ['one\\two', [new WordToken('one\\two', 0, '', 'one\\two')]],
            ['one\\\\+\\-\\!\\(\\)two', [new WordToken('one\\\\+\\-\\!\\(\\)two', 0, '', 'one\\+-!()two')]],
            ['\\\\', [new WordToken('\\\\', 0, '', '\\')]],
            [
                '(type:)',
                [
                    new GroupBeginToken('(', 0, '(', null),
                    new WordToken('type:', 1, '', 'type:'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 6),
                ],
            ],
            [
                'type: AND',
                [
                    new WordToken('type:', 0, '', 'type:'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 6),
                ],
            ],
            ["word'", [new WordToken("word'", 0, '', "word'")]],
            ['one\'two', [new WordToken("one'two", 0, '', "one'two")]],
            ["AND'", [new WordToken("AND'", 0, '', "AND'")]],
            ["OR'", [new WordToken("OR'", 0, '', "OR'")]],
            ["NOT'", [new WordToken("NOT'", 0, '', "NOT'")]],
        ];
    }


    /**
     * @dataProvider providerForTestTokenize
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenize($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }


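    /**
     * Data provider for testTokenizeNotRecognized().
     *
     * Each input contains an unbalanced double quote, which the tokenizer
     * reports as a TOKEN_BAILOUT token.
     */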
    public function providerForTestTokenizeNotRecognized(): array
    {
        return [
            [
                (
                    $blah = mb_convert_encoding(
                        '👩‍👩‍👧‍👧',
                        'UTF-8',
                        'HTML-ENTITIES'
                    )
                ) . '"',
                [new WordToken($blah, 0, '', $blah), new Token(Tokenizer::TOKEN_BAILOUT, '"', 7)],
            ],
            ['"' . $blah, [new Token(Tokenizer::TOKEN_BAILOUT, '"', 0), new WordToken($blah, 1, '', $blah)]],
            ['word"', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_BAILOUT, '"', 4)]],
            [
                'one"two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 3),
                    new WordToken('two', 4, '', 'two'),
                ],
            ],
            [
                'šđ"čćž',
                [
                    new WordToken('šđ', 0, '', 'šđ'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 2),
                    new WordToken('čćž', 3, '', 'čćž'),
                ],
            ],
            ['AND"', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
            ['OR"', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 2)]],
            ['NOT"', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
        ];
    }


    /**
     * @dataProvider providerForTestTokenizeNotRecognized
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenizeNotRecognized($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }


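    /**
     * Factory for the token extractor under test; kept as a separate method
     * so the same test cases could be reused with a different extractor.
     */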
    protected function getTokenExtractor(): AbstractTokenExtractor
    {
        return new Full;
    }

}