<?php declare(strict_types = 1);

namespace Apicart\FQL\Tests\Tokenizer;

use Apicart\FQL\Token\Token\GroupBegin;
use Apicart\FQL\Token\Token\GroupBegin as GroupBeginToken;
use Apicart\FQL\Token\Token\Phrase as PhraseToken;
use Apicart\FQL\Token\Token\Range as RangeToken;
use Apicart\FQL\Token\Token\Tag as TagToken;
use Apicart\FQL\Token\Token\User as UserToken;
use Apicart\FQL\Token\Token\Word as WordToken;
use Apicart\FQL\Tokenizer\AbstractTokenExtractor;
use Apicart\FQL\Tokenizer\Full;
use Apicart\FQL\Tokenizer\Tokenizer;
use Apicart\FQL\Value\Token;
use Apicart\FQL\Value\TokenSequence;
use PHPUnit\Framework\TestCase;

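/**
 * Tokenizer test against the Full token extractor: every data provider case pairs
 * an input string with the exact token sequence expected from Tokenizer::tokenize(),
 * covering words, phrases, ranges, tags, users, domain prefixes, logical operators
 * and escaping.
 */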
class FullTest extends TestCase
{

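    /**
     * Each case: [input string, expected list of Token instances with their offsets].
     */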
    public function providerForTestTokenize(): array
    {
        return [
            [" \n", [new Token(Tokenizer::TOKEN_WHITESPACE, " \n", 0)]],
            ['word', [new WordToken('word', 0, '', 'word')]],
            ["word\n", [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['word ', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['šđčćž', [new WordToken('šđčćž', 0, '', 'šđčćž')]],
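            // Multi-byte (emoji) input assembled at runtime via mb_convert_encoding().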
            [
                $jajeNaOko = mb_convert_encoding('🍳', 'UTF-8', 'HTML-ENTITIES'),
                [new WordToken($jajeNaOko, 0, '', $jajeNaOko)],
            ],
            [
                $blah = mb_convert_encoding(
                    '👩‍👩‍👧‍👧',
                    'UTF-8',
                    'HTML-ENTITIES'
                ),
                [new WordToken($blah, 0, '', $blah)],
            ],
            ['word-word', [new WordToken('word-word', 0, '', 'word-word')]],
            [
                "word\nword",
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            [
                'word word',
                [
                    new WordToken('word', 0, '', 'word'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4),
                    new WordToken('word', 5, '', 'word'),
                ],
            ],
            ['word\\ word', [new WordToken('word\\ word', 0, '', 'word word')]],
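            // Range tokens: [ and ] mark inclusive bounds, { and } mark exclusive bounds.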
            ['[a TO b]', [new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive')]],
            ['[a TO b}', [new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive')]],
            ['{a TO b}', [new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive')]],
            ['{a TO b]', [new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive')]],
            [
                '[2017-01-01 TO 2017-01-05]',
                [
                    new RangeToken(
                        '[2017-01-01 TO 2017-01-05]',
                        0,
                        '',
                        '2017-01-01',
                        '2017-01-05',
                        'inclusive',
                        'inclusive'
                    ),
                ],
            ],
            ['[20 TO *]', [new RangeToken('[20 TO *]', 0, '', '20', '*', 'inclusive', 'inclusive')]],
            ['[* TO 20]', [new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive')]],
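            // Phrases: double quotes delimit phrases; single quotes have no special meaning.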
            ['"phrase"', [new PhraseToken('"phrase"', 0, '', '"', 'phrase')]],
            [
                '"phrase" "phrase"',
                [
                    new PhraseToken('"phrase"', 0, '', '"', 'phrase'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new PhraseToken('"phrase"', 9, '', '"', 'phrase'),
                ],
            ],
            ["\"phrase\nphrase\"", [new PhraseToken("\"phrase\nphrase\"", 0, '', '"', "phrase\nphrase")]],
            ["'phrase'", [new WordToken("'phrase'", 0, '', "'phrase'")]],
            [
                "'phrase' 'phrase'",
                [
                    new WordToken("'phrase'", 0, '', "'phrase'"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken("'phrase'", 9, '', "'phrase'"),
                ],
            ],
            [
                "'phrase\nphrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['"phrase\"phrase"', [new PhraseToken('"phrase\"phrase"', 0, '', '"', 'phrase"phrase')]],
            ["'phrase\\'phrase'", [new WordToken("'phrase\\'phrase'", 0, '', "'phrase\\'phrase'")]],
            ['"phrase\'phrase"', [new PhraseToken('"phrase\'phrase"', 0, '', '"', 'phrase\'phrase')]],
            [
                "'phrase\"phrase'",
                [
                    new WordToken("'phrase", 0, '', "'phrase"),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 7),
                    new WordToken("phrase'", 8, '', "phrase'"),
                ],
            ],
            ['\"not_phrase\"', [new WordToken('\"not_phrase\"', 0, '', '"not_phrase"')]],
            ["\\'not_phrase\\'", [new WordToken("\\'not_phrase\\'", 0, '', "\\'not_phrase\\'")]],
            [
                '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                [
                    new PhraseToken(
                        '"phrase + - ! ( ) AND OR NOT \\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase + - ! ( ) AND OR NOT \\ phrase'
                    ),
                ],
            ],
            [
                "'word + - ! ( ) AND OR NOT \\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_MANDATORY, '+', 6),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new Token(Tokenizer::TOKEN_PROHIBITED, '-', 8),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 9),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 10),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new GroupBegin('(', 12, '(', ''),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 13),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 14),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 15),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 16),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 19),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 20),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 22),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 23),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 26),
                    new WordToken("\\ word'", 27, '', " word'"),
                ],
            ],
            [
                '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                [
                    new PhraseToken(
                        '"phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase"',
                        0,
                        '',
                        '"',
                        'phrase \+ \- \! \( \) \AND \OR \NOT \\\\ phrase'
                    ),
                ],
            ],
            [
                "'word \\+ \\- \\! \\( \\) \\AND \\OR \\NOT \\\\ word'",
                [
                    new WordToken("'word", 0, '', "'word"),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new WordToken('\\+', 6, '', '+'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 8),
                    new WordToken('\\-', 9, '', '-'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11),
                    new WordToken('\\!', 12, '', '!'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14),
                    new WordToken('\\(', 15, '', '('),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 17),
                    new WordToken('\\)', 18, '', ')'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 20),
                    new WordToken('\\AND', 21, '', '\AND'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 25),
                    new WordToken('\\OR', 26, '', '\OR'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 29),
                    new WordToken('\\NOT', 30, '', '\NOT'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 34),
                    new WordToken('\\\\', 35, '', '\\'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 37),
                    new WordToken("word'", 38, '', "word'"),
                ],
            ],
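            // Tag (#) and user (@) tokens.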
            ['#tag', [new TagToken('#tag', 0, '#', 'tag')]],
            ['\#tag', [new WordToken('\#tag', 0, '', '#tag')]],
            ['#tagšđčćž', [new WordToken('#tagšđčćž', 0, '', '#tagšđčćž')]],
            ['#_tag-tag', [new TagToken('#_tag-tag', 0, '#', '_tag-tag')]],
            ['#-not-tag', [new WordToken('#-not-tag', 0, '', '#-not-tag')]],
            ['#tag+', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 4)]],
            ['#tag-', [new TagToken('#tag-', 0, '#', 'tag-')]],
            ['#tag!', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 4)]],
            ["#tag\n", [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)]],
            ['#tag ', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)]],
            ['#tag(', [new TagToken('#tag', 0, '#', 'tag'), new GroupBeginToken('(', 4, '(', null)]],
            ['#tag)', [new TagToken('#tag', 0, '#', 'tag'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['@user', [new UserToken('@user', 0, '@', 'user')]],
            ['@user.user', [new UserToken('@user.user', 0, '@', 'user.user')]],
            ['\@user', [new WordToken('\@user', 0, '', '@user')]],
            ['@useršđčćž', [new WordToken('@useršđčćž', 0, '', '@useršđčćž')]],
            ['@_user-user', [new UserToken('@_user-user', 0, '@', '_user-user')]],
            ['@-not-user', [new WordToken('@-not-user', 0, '', '@-not-user')]],
            ['@user+', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_MANDATORY, '+', 5)]],
            ['@user-', [new UserToken('@user-', 0, '@', 'user-')]],
            ['@user!', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 5)]],
            ["@user\n", [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 5)]],
            ['@user ', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5)]],
            ['@user(', [new UserToken('@user', 0, '@', 'user'), new GroupBeginToken('(', 5, '(', null)]],
            ['@user)', [new UserToken('@user', 0, '@', 'user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5)]],
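            // Domain-prefixed terms, phrases and groups.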
            ['domain:', [new WordToken('domain:', 0, '', 'domain:')]],
            ['some.domain:', [new WordToken('some.domain:', 0, '', 'some.domain:')]],
            ['domain:domain:', [new WordToken('domain:domain:', 0, 'domain', 'domain:')]],
            ['some.domain:some.domain:', [new WordToken('some.domain:some.domain:', 0, 'some.domain', 'some.domain:')]],
            [
                'domain:domain:domain:domain',
                [new WordToken('domain:domain:domain:domain', 0, 'domain', 'domain:domain:domain')],
            ],
            ['domain\:', [new WordToken('domain\:', 0, '', 'domain:')]],
            ['domain\::', [new WordToken('domain\::', 0, '', 'domain::')]],
            ['domain:word', [new WordToken('domain:word', 0, 'domain', 'word')]],
            ['domain\:word', [new WordToken('domain\:word', 0, '', 'domain:word')]],
            ['domain:"phrase"', [new PhraseToken('domain:"phrase"', 0, 'domain', '"', 'phrase')]],
            ['some.domain:"phrase"', [new PhraseToken('some.domain:"phrase"', 0, 'some.domain', '"', 'phrase')]],
            [
                'domain\:"phrase"',
                [new WordToken('domain\:', 0, '', 'domain:'), new PhraseToken('"phrase"', 8, '', '"', 'phrase')],
            ],
            [
                'domain:(one)',
                [
                    new GroupBeginToken('domain:(', 0, '(', 'domain'),
                    new WordToken('one', 8, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 11),
                ],
            ],
            [
                'some.domain:(one)',
                [
                    new GroupBeginToken('some.domain:(', 0, '(', 'some.domain'),
                    new WordToken('one', 13, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 16),
                ],
            ],
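            // Logical operators: AND/&&, OR/||, NOT and their word-boundary behaviour.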
            [
                'one AND two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            [
                'one && two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, '&&', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one OR two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one || two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_OR, '||', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6),
                    new WordToken('two', 7, '', 'two'),
                ],
            ],
            [
                'one NOT two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3),
                    new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 4),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 7),
                    new WordToken('two', 8, '', 'two'),
                ],
            ],
            ['AND', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0)]],
            ['ANDword', [new WordToken('ANDword', 0, '', 'ANDword')]],
            ['wordAND', [new WordToken('wordAND', 0, '', 'wordAND')]],
            [
                'AND+',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            ['AND\+', [new WordToken('AND\+', 0, '', 'AND+')]],
            [
                '+AND',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND-',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            ['AND\-', [new WordToken('AND\-', 0, '', 'AND-')]],
            [
                '-AND',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                'AND!',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            ['AND\!', [new WordToken('AND\!', 0, '', 'AND!')]],
            [
                '!AND',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 1)],
            ],
            [
                "AND\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'AND ',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['AND(', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'AND)',
                [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
            ['ORword', [new WordToken('ORword', 0, '', 'ORword')]],
            ['wordOR', [new WordToken('wordOR', 0, '', 'wordOR')]],
            ['OR', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0)]],
            ['OR+', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 2)]],
            ['OR\+', [new WordToken('OR\+', 0, '', 'OR+')]],
            ['+OR', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            ['OR-', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 2)]],
            ['OR\-', [new WordToken('OR\-', 0, '', 'OR-')]],
            ['-OR', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)]],
            [
                'OR!',
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 2)],
            ],
            ['OR\!', [new WordToken('OR\!', 0, '', 'OR!')]],
            [
                '!OR',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 1)],
            ],
            [
                "OR\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 2)],
            ],
            ['OR ', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2)]],
            ['OR(', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new GroupBeginToken('(', 2, '(', null)]],
            ['OR)', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 2)]],
            ['NOT', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0)]],
            ['NOTword', [new WordToken('NOTword', 0, '', 'NOTword')]],
            ['wordNOT', [new WordToken('wordNOT', 0, '', 'wordNOT')]],
            [
                'NOT+',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 3)],
            ],
            [
                '+NOT',
                [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT-',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 3)],
            ],
            [
                '-NOT',
                [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                'NOT!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 3)],
            ],
            [
                '!NOT',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 1)],
            ],
            [
                "NOT\n",
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 3)],
            ],
            [
                'NOT ',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3)],
            ],
            ['NOT(', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new GroupBeginToken('(', 3, '(', null)]],
            [
                'NOT)',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_GROUP_END, ')', 3)],
            ],
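            // Standalone +, -, ! and group markers, plus backslash escaping.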
            ['+', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0)]],
            ['++', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new Token(Tokenizer::TOKEN_MANDATORY, '+', 1)]],
            ['-', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0)]],
            ['--', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new Token(Tokenizer::TOKEN_PROHIBITED, '-', 1)]],
            ['!', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0)]],
            [
                '!!',
                [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 1)],
            ],
            ['+word', [new Token(Tokenizer::TOKEN_MANDATORY, '+', 0), new WordToken('word', 1, '', 'word')]],
            ['-word', [new Token(Tokenizer::TOKEN_PROHIBITED, '-', 0), new WordToken('word', 1, '', 'word')]],
            ['!word', [new Token(Tokenizer::TOKEN_LOGICAL_NOT_2, '!', 0), new WordToken('word', 1, '', 'word')]],
            ['(word', [new GroupBeginToken('(', 0, '(', null), new WordToken('word', 1, '', 'word')]],
            [')word', [new Token(Tokenizer::TOKEN_GROUP_END, ')', 0), new WordToken('word', 1, '', 'word')]],
            ['word+', [new WordToken('word+', 0, '', 'word+')]],
            ['word-', [new WordToken('word-', 0, '', 'word-')]],
            ['word!', [new WordToken('word!', 0, '', 'word!')]],
            ['word(', [new WordToken('word', 0, '', 'word'), new GroupBeginToken('(', 4, '(', null)]],
            ['word)', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)]],
            ['one+two+', [new WordToken('one+two+', 0, '', 'one+two+')]],
            ['one-two-', [new WordToken('one-two-', 0, '', 'one-two-')]],
            ['one!two!', [new WordToken('one!two!', 0, '', 'one!two!')]],
            [
                'one(two(',
                [
                    new WordToken('one', 0, '', 'one'),
                    new GroupBeginToken('(', 3, '(', null),
                    new WordToken('two', 4, '', 'two'),
                    new GroupBeginToken('(', 7, '(', null),
                ],
            ],
            [
                'one)two)',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 3),
                    new WordToken('two', 4, '', 'two'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 7),
                ],
            ],
            ['word\+', [new WordToken('word\+', 0, '', 'word+')]],
            ['word\-', [new WordToken('word\-', 0, '', 'word-')]],
            ['word\!', [new WordToken('word\!', 0, '', 'word!')]],
            ['word\(', [new WordToken('word\(', 0, '', 'word(')]],
            ['word\)', [new WordToken('word\)', 0, '', 'word)')]],
            ['\+word', [new WordToken('\+word', 0, '', '+word')]],
            ['\-word', [new WordToken('\-word', 0, '', '-word')]],
            ['\!word', [new WordToken('\!word', 0, '', '!word')]],
            ['\(word', [new WordToken('\(word', 0, '', '(word')]],
            ['\)word', [new WordToken('\)word', 0, '', ')word')]],
            ['one\+two\+', [new WordToken('one\+two\+', 0, '', 'one+two+')]],
            ['one\-two\-', [new WordToken('one\-two\-', 0, '', 'one-two-')]],
            ['one\!two\!', [new WordToken('one\!two\!', 0, '', 'one!two!')]],
            ['one\(two\(', [new WordToken('one\(two\(', 0, '', 'one(two(')]],
            ['one\)two\)', [new WordToken('one\)two\)', 0, '', 'one)two)')]],
            [
                'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                [
                    new WordToken(
                        'one\\\\\)two\\\\\(one\\\\\+two\\\\\-one\\\\\!two',
                        0,
                        '',
                        'one\)two\(one\+two\-one\!two'
                    ),
                ],
            ],
            [
                'one\\\\)two\\\\(one\\\\+two\\\\-one\\\\!two',
                [
                    new WordToken('one\\\\', 0, '', 'one\\'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
                    new WordToken('two\\\\', 6, '', 'two\\'),
                    new GroupBeginToken('(', 11, '(', null),
                    new WordToken('one\\\\+two\\\\-one\\\\!two', 12, '', 'one\+two\-one\!two'),
                ],
            ],
            ['one+two-one!two', [new WordToken('one+two-one!two', 0, '', 'one+two-one!two')]],
            ['one\\\'two', [new WordToken('one\\\'two', 0, '', "one\\'two")]],
            ['one\\"two', [new WordToken('one\\"two', 0, '', 'one"two')]],
            ['\\', [new WordToken('\\', 0, '', '\\')]],
            ['one\\two', [new WordToken('one\\two', 0, '', 'one\\two')]],
            ['one\\\\+\\-\\!\\(\\)two', [new WordToken('one\\\\+\\-\\!\\(\\)two', 0, '', 'one\\+-!()two')]],
            ['\\\\', [new WordToken('\\\\', 0, '', '\\')]],
            [
                '(type:)',
                [
                    new GroupBeginToken('(', 0, '(', null),
                    new WordToken('type:', 1, '', 'type:'),
                    new Token(Tokenizer::TOKEN_GROUP_END, ')', 6),
                ],
            ],
            [
                'type: AND',
                [
                    new WordToken('type:', 0, '', 'type:'),
                    new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
                    new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 6),
                ],
            ],
            ["word'", [new WordToken("word'", 0, '', "word'")]],
            ['one\'two', [new WordToken("one'two", 0, '', "one'two")]],
            ["AND'", [new WordToken("AND'", 0, '', "AND'")]],
            ["OR'", [new WordToken("OR'", 0, '', "OR'")]],
            ["NOT'", [new WordToken("NOT'", 0, '', "NOT'")]],
        ];
    }

    /**
     * @dataProvider providerForTestTokenize
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenize($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }

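    /**
     * Each case: [input string, expected token list]; a double quote that does not
     * open a valid phrase is expected to surface as a TOKEN_BAILOUT token.
     */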
    public function providerForTestTokenizeNotRecognized(): array
    {
        return [
            [
                (
                    $blah = mb_convert_encoding(
                        '👩‍👩‍👧‍👧',
                        'UTF-8',
                        'HTML-ENTITIES'
                    )
                ) . '"',
                [new WordToken($blah, 0, '', $blah), new Token(Tokenizer::TOKEN_BAILOUT, '"', 7)],
            ],
            ['"' . $blah, [new Token(Tokenizer::TOKEN_BAILOUT, '"', 0), new WordToken($blah, 1, '', $blah)]],
            ['word"', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_BAILOUT, '"', 4)]],
            [
                'one"two',
                [
                    new WordToken('one', 0, '', 'one'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 3),
                    new WordToken('two', 4, '', 'two'),
                ],
            ],
            [
                'šđ"čćž',
                [
                    new WordToken('šđ', 0, '', 'šđ'),
                    new Token(Tokenizer::TOKEN_BAILOUT, '"', 2),
                    new WordToken('čćž', 3, '', 'čćž'),
                ],
            ],
            ['AND"', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
            ['OR"', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 2)]],
            ['NOT"', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]],
        ];
    }


    /**
     * @dataProvider providerForTestTokenizeNotRecognized
     *
     * @param string $string
     * @param Token[] $expectedTokens
     */
    public function testTokenizeNotRecognized($string, array $expectedTokens): void
    {
        $tokenExtractor = $this->getTokenExtractor();
        $tokenizer = new Tokenizer($tokenExtractor);
        $tokenSequence = $tokenizer->tokenize($string);
        self::assertInstanceOf(TokenSequence::class, $tokenSequence);
        self::assertEquals($expectedTokens, $tokenSequence->getTokens());
        self::assertEquals($string, $tokenSequence->getSource());
    }

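    /**
     * Overridable hook so subclasses can run the same cases against a different token extractor.
     */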
    protected function getTokenExtractor(): AbstractTokenExtractor
    {
        return new Full;
    }

}