| Total Complexity | 189 |
| Total Lines | 1242 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like JS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use JS, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class JS extends Tokenizer |
||
| 17 | { |
||
| 18 | |||
| 19 | |||
| 20 | /** |
||
| 21 | * A list of tokens that are allowed to open a scope. |
||
| 22 | * |
||
| 23 | * This array also contains information about what kind of token the scope |
||
| 24 | * opener uses to open and close the scope, if the token strictly requires |
||
| 25 | * an opener, if the token can share a scope closer, and who it can be shared |
||
| 26 | * with. An example of a token that shares a scope closer is a CASE scope. |
||
| 27 | * |
||
| 28 | * @var array |
||
| 29 | */ |
||
| 30 | public $scopeOpeners = [ |
||
| 31 | T_IF => [ |
||
| 32 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 33 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 34 | 'strict' => false, |
||
| 35 | 'shared' => false, |
||
| 36 | 'with' => [], |
||
| 37 | ], |
||
| 38 | T_TRY => [ |
||
| 39 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 40 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 41 | 'strict' => true, |
||
| 42 | 'shared' => false, |
||
| 43 | 'with' => [], |
||
| 44 | ], |
||
| 45 | T_CATCH => [ |
||
| 46 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 47 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 48 | 'strict' => true, |
||
| 49 | 'shared' => false, |
||
| 50 | 'with' => [], |
||
| 51 | ], |
||
| 52 | T_ELSE => [ |
||
| 53 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 54 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 55 | 'strict' => false, |
||
| 56 | 'shared' => false, |
||
| 57 | 'with' => [], |
||
| 58 | ], |
||
| 59 | T_FOR => [ |
||
| 60 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 61 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 62 | 'strict' => false, |
||
| 63 | 'shared' => false, |
||
| 64 | 'with' => [], |
||
| 65 | ], |
||
| 66 | T_CLASS => [ |
||
| 67 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 68 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 69 | 'strict' => true, |
||
| 70 | 'shared' => false, |
||
| 71 | 'with' => [], |
||
| 72 | ], |
||
| 73 | T_FUNCTION => [ |
||
| 74 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 75 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 76 | 'strict' => false, |
||
| 77 | 'shared' => false, |
||
| 78 | 'with' => [], |
||
| 79 | ], |
||
| 80 | T_WHILE => [ |
||
| 81 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 82 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 83 | 'strict' => false, |
||
| 84 | 'shared' => false, |
||
| 85 | 'with' => [], |
||
| 86 | ], |
||
| 87 | T_DO => [ |
||
| 88 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 89 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 90 | 'strict' => true, |
||
| 91 | 'shared' => false, |
||
| 92 | 'with' => [], |
||
| 93 | ], |
||
| 94 | T_SWITCH => [ |
||
| 95 | 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], |
||
| 96 | 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], |
||
| 97 | 'strict' => true, |
||
| 98 | 'shared' => false, |
||
| 99 | 'with' => [], |
||
| 100 | ], |
||
| 101 | T_CASE => [ |
||
| 102 | 'start' => [T_COLON => T_COLON], |
||
| 103 | 'end' => [ |
||
| 104 | T_BREAK => T_BREAK, |
||
| 105 | T_RETURN => T_RETURN, |
||
| 106 | T_CONTINUE => T_CONTINUE, |
||
| 107 | T_THROW => T_THROW, |
||
| 108 | ], |
||
| 109 | 'strict' => true, |
||
| 110 | 'shared' => true, |
||
| 111 | 'with' => [ |
||
| 112 | T_DEFAULT => T_DEFAULT, |
||
| 113 | T_CASE => T_CASE, |
||
| 114 | T_SWITCH => T_SWITCH, |
||
| 115 | ], |
||
| 116 | ], |
||
| 117 | T_DEFAULT => [ |
||
| 118 | 'start' => [T_COLON => T_COLON], |
||
| 119 | 'end' => [ |
||
| 120 | T_BREAK => T_BREAK, |
||
| 121 | T_RETURN => T_RETURN, |
||
| 122 | T_CONTINUE => T_CONTINUE, |
||
| 123 | T_THROW => T_THROW, |
||
| 124 | ], |
||
| 125 | 'strict' => true, |
||
| 126 | 'shared' => true, |
||
| 127 | 'with' => [ |
||
| 128 | T_CASE => T_CASE, |
||
| 129 | T_SWITCH => T_SWITCH, |
||
| 130 | ], |
||
| 131 | ], |
||
| 132 | ]; |
||
| 133 | |||
| 134 | /** |
||
| 135 | * A list of tokens that end the scope. |
||
| 136 | * |
||
| 137 | * This array is just a unique collection of the end tokens |
||
| 138 | * from the scopeOpeners array. The data is duplicated here to |
||
| 139 | * save time during parsing of the file. |
||
| 140 | * |
||
| 141 | * @var array |
||
| 142 | */ |
||
| 143 | public $endScopeTokens = [ |
||
| 144 | T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, |
||
| 145 | T_BREAK => T_BREAK, |
||
| 146 | ]; |
||
| 147 | |||
| 148 | /** |
||
| 149 | * A list of special JS tokens and their types. |
||
| 150 | * |
||
| 151 | * @var array |
||
| 152 | */ |
||
| 153 | protected $tokenValues = [ |
||
| 154 | 'class' => 'T_CLASS', |
||
| 155 | 'function' => 'T_FUNCTION', |
||
| 156 | 'prototype' => 'T_PROTOTYPE', |
||
| 157 | 'try' => 'T_TRY', |
||
| 158 | 'catch' => 'T_CATCH', |
||
| 159 | 'return' => 'T_RETURN', |
||
| 160 | 'throw' => 'T_THROW', |
||
| 161 | 'break' => 'T_BREAK', |
||
| 162 | 'switch' => 'T_SWITCH', |
||
| 163 | 'continue' => 'T_CONTINUE', |
||
| 164 | 'if' => 'T_IF', |
||
| 165 | 'else' => 'T_ELSE', |
||
| 166 | 'do' => 'T_DO', |
||
| 167 | 'while' => 'T_WHILE', |
||
| 168 | 'for' => 'T_FOR', |
||
| 169 | 'var' => 'T_VAR', |
||
| 170 | 'case' => 'T_CASE', |
||
| 171 | 'default' => 'T_DEFAULT', |
||
| 172 | 'true' => 'T_TRUE', |
||
| 173 | 'false' => 'T_FALSE', |
||
| 174 | 'null' => 'T_NULL', |
||
| 175 | 'this' => 'T_THIS', |
||
| 176 | 'typeof' => 'T_TYPEOF', |
||
| 177 | '(' => 'T_OPEN_PARENTHESIS', |
||
| 178 | ')' => 'T_CLOSE_PARENTHESIS', |
||
| 179 | '{' => 'T_OPEN_CURLY_BRACKET', |
||
| 180 | '}' => 'T_CLOSE_CURLY_BRACKET', |
||
| 181 | '[' => 'T_OPEN_SQUARE_BRACKET', |
||
| 182 | ']' => 'T_CLOSE_SQUARE_BRACKET', |
||
| 183 | '?' => 'T_INLINE_THEN', |
||
| 184 | '.' => 'T_OBJECT_OPERATOR', |
||
| 185 | '+' => 'T_PLUS', |
||
| 186 | '-' => 'T_MINUS', |
||
| 187 | '*' => 'T_MULTIPLY', |
||
| 188 | '%' => 'T_MODULUS', |
||
| 189 | '/' => 'T_DIVIDE', |
||
| 190 | '^' => 'T_LOGICAL_XOR', |
||
| 191 | ',' => 'T_COMMA', |
||
| 192 | ';' => 'T_SEMICOLON', |
||
| 193 | ':' => 'T_COLON', |
||
| 194 | '<' => 'T_LESS_THAN', |
||
| 195 | '>' => 'T_GREATER_THAN', |
||
| 196 | '<<' => 'T_SL', |
||
| 197 | '>>' => 'T_SR', |
||
| 198 | '>>>' => 'T_ZSR', |
||
| 199 | '<<=' => 'T_SL_EQUAL', |
||
| 200 | '>>=' => 'T_SR_EQUAL', |
||
| 201 | '>>>=' => 'T_ZSR_EQUAL', |
||
| 202 | '<=' => 'T_IS_SMALLER_OR_EQUAL', |
||
| 203 | '>=' => 'T_IS_GREATER_OR_EQUAL', |
||
| 204 | '=>' => 'T_DOUBLE_ARROW', |
||
| 205 | '!' => 'T_BOOLEAN_NOT', |
||
| 206 | '||' => 'T_BOOLEAN_OR', |
||
| 207 | '&&' => 'T_BOOLEAN_AND', |
||
| 208 | '|' => 'T_BITWISE_OR', |
||
| 209 | '&' => 'T_BITWISE_AND', |
||
| 210 | '!=' => 'T_IS_NOT_EQUAL', |
||
| 211 | '!==' => 'T_IS_NOT_IDENTICAL', |
||
| 212 | '=' => 'T_EQUAL', |
||
| 213 | '==' => 'T_IS_EQUAL', |
||
| 214 | '===' => 'T_IS_IDENTICAL', |
||
| 215 | '-=' => 'T_MINUS_EQUAL', |
||
| 216 | '+=' => 'T_PLUS_EQUAL', |
||
| 217 | '*=' => 'T_MUL_EQUAL', |
||
| 218 | '/=' => 'T_DIV_EQUAL', |
||
| 219 | '%=' => 'T_MOD_EQUAL', |
||
| 220 | '++' => 'T_INC', |
||
| 221 | '--' => 'T_DEC', |
||
| 222 | '//' => 'T_COMMENT', |
||
| 223 | '/*' => 'T_COMMENT', |
||
| 224 | '/**' => 'T_DOC_COMMENT', |
||
| 225 | '*/' => 'T_COMMENT', |
||
| 226 | ]; |
||
| 227 | |||
| 228 | /** |
||
| 229 | * A list of string delimiters. |
||
| 230 | * |
||
| 231 | * @var array |
||
| 232 | */ |
||
| 233 | protected $stringTokens = [ |
||
| 234 | '\'' => '\'', |
||
| 235 | '"' => '"', |
||
| 236 | ]; |
||
| 237 | |||
| 238 | /** |
||
| 239 | * A list of tokens that start and end comments. |
||
| 240 | * |
||
| 241 | * @var array |
||
| 242 | */ |
||
| 243 | protected $commentTokens = [ |
||
| 244 | '//' => null, |
||
| 245 | '/*' => '*/', |
||
| 246 | '/**' => '*/', |
||
| 247 | ]; |
||
| 248 | |||
| 249 | |||
/**
 * Initialise the tokenizer.
 *
 * Pre-checks the content to see if it looks minified and refuses to
 * continue if it does; otherwise construction is delegated to the parent.
 *
 * @param string                  $content The content to tokenize.
 * @param \PHP_CodeSniffer\Config $config  The config data for the run.
 * @param string                  $eolChar The EOL char used in the content.
 *                                         Defaults to a real newline character.
 *
 * @return void
 * @throws TokenizerException If the file appears to be minified.
 */
public function __construct($content, Config $config, $eolChar="\n")
{
    // The default must be double-quoted: '\n' in single quotes is the
    // two-character string backslash + "n", which is not a line ending.
    if ($this->isMinifiedContent($content, $eolChar) === true) {
        throw new TokenizerException('File appears to be minified and cannot be processed');
    }

    // Constructors do not produce a value, so nothing is returned here.
    parent::__construct($content, $config, $eolChar);

}//end __construct()
||
| 271 | |||
| 272 | |||
/**
 * Creates an array of tokens when given some JS code.
 *
 * Tokenizing happens in two passes:
 *
 * 1. The string is walked one character at a time, collecting characters
 *    in a buffer and emitting raw tokens for whitespace, quoted strings,
 *    regular expressions, and every entry found in $this->tokenValues.
 *    Anything else is emitted as T_STRING.
 * 2. The raw tokens are post-processed: comment pieces are joined together
 *    (doc comments are handed to the Comment tokenizer), tokens containing
 *    newlines are split into one token per line, runs of digits and dots
 *    are merged into T_LNUMBER/T_DNUMBER, and the token following an
 *    object operator is retyped as T_STRING in most cases.
 *
 * The token stream always starts with an empty T_OPEN_TAG token and the
 * raw stream always ends with an empty T_CLOSE_TAG token.
 *
 * @param string $string The string to tokenize.
 *
 * @return array A list of tokens. Tokens created here are arrays with the
 *               keys 'code', 'type' and 'content'; doc comment tokens are
 *               whatever Comment::tokenizeString() returns.
 */
public function tokenize($string)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START JS TOKENIZING ***".PHP_EOL;
    }

    // The longest known token decides how far we ever need to look ahead.
    $maxTokenLength = 0;
    foreach ($this->tokenValues as $token => $values) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = [];
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new Comment();

    $tokens[] = [
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    ];

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($this->eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = Util\Common::prepareForOutput($char);
            $bufferContent = Util\Common::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = [
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if ($escapes === 0 || ($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = [
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $this->eolChar, $buffer).$char,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started and what was buffered
                // before it, so we can rewind if this is not a string.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                // Rewind to the opening quote and treat it as a plain char.
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. The next char may not
        // exist when the slash is the last character of the input.
        if ($buffer === $char
            && $char === '/'
            && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
        ) {
            $regex = $this->getRegexToken(
                $i,
                $string,
                $chars,
                $tokens,
                $this->eolChar
            );

            if ($regex !== null) {
                $tokens[] = [
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // The class must be A-Z (not A-z): the A-z range also covers
        // [ \ ] ^ and `, which would stop keywords directly followed by a
        // bracket (e.g. a[true]) from being recognised.
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = Util\Common::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Restart the buffer at this char so the larger token
                // can be completed by the following characters.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                // Replace the comment opener we just added with one
                // token per character.
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = Util\Common::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = [
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    // Flush whatever is left in the buffer. A strict comparison is used
    // because empty() treats the string "0" as empty and would drop it.
    if ($buffer !== '') {
        if ($inString !== '') {
            // The string did not end before the end of the file,
            // which means there was probably a syntax error somewhere.
            $tokens[] = [
                'code'    => T_STRING,
                'type'    => 'T_STRING',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_STRING ($content)".PHP_EOL;
            }
        } else {
            // Buffer contains whitespace from the end of the file.
            $tokens[] = [
                'code'    => T_WHITESPACE,
                'type'    => 'T_WHITESPACE',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
            }
        }//end if
    }//end if

    $tokens[] = [
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    ];

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = [];
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            // A missing or null closer means the comment ends at the EOL.
            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $this->eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                // Doc comments are tokenized separately and their tokens
                // are appended to the final stack as-is.
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
            Each new token keeps the type and code of the original token.
        */

        if (strpos($token['content'], $this->eolChar) !== false) {
            $tokenLines = explode($this->eolChar, $token['content']);
            $numLines   = count($tokenLines);

            for ($i = 0; $i < $numLines; $i++) {
                $newToken['content'] = $tokenLines[$i];
                if ($i === ($numLines - 1)) {
                    // Content ended with an EOL, so there is no last line.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $this->eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            // Merge consecutive raw tokens made only of digits and dots.
            // The final T_CLOSE_TAG has empty content, so the loop always
            // stops before running past the end of the raw token list.
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                // Overwrite the token that was just added to the final stack.
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END TOKENIZING ***".PHP_EOL;
    }

    return $finalTokens;

}//end tokenize()
||
| 909 | |||
| 910 | |||
/**
 * Tokenizes a regular expression if one is found.
 *
 * The character at index $char is only accepted as the start of a regular
 * expression when all of the following hold:
 *  - the last non-empty token already collected is one that may precede
 *    a regular expression literal;
 *  - an unescaped closing "/" is found before the end of the line;
 *  - the first non-space character after the literal (and any modifier
 *    letters) is one that may follow a regular expression.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char   The index of the possible regex start character.
 * @param string $string The complete content of the string being tokenized.
 * @param array  $chars  An array of characters being tokenized.
 * @param array  $tokens The current array of tokens found in the string.
 *
 * @return array|null Array with the keys "start", "end" and "content",
 *                    or NULL when no regular expression was found.
 */
public function getRegexToken($char, $string, $chars, $tokens)
{
    // Tokens that are allowed to appear directly before a regular expression.
    $beforeTokens = [
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    ];

    // Characters that are allowed to appear directly after a regular expression.
    $afterTokens = [
        ','            => true,
        ')'            => true,
        ']'            => true,
        ';'            => true,
        ' '            => true,
        '.'            => true,
        ':'            => true,
        $this->eolChar => true,
    ];

    // Find the last non-whitespace token that was added
    // to the tokens array.
    for ($prev = (count($tokens) - 1); $prev >= 0; $prev--) {
        if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // $prev is -1 when there is no non-empty token before this character.
    // That case is rejected explicitly so $tokens[-1] is never read.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($this->eolChar);

    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // Count the run of backslashes directly before this slash,
            // without looking past the opening delimiter. An even count
            // (including zero) means the backslashes escape each other and
            // the slash closes the expression; an odd count means the slash
            // itself is escaped.
            $backslashes = 0;
            for ($back = ($next - 1); $back > $char && $chars[$back] === '\\'; $back--) {
                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else if (substr($string, $next, $eolLength) === $this->eolChar) {
            // This is the last token on the line and regular
            // expressions need to be defined on a single line,
            // so this is not a regular expression.
            break;
        }
    }

    // The search either ran off the end of the content or stopped at
    // an end of line; in both cases there is no closing delimiter.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The characters directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (e.g., /pattern/gi).
    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip over any spaces that follow the regular expression.
    for ($next = ($next + 1); $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        }

        if (substr($string, $next, $eolLength) === $this->eolChar) {
            // This is the last token on the line.
            break;
        }
    }

    // Running off the end of the content means nothing follows the
    // expression, which is treated the same as an unexpected character.
    if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the characters together.
    $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));

    return [
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    ];

}//end getRegexToken()
||
| 1050 | |||
| 1051 | |||
| 1052 | /** |
||
| 1053 | * Performs additional processing after main tokenizing. |
||
| 1054 | * |
||
| 1055 | * This additional processing looks for properties, closures, labels and objects. |
||
| 1056 | * |
||
| 1057 | * @return void |
||
| 1058 | */ |
||
| 1059 | public function processAdditional() |
||
| 1060 | { |
||
| 1061 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1062 | echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL; |
||
| 1063 | } |
||
| 1064 | |||
| 1065 | $numTokens = count($this->tokens); |
||
| 1066 | $classStack = []; |
||
| 1067 | |||
| 1068 | for ($i = 0; $i < $numTokens; $i++) { |
||
| 1069 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1070 | $type = $this->tokens[$i]['type']; |
||
| 1071 | $content = Util\Common::prepareForOutput($this->tokens[$i]['content']); |
||
| 1072 | |||
| 1073 | echo str_repeat("\t", count($classStack)); |
||
| 1074 | echo "\tProcess token $i: $type => $content".PHP_EOL; |
||
| 1075 | } |
||
| 1076 | |||
| 1077 | // Looking for functions that are actually closures. |
||
| 1078 | if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) { |
||
| 1079 | for ($x = ($i + 1); $x < $numTokens; $x++) { |
||
| 1080 | if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) { |
||
| 1081 | break; |
||
| 1082 | } |
||
| 1083 | } |
||
| 1084 | |||
| 1085 | if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) { |
||
| 1086 | $this->tokens[$i]['code'] = T_CLOSURE; |
||
| 1087 | $this->tokens[$i]['type'] = 'T_CLOSURE'; |
||
| 1088 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1089 | $line = $this->tokens[$i]['line']; |
||
| 1090 | echo str_repeat("\t", count($classStack)); |
||
| 1091 | echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL; |
||
| 1092 | } |
||
| 1093 | |||
| 1094 | for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) { |
||
| 1095 | if (isset($this->tokens[$x]['conditions'][$i]) === false) { |
||
| 1096 | continue; |
||
| 1097 | } |
||
| 1098 | |||
| 1099 | $this->tokens[$x]['conditions'][$i] = T_CLOSURE; |
||
| 1100 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1101 | $type = $this->tokens[$x]['type']; |
||
| 1102 | echo str_repeat("\t", count($classStack)); |
||
| 1103 | echo "\t\t* cleaned $x ($type) *".PHP_EOL; |
||
| 1104 | } |
||
| 1105 | } |
||
| 1106 | }//end if |
||
| 1107 | |||
| 1108 | continue; |
||
| 1109 | } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET |
||
| 1110 | && isset($this->tokens[$i]['scope_condition']) === false |
||
| 1111 | && isset($this->tokens[$i]['bracket_closer']) === true |
||
| 1112 | ) { |
||
| 1113 | $condition = end($this->tokens[$i]['conditions']); |
||
| 1114 | reset($this->tokens[$i]['conditions']); |
||
| 1115 | if ($condition === T_CLASS) { |
||
| 1116 | // Possibly an ES6 method. To be classified as one, the previous |
||
| 1117 | // non-empty tokens need to be a set of parenthesis, and then a string |
||
| 1118 | // (the method name). |
||
| 1119 | for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) { |
||
| 1120 | if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) { |
||
| 1121 | break; |
||
| 1122 | } |
||
| 1123 | } |
||
| 1124 | |||
| 1125 | if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS) { |
||
| 1126 | $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener']; |
||
| 1127 | for ($name = ($parenOpener - 1); $name > 0; $name--) { |
||
| 1128 | if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) { |
||
| 1129 | break; |
||
| 1130 | } |
||
| 1131 | } |
||
| 1132 | |||
| 1133 | if ($this->tokens[$name]['code'] === T_STRING) { |
||
| 1134 | // We found a method name. |
||
| 1135 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1136 | $line = $this->tokens[$name]['line']; |
||
| 1137 | echo str_repeat("\t", count($classStack)); |
||
| 1138 | echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL; |
||
| 1139 | } |
||
| 1140 | |||
| 1141 | $closer = $this->tokens[$i]['bracket_closer']; |
||
| 1142 | |||
| 1143 | $this->tokens[$name]['code'] = T_FUNCTION; |
||
| 1144 | $this->tokens[$name]['type'] = 'T_FUNCTION'; |
||
| 1145 | |||
| 1146 | foreach ([$name, $i, $closer] as $token) { |
||
| 1147 | $this->tokens[$token]['scope_condition'] = $name; |
||
| 1148 | $this->tokens[$token]['scope_opener'] = $i; |
||
| 1149 | $this->tokens[$token]['scope_closer'] = $closer; |
||
| 1150 | $this->tokens[$token]['parenthesis_opener'] = $parenOpener; |
||
| 1151 | $this->tokens[$token]['parenthesis_closer'] = $parenCloser; |
||
| 1152 | $this->tokens[$token]['parenthesis_owner'] = $name; |
||
| 1153 | } |
||
| 1154 | |||
| 1155 | $this->tokens[$parenOpener]['parenthesis_owner'] = $name; |
||
| 1156 | $this->tokens[$parenCloser]['parenthesis_owner'] = $name; |
||
| 1157 | |||
| 1158 | for ($x = ($i + 1); $x < $closer; $x++) { |
||
| 1159 | $this->tokens[$x]['conditions'][$name] = T_FUNCTION; |
||
| 1160 | ksort($this->tokens[$x]['conditions'], SORT_NUMERIC); |
||
| 1161 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1162 | $type = $this->tokens[$x]['type']; |
||
| 1163 | echo str_repeat("\t", count($classStack)); |
||
| 1164 | echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL; |
||
| 1165 | } |
||
| 1166 | } |
||
| 1167 | |||
| 1168 | continue; |
||
| 1169 | }//end if |
||
| 1170 | }//end if |
||
| 1171 | }//end if |
||
| 1172 | |||
| 1173 | $classStack[] = $i; |
||
| 1174 | |||
| 1175 | $closer = $this->tokens[$i]['bracket_closer']; |
||
| 1176 | $this->tokens[$i]['code'] = T_OBJECT; |
||
| 1177 | $this->tokens[$i]['type'] = 'T_OBJECT'; |
||
| 1178 | $this->tokens[$closer]['code'] = T_CLOSE_OBJECT; |
||
| 1179 | $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT'; |
||
| 1180 | |||
| 1181 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1182 | echo str_repeat("\t", count($classStack)); |
||
| 1183 | echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL; |
||
| 1184 | echo str_repeat("\t", count($classStack)); |
||
| 1185 | echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL; |
||
| 1186 | } |
||
| 1187 | |||
| 1188 | for ($x = ($i + 1); $x < $closer; $x++) { |
||
| 1189 | $this->tokens[$x]['conditions'][$i] = T_OBJECT; |
||
| 1190 | ksort($this->tokens[$x]['conditions'], SORT_NUMERIC); |
||
| 1191 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1192 | $type = $this->tokens[$x]['type']; |
||
| 1193 | echo str_repeat("\t", count($classStack)); |
||
| 1194 | echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL; |
||
| 1195 | } |
||
| 1196 | } |
||
| 1197 | } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) { |
||
| 1198 | $opener = array_pop($classStack); |
||
| 1199 | } else if ($this->tokens[$i]['code'] === T_COLON) { |
||
| 1200 | // If it is a scope opener, it belongs to a |
||
| 1201 | // DEFAULT or CASE statement. |
||
| 1202 | if (isset($this->tokens[$i]['scope_condition']) === true) { |
||
| 1203 | continue; |
||
| 1204 | } |
||
| 1205 | |||
| 1206 | // Make sure this is not part of an inline IF statement. |
||
| 1207 | for ($x = ($i - 1); $x >= 0; $x--) { |
||
| 1208 | if ($this->tokens[$x]['code'] === T_INLINE_THEN) { |
||
| 1209 | $this->tokens[$i]['code'] = T_INLINE_ELSE; |
||
| 1210 | $this->tokens[$i]['type'] = 'T_INLINE_ELSE'; |
||
| 1211 | |||
| 1212 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1213 | echo str_repeat("\t", count($classStack)); |
||
| 1214 | echo "\t* token $i converted from T_COLON to T_INLINE_THEN *".PHP_EOL; |
||
| 1215 | } |
||
| 1216 | |||
| 1217 | continue(2); |
||
| 1218 | } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) { |
||
| 1219 | break; |
||
| 1220 | } |
||
| 1221 | } |
||
| 1222 | |||
| 1223 | // The string to the left of the colon is either a property or label. |
||
| 1224 | for ($label = ($i - 1); $label >= 0; $label--) { |
||
| 1225 | if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) { |
||
| 1226 | break; |
||
| 1227 | } |
||
| 1228 | } |
||
| 1229 | |||
| 1230 | if ($this->tokens[$label]['code'] !== T_STRING |
||
| 1231 | && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING |
||
| 1232 | ) { |
||
| 1233 | continue; |
||
| 1234 | } |
||
| 1235 | |||
| 1236 | if (empty($classStack) === false) { |
||
| 1237 | $this->tokens[$label]['code'] = T_PROPERTY; |
||
| 1238 | $this->tokens[$label]['type'] = 'T_PROPERTY'; |
||
| 1239 | |||
| 1240 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1241 | echo str_repeat("\t", count($classStack)); |
||
| 1242 | echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL; |
||
| 1243 | } |
||
| 1244 | } else { |
||
| 1245 | $this->tokens[$label]['code'] = T_LABEL; |
||
| 1246 | $this->tokens[$label]['type'] = 'T_LABEL'; |
||
| 1247 | |||
| 1248 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1249 | echo str_repeat("\t", count($classStack)); |
||
| 1250 | echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL; |
||
| 1251 | } |
||
| 1252 | }//end if |
||
| 1253 | }//end if |
||
| 1254 | }//end for |
||
| 1255 | |||
| 1256 | if (PHP_CODESNIFFER_VERBOSITY > 1) { |
||
| 1257 | echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL; |
||
| 1258 | } |
||
| 1264 |