Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster to apply.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 22 | class Parser |
||
| 23 | { |
||
| 24 | /** |
||
| 25 | * |
||
| 26 | * Grammar (<identifier>s and <literal>s are simple scalars defined by regular expressions on the lexer): |
||
| 27 | * |
||
| 28 | * <operator> ::= "<" | ">" | "<=" | ">=" | "!=" | "<>" | "=" |
||
| 29 | * <logic> ::= "AND" | "OR" |
||
| 30 | * <assertion> ::= <identifier> <operator> <literal> |
||
| 31 | * <concatenation> ::= <statement> { <logic> <statement> } |
||
| 32 | * <group> ::= "(" <concatenation> ")" |
||
| 33 | * <statement> ::= <assertion> | <group> |
||
| 34 | * <query> ::= <concatenation> |
||
| 35 | * |
||
| 36 | */ |
||
| 37 | |||
| 38 | private $tokenIndex; |
||
| 39 | |||
| 40 | private $tokenStream = []; |
||
| 41 | |||
| 42 | public function __construct() |
||
| 46 | |||
| 47 | /** |
||
| 48 | * Lex, initialise and return the AST. |
||
| 49 | * |
||
| 50 | * @param $string |
||
| 51 | * |
||
| 52 | * @return bool|ASTAssertion|ASTGroup |
||
| 53 | */ |
||
| 54 | public function parse($string) |
||
| 61 | |||
| 62 | /** |
||
| 63 | * Entry point of the grammar parsing. |
||
| 64 | * |
||
| 65 | * @return bool|ASTAssertion|ASTGroup |
||
| 66 | */ |
||
| 67 | public function getAST() |
||
| 80 | |||
| 81 | /** |
||
| 82 | * Tries to match the following tokens to a <concatenation> grammar. |
||
| 83 | * |
||
| 84 | * @return bool|ASTAssertion|ASTGroup |
||
| 85 | */ |
||
| 86 | public function matchConcatenation() |
||
| 120 | |||
| 121 | /** |
||
| 122 | * Tries to match a general <statement>, that is a <group> or <assertion> |
||
| 123 | * |
||
| 124 | * @return bool|ASTAssertion|ASTGroup |
||
| 125 | */ |
||
| 126 | public function matchStatement() |
||
| 147 | |||
| 148 | /** |
||
| 149 | * Tries to match a <group> grammar to the following tokens |
||
| 150 | * |
||
| 151 | * @return bool|ASTAssertion|ASTGroup |
||
| 152 | */ |
||
| 153 | public function matchGroup() |
||
| 176 | |||
| 177 | /** |
||
| 178 | * Tries to match the following tokens to an <assertion>. |
||
| 179 | * |
||
| 180 | * @throws Exception\UQLSyntaxError |
||
| 181 | * @return bool|ASTAssertion |
||
| 182 | */ |
||
| 183 | public function matchAssertion() |
||
| 223 | |||
| 224 | /** |
||
| 225 | * Tries to match the next token to an <operator>. |
||
| 226 | * |
||
| 227 | * @return bool |
||
| 228 | */ |
||
| 229 | public function matchOperator() |
||
| 251 | |||
| 252 | public function matchArray() |
||
| 284 | |||
| 285 | /** |
||
| 286 | * Tries to match the next token to a <logic> operator |
||
| 287 | * |
||
| 288 | * @return bool |
||
| 289 | */ |
||
| 290 | public function matchLogic() |
||
| 303 | |||
| 304 | /** |
||
| 305 | * @return mixed |
||
| 306 | */ |
||
| 307 | public function getTokenStream() |
||
| 311 | |||
| 312 | /** |
||
| 313 | * @param mixed $tokenStream |
||
| 314 | */ |
||
| 315 | public function setTokenStream($tokenStream) |
||
| 319 | |||
| 320 | /** |
||
| 321 | * @return mixed |
||
| 322 | */ |
||
| 323 | public function getTokenIndex() |
||
| 327 | |||
| 328 | /** |
||
| 329 | * @param mixed $tokenIndex |
||
| 330 | */ |
||
| 331 | public function setTokenIndex($tokenIndex) |
||
| 335 | |||
| 336 | /** |
||
| 337 | * Advance the token index and return. |
||
| 338 | * |
||
| 339 | * @return bool |
||
| 340 | */ |
||
| 341 | private function nextToken() |
||
| 347 | |||
| 348 | /** |
||
| 349 | * Return the current token, without advancing the index. |
||
| 350 | * |
||
| 351 | * @return bool |
||
| 352 | */ |
||
| 353 | private function currentToken() |
||
| 357 | |||
| 358 | /** |
||
| 359 | * Move back the token index once. |
||
| 360 | */ |
||
| 361 | private function rewindToken() |
||
| 365 | |||
| 366 | /** |
||
| 367 | * Helper method. Throws an Exception representing a Syntax Error. |
||
| 368 | * |
||
| 369 | * @param $message |
||
| 370 | * |
||
| 371 | * @throws \Exception |
||
| 372 | */ |
||
| 373 | private function throwUnexpectedTokenSyntaxError(array $expectedTokenCategories, $message = null) |
||
| 390 | |||
| 391 | private function throwSyntaxError($message) |
||
| 395 | |||
| 396 | /** |
||
| 397 | * Transforms a literal subtype (e.g. T_LITERAL_FALSE) into a plain |
||
| 398 | * literal match. Plain literals are unchanged. |
||
| 399 | * |
||
| 400 | * @param $literal |
||
| 401 | * |
||
| 402 | * @return array |
||
| 403 | */ |
||
| 404 | private function transformLiteral($literal) |
||
| 425 | } |
||
| 426 |
The break statement is not necessary if it is preceded, for example, by a return statement, because the break can then never be reached.
If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.