Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser and, based on these observations, to apply Extract Interface as well.
| 1 | <?php | ||
| 22 | class Parser | ||
| 23 | { | ||
| 24 | /** | ||
| 25 | * | ||
| 26 | * Grammar (<identifier>s and <literal>s are simple scalars defined by regular expressions on the lexer): | ||
| 27 | * | ||
| 28 | * <operator> ::= "<" | ">" | "<=" | ">=" | "!=" | "<>" | "=" | ||
| 29 | * <logic> ::= "AND" | "OR" | ||
| 30 | * <assertion> ::= <identifier> <operator> <literal> | ||
| 31 |      * <concatenation>  ::=     <statement> { <logic> <statement> } | ||
| 32 |      * <group>          ::=     "(" <concatenation> ")" | ||
| 33 | * <statement> ::= <assertion> | <group> | ||
| 34 | * <query> ::= <concatenation> | ||
| 35 | * | ||
| 36 | */ | ||
| 37 | |||
| 38 | private $tokenIndex; | ||
| 39 | |||
| 40 | private $tokenStream = []; | ||
| 41 | |||
| 42 | public function __construct() | ||
| 46 | |||
| 47 | /** | ||
| 48 | * Lex, initialise and return the AST. | ||
| 49 | * | ||
| 50 | * @param $string | ||
| 51 | * | ||
| 52 | * @return bool|ASTAssertion|ASTGroup | ||
| 53 | */ | ||
| 54 | public function parse($string) | ||
| 61 | |||
| 62 | /** | ||
| 63 | * Entry point of the grammar parsing. | ||
| 64 | * | ||
| 65 | * @return bool|ASTAssertion|ASTGroup | ||
| 66 | */ | ||
| 67 | public function getAST() | ||
| 80 | |||
| 81 | /** | ||
| 82 | * Tries to match the following tokens to a <concatenation> grammar. | ||
| 83 | * | ||
| 84 | * @return bool|ASTAssertion|ASTGroup | ||
| 85 | */ | ||
| 86 | public function matchConcatenation() | ||
| 120 | |||
| 121 | /** | ||
| 122 | * Tries to match a general <statement>, that is a <group> or <assertion> | ||
| 123 | * | ||
| 124 | * @return bool|ASTAssertion|ASTGroup | ||
| 125 | */ | ||
| 126 | public function matchStatement() | ||
| 147 | |||
| 148 | /** | ||
| 149 | * Tries to match a <group> grammar to the following tokens | ||
| 150 | * | ||
| 151 | * @return bool|ASTAssertion|ASTGroup | ||
| 152 | */ | ||
| 153 | public function matchGroup() | ||
| 176 | |||
| 177 | /** | ||
| 178 | * Tries to match the following tokens to an <assertion>. | ||
| 179 | * | ||
| 180 | * @throws Exception\UQLSyntaxError | ||
| 181 | * @return bool|ASTAssertion | ||
| 182 | */ | ||
| 183 | public function matchAssertion() | ||
| 223 | |||
| 224 | /** | ||
| 225 | * Tries to match the next token to an <operator>. | ||
| 226 | * | ||
| 227 | * @return bool | ||
| 228 | */ | ||
| 229 | public function matchOperator() | ||
| 251 | |||
| 252 | public function matchArray() | ||
| 284 | |||
| 285 | /** | ||
| 286 | * Tries to match the next token to a <logic> operator | ||
| 287 | * | ||
| 288 | * @return bool | ||
| 289 | */ | ||
| 290 | public function matchLogic() | ||
| 303 | |||
| 304 | /** | ||
| 305 | * @return mixed | ||
| 306 | */ | ||
| 307 | public function getTokenStream() | ||
| 311 | |||
| 312 | /** | ||
| 313 | * @param mixed $tokenStream | ||
| 314 | */ | ||
| 315 | public function setTokenStream($tokenStream) | ||
| 319 | |||
| 320 | /** | ||
| 321 | * @return mixed | ||
| 322 | */ | ||
| 323 | public function getTokenIndex() | ||
| 327 | |||
| 328 | /** | ||
| 329 | * @param mixed $tokenIndex | ||
| 330 | */ | ||
| 331 | public function setTokenIndex($tokenIndex) | ||
| 335 | |||
| 336 | /** | ||
| 337 | * Advance the token index and return. | ||
| 338 | * | ||
| 339 | * @return bool | ||
| 340 | */ | ||
| 341 | private function nextToken() | ||
| 347 | |||
| 348 | /** | ||
| 349 | * Return the current token, without advancing the index. | ||
| 350 | * | ||
| 351 | * @return bool | ||
| 352 | */ | ||
| 353 | private function currentToken() | ||
| 357 | |||
| 358 | /** | ||
| 359 | * Move back the token index once. | ||
| 360 | */ | ||
| 361 | private function rewindToken() | ||
| 365 | |||
| 366 | /** | ||
| 367 | * Helper method. Throws an Exception representing a Syntax Error. | ||
| 368 | * | ||
| 369 | * @param $message | ||
| 370 | * | ||
| 371 | * @throws \Exception | ||
| 372 | */ | ||
| 373 | private function throwUnexpectedTokenSyntaxError(array $expectedTokenCategories, $message = null) | ||
| 390 | |||
| 391 | private function throwSyntaxError($message) | ||
| 395 | |||
| 396 | /** | ||
| 397 | * Transforms a literal subtype (e.g. T_LITERAL_FALSE) into a plain | ||
| 398 | * literal match. Plain literals are unchanged. | ||
| 399 | * | ||
| 400 | * @param $literal | ||
| 401 | * | ||
| 402 | * @return array | ||
| 403 | */ | ||
| 404 | private function transformLiteral($literal) | ||
| 425 | } | ||
| 426 | 
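The break statement is not necessary if it is preceded, for example, by a return statement, because the code after the return is unreachable (e.g. in `case 1: return 'foo'; break;` the `break` can be left off).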
If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.