Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
22 | class Parser |
||
23 | { |
||
24 | /** |
||
25 | * |
||
26 | * Grammar (<identifier>s and <literal>s are simple scalars defined by regular expressions on the lexer): |
||
27 | * |
||
28 | * <operator> ::= "<" | ">" | "<=" | ">=" | "!=" | "<>" | "=" |
||
29 | * <logic> ::= "AND" | "OR" |
||
30 | * <assertion> ::= <identifier> <operator> <literal> |
||
31 | * <concatenation> ::= <statement> { <logic> <statement> } |
||
32 | * <group> ::= "(" <concatenation> ")" |
||
33 | * <statement> ::= <assertion> | <group> |
||
34 | * <query> ::= <concatenation> |
||
35 | * |
||
36 | */ |
||
37 | |||
38 | private $tokenIndex; |
||
39 | |||
40 | private $tokenStream = []; |
||
41 | |||
42 | public function __construct() |
||
46 | |||
47 | /** |
||
48 | * Lex, initialise and return the AST. |
||
49 | * |
||
50 | * @param $string |
||
51 | * |
||
52 | * @return bool|ASTAssertion|ASTGroup |
||
53 | */ |
||
54 | public function parse($string) |
||
61 | |||
62 | /** |
||
63 | * Entry point of the grammar parsing. |
||
64 | * |
||
65 | * @return bool|ASTAssertion|ASTGroup |
||
66 | */ |
||
67 | public function getAST() |
||
80 | |||
81 | /** |
||
82 | * Tries to match the following tokens to a <concatenation> grammar. |
||
83 | * |
||
84 | * @return bool|ASTAssertion|ASTGroup |
||
85 | */ |
||
86 | public function matchConcatenation() |
||
120 | |||
121 | /** |
||
122 | * Tries to match a general <statement>, that is a <group> or <assertion> |
||
123 | * |
||
124 | * @return bool|ASTAssertion|ASTGroup |
||
125 | */ |
||
126 | public function matchStatement() |
||
147 | |||
148 | /** |
||
149 | * Tries to match a <group> grammar to the following tokens |
||
150 | * |
||
151 | * @return bool|ASTAssertion|ASTGroup |
||
152 | */ |
||
153 | public function matchGroup() |
||
176 | |||
177 | /** |
||
178 | * Tries to match the following tokens to an <assertion>. |
||
179 | * |
||
180 | * @throws Exception\UQLSyntaxError |
||
181 | * @return bool|ASTAssertion |
||
182 | */ |
||
183 | public function matchAssertion() |
||
223 | |||
224 | /** |
||
225 | * Tries to match the next token to an <operator>. |
||
226 | * |
||
227 | * @return bool |
||
228 | */ |
||
229 | public function matchOperator() |
||
251 | |||
252 | public function matchArray() |
||
284 | |||
285 | /** |
||
286 | * Tries to match the next token to a <logic> operator |
||
287 | * |
||
288 | * @return bool |
||
289 | */ |
||
290 | public function matchLogic() |
||
303 | |||
304 | /** |
||
305 | * @return mixed |
||
306 | */ |
||
307 | public function getTokenStream() |
||
311 | |||
312 | /** |
||
313 | * @param mixed $tokenStream |
||
314 | */ |
||
315 | public function setTokenStream($tokenStream) |
||
319 | |||
320 | /** |
||
321 | * @return mixed |
||
322 | */ |
||
323 | public function getTokenIndex() |
||
327 | |||
328 | /** |
||
329 | * @param mixed $tokenIndex |
||
330 | */ |
||
331 | public function setTokenIndex($tokenIndex) |
||
335 | |||
336 | /** |
||
337 | * Advance the token index and return. |
||
338 | * |
||
339 | * @return bool |
||
340 | */ |
||
341 | private function nextToken() |
||
347 | |||
348 | /** |
||
349 | * Return the current token, without advancing the index. |
||
350 | * |
||
351 | * @return bool |
||
352 | */ |
||
353 | private function currentToken() |
||
357 | |||
358 | /** |
||
359 | * Move back the token index once. |
||
360 | */ |
||
361 | private function rewindToken() |
||
365 | |||
366 | /** |
||
367 | * Helper method. Throws an Exception representing a Syntax Error. |
||
368 | * |
||
369 | * @param $message |
||
370 | * |
||
371 | * @throws \Exception |
||
372 | */ |
||
373 | private function throwUnexpectedTokenSyntaxError(array $expectedTokenCategories, $message = null) |
||
390 | |||
391 | private function throwSyntaxError($message) |
||
395 | |||
396 | /** |
||
397 | * Transforms a literal subtype (e.g. T_LITERAL_FALSE) into a plain |
||
398 | * literal match. Plain literals are unchanged. |
||
399 | * |
||
400 | * @param $literal |
||
401 | * |
||
402 | * @return array |
||
403 | */ |
||
404 | private function transformLiteral($literal) |
||
425 | } |
||
426 |
The break statement is not necessary if it is preceded, for example, by a return statement, because the break can then never be reached (e.g. `case 1: return 'one'; break;`).
If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.