Complex classes like Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Tokenizer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
8 | class Tokenizer { |
||
9 | private $str; |
||
10 | const NAME = 'LITERAL'; |
||
11 | const STRING = 'STRING'; |
||
12 | const OPEN_BRACKET = 'OPEN_BRACKET'; |
||
13 | const CLOSE_BRACKET = 'CLOSE_BRACKET'; |
||
14 | const OPEN_SQUARE_BRACKET = 'SQUARE_BRACKET'; |
||
15 | const CLOSE_SQUARE_BRACKET = 'CLOSE_SQUARE_BRACKET'; |
||
16 | const CONCAT = 'CONCAT'; |
||
17 | const ARG = 'ARG'; |
||
18 | const WHITESPACE = 'WHITESPACE'; |
||
19 | const NEW_LINE = 'NEW_LINE'; |
||
20 | const DOT = 'DOT'; |
||
21 | const NUMERIC = 'NUMERIC'; |
||
22 | const EQUALS = 'EQUALS'; |
||
23 | const NOT = 'NOT'; |
||
24 | const OPEN_BRACE = 'OPEN_BRACE'; |
||
25 | const CLOSE_BRACE = 'CLOSE_BRACE'; |
||
26 | const BOOL = 'BOOL'; |
||
27 | const COLON = 'COLON'; |
||
28 | const SEMI_COLON = 'SEMI_COLON'; |
||
29 | const NUM_SIGN = 'NUM_SIGN'; |
||
30 | const GREATER_THAN = 'GREATER_THAN'; |
||
31 | const AT_SIGN = 'AT_SIGN'; |
||
32 | const SUBTRACT = 'SUBTRACT'; |
||
33 | const MULTIPLY = 'MULTIPLY'; |
||
34 | const DIVIDE = 'DIVIDE'; |
||
35 | |||
36 | private $lineNo = 1; |
||
37 | |||
38 | private $chars = [ |
||
39 | '"' => self::STRING, |
||
40 | '\'' => self::STRING, |
||
41 | '(' => self::OPEN_BRACKET, |
||
42 | ')' => self::CLOSE_BRACKET, |
||
43 | '[' => self::OPEN_SQUARE_BRACKET, |
||
44 | ']' => self::CLOSE_SQUARE_BRACKET, |
||
45 | '+' => self::CONCAT, |
||
46 | ',' => self::ARG, |
||
47 | '.' => self::DOT, |
||
48 | '!' => self::NOT, |
||
49 | '=' => self::EQUALS, |
||
50 | '{' => self::OPEN_BRACE, |
||
51 | '}' => self::CLOSE_BRACE, |
||
52 | ':' => self::COLON, |
||
53 | ';' => self::SEMI_COLON, |
||
54 | '#' => self::NUM_SIGN, |
||
55 | '>' => self::GREATER_THAN, |
||
56 | '@' => self::AT_SIGN, |
||
57 | '-' => self::SUBTRACT, |
||
58 | '*' => self::MULTIPLY, |
||
59 | '/' => self::DIVIDE, |
||
60 | ' ' => self::WHITESPACE, |
||
61 | "\n" => self::NEW_LINE, |
||
62 | "\r" => self::WHITESPACE, |
||
63 | "\t" => self::WHITESPACE |
||
64 | ]; |
||
65 | |||
66 | public function __construct($str) { |
||
69 | |||
70 | public function getTokens($returnObj = true) { |
||
87 | |||
88 | private function doSingleLineComments(&$tokens, $char, $i) { |
||
94 | |||
95 | private function doMultiLineComments(&$tokens, $char, $i) { |
||
102 | |||
103 | private function doSimpleTokens(&$tokens, $char) { |
||
110 | |||
111 | private function doNewLine(&$tokens, $char) { |
||
117 | |||
118 | private function isLiteral($n) { |
||
124 | |||
125 | private function doLiterals(&$tokens, $char, &$i) { |
||
135 | |||
136 | private function processLiterals(&$tokens, $name) { |
||
142 | |||
143 | private function doBrackets(&$tokens, $char, $i) { |
||
159 | |||
160 | private function doStrings(&$tokens, $char, $i) { |
||
169 | |||
170 | private function extractString($pos) { |
||
177 | |||
178 | private function extractBrackets($open, $startBracket = '(', $closeBracket = ')') { |
||
185 | |||
186 | private function identifyChar($chr) { |
||
190 | |||
191 | private function getChar($num) { |
||
196 | } |
||
197 |
This check looks for parameters that have been defined for a function or method but are not used in its body.