Complex classes like Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes; in Tokenizer, for example, doComments, doSingleLineComments, and doMultiLineComments all share the "Comments" suffix. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster to apply.
While breaking up the class, it is a good idea to analyze how other classes use Tokenizer, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 8 | class Tokenizer { |
||
| 9 | private $str; |
||
| 10 | const NAME = 'LITERAL'; |
||
| 11 | const STRING = 'STRING'; |
||
| 12 | const OPEN_BRACKET = 'OPEN_BRACKET'; |
||
| 13 | const CLOSE_BRACKET = 'CLOSE_BRACKET'; |
||
| 14 | const OPEN_SQUARE_BRACKET = 'SQUARE_BRACKET'; |
||
| 15 | const CLOSE_SQUARE_BRACKET = 'CLOSE_SQUARE_BRACKET'; |
||
| 16 | const CONCAT = 'CONCAT'; |
||
| 17 | const ARG = 'ARG'; |
||
| 18 | const WHITESPACE = 'WHITESPACE'; |
||
| 19 | const NEW_LINE = 'NEW_LINE'; |
||
| 20 | const DOT = 'DOT'; |
||
| 21 | const NUMERIC = 'NUMERIC'; |
||
| 22 | const EQUALS = 'EQUALS'; |
||
| 23 | const NOT = 'NOT'; |
||
| 24 | const OPEN_BRACE = 'OPEN_BRACE'; |
||
| 25 | const CLOSE_BRACE = 'CLOSE_BRACE'; |
||
| 26 | const BOOL = 'BOOL'; |
||
| 27 | const COLON = 'COLON'; |
||
| 28 | const SEMI_COLON = 'SEMI_COLON'; |
||
| 29 | const NUM_SIGN = 'NUM_SIGN'; |
||
| 30 | const GREATER_THAN = 'GREATER_THAN'; |
||
| 31 | const AT_SIGN = 'AT_SIGN'; |
||
| 32 | const SUBTRACT = 'SUBTRACT'; |
||
| 33 | const MULTIPLY = 'MULTIPLY'; |
||
| 34 | const DIVIDE = 'DIVIDE'; |
||
| 35 | |||
| 36 | private $lineNo = 1; |
||
| 37 | |||
| 38 | private $chars = [ |
||
| 39 | '"' => self::STRING, |
||
| 40 | '\'' => self::STRING, |
||
| 41 | '(' => self::OPEN_BRACKET, |
||
| 42 | ')' => self::CLOSE_BRACKET, |
||
| 43 | '[' => self::OPEN_SQUARE_BRACKET, |
||
| 44 | ']' => self::CLOSE_SQUARE_BRACKET, |
||
| 45 | '+' => self::CONCAT, |
||
| 46 | ',' => self::ARG, |
||
| 47 | '.' => self::DOT, |
||
| 48 | '!' => self::NOT, |
||
| 49 | '=' => self::EQUALS, |
||
| 50 | '{' => self::OPEN_BRACE, |
||
| 51 | '}' => self::CLOSE_BRACE, |
||
| 52 | ':' => self::COLON, |
||
| 53 | ';' => self::SEMI_COLON, |
||
| 54 | '#' => self::NUM_SIGN, |
||
| 55 | '>' => self::GREATER_THAN, |
||
| 56 | '@' => self::AT_SIGN, |
||
| 57 | '-' => self::SUBTRACT, |
||
| 58 | '*' => self::MULTIPLY, |
||
| 59 | '/' => self::DIVIDE, |
||
| 60 | ' ' => self::WHITESPACE, |
||
| 61 | "\n" => self::NEW_LINE, |
||
| 62 | "\r" => self::WHITESPACE, |
||
| 63 | "\t" => self::WHITESPACE |
||
| 64 | ]; |
||
| 65 | |||
| 66 | public function __construct($str) { |
||
| 69 | |||
| 70 | public function getTokens($returnObj = true) { |
||
| 90 | |||
| 91 | private function doComments(&$tokens, $char, $i) { |
||
| 95 | |||
| 96 | private function doSingleLineComments(&$tokens, $char, $i) { |
||
| 102 | |||
| 103 | private function doMultiLineComments(&$tokens, $char, $i) { |
||
| 110 | |||
| 111 | private function doSimpleTokens(&$tokens, $char) { |
||
| 118 | |||
| 119 | private function doNewLine(&$tokens, $char) { |
||
| 125 | |||
| 126 | private function isLiteral($n) { |
||
| 132 | |||
| 133 | private function doLiterals(&$tokens, $char, &$i) { |
||
| 143 | |||
| 144 | private function processLiterals(&$tokens, $name) { |
||
| 150 | |||
| 151 | private function doBrackets(&$tokens, $char, $i) { |
||
| 167 | |||
| 168 | private function doStrings(&$tokens, $char, $i) { |
||
| 177 | |||
| 178 | private function extractString($pos) { |
||
| 185 | |||
| 186 | private function extractBrackets($open, $startBracket = '(', $closeBracket = ')') { |
||
| 193 | |||
| 194 | private function identifyChar($chr) { |
||
| 198 | |||
| 199 | private function getChar($num) { |
||
| 204 | } |
||
| 205 |
This check looks for parameters that have been defined for a function or method but are not used in its body.