Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 12 | class Parser extends ParserBase |
||
| 13 | { |
||
| 14 | /** |
||
| 15 | * @var bool Whether current text contains a double quote character |
||
| 16 | */ |
||
| 17 | protected $hasDoubleQuote; |
||
| 18 | |||
| 19 | /** |
||
| 20 | * @var bool Whether current text contains a single quote character |
||
| 21 | */ |
||
| 22 | protected $hasSingleQuote; |
||
| 23 | |||
| 24 | /** |
||
| 25 | * @var string Text being parsed |
||
| 26 | */ |
||
| 27 | protected $text; |
||
| 28 | |||
| 29 | /** |
||
| 30 | * {@inheritdoc} |
||
| 31 | */ |
||
| 32 | 78 | public function parse($text, array $matches) |
|
| 65 | |||
| 66 | /** |
||
| 67 | * Add a fancy replacement tag |
||
| 68 | * |
||
| 69 | * @param integer $tagPos Position of the tag in the text |
||
| 70 | * @param integer $tagLen Length of text consumed by the tag |
||
| 71 | * @param string $chr Replacement character |
||
| 72 | * @param integer $prio Tag's priority |
||
| 73 | * @return \s9e\TextFormatter\Parser\Tag |
||
| 74 | */ |
||
| 75 | 72 | protected function addTag($tagPos, $tagLen, $chr, $prio = 0) |
|
| 82 | |||
| 83 | /** |
||
| 84 | * Parse dashes and ellipses |
||
| 85 | * |
||
| 86 | * Does en dash –, em dash — and ellipsis … |
||
| 87 | * |
||
| 88 | * @return void |
||
| 89 | */ |
||
| 90 | 77 | protected function parseDashesAndEllipses() |
|
| 109 | |||
| 110 | /** |
||
| 111 | * Parse pairs of double quotes |
||
| 112 | * |
||
| 113 | * Does quote pairs “” -- must be done separately to handle nesting |
||
| 114 | * |
||
| 115 | * @return void |
||
| 116 | */ |
||
| 117 | 76 | protected function parseDoubleQuotePairs() |
|
| 128 | |||
| 129 | /** |
||
| 130 | * Parse vulgar fractions |
||
| 131 | * |
||
| 132 | * @return void |
||
| 133 | */ |
||
| 134 | 77 | protected function parseFractions() |
|
| 170 | |||
| 171 | /** |
||
| 172 | * Parse guillemets-style quotation marks |
||
| 173 | * |
||
| 174 | * @return void |
||
| 175 | */ |
||
| 176 | 77 | protected function parseGuillemets() |
|
| 193 | |||
| 194 | /** |
||
| 195 | * Parse the not equal sign |
||
| 196 | * |
||
| 197 | * Supports != and =/= |
||
| 198 | * |
||
| 199 | * @return void |
||
| 200 | */ |
||
| 201 | 77 | protected function parseNotEqualSign() |
|
| 215 | |||
| 216 | /** |
||
| 217 | * Parse pairs of quotes |
||
| 218 | * |
||
| 219 | * @param string $regexp Regexp used to identify quote pairs |
||
| 220 | * @param string $leftQuote Fancy replacement for left quote |
||
| 221 | * @param string $rightQuote Fancy replacement for right quote |
||
| 222 | * @return void |
||
| 223 | */ |
||
| 224 | 37 | protected function parseQuotePairs($regexp, $leftQuote, $rightQuote) |
|
| 237 | |||
| 238 | /** |
||
| 239 | * Parse pairs of single quotes |
||
| 240 | * |
||
| 241 | * Does quote pairs ‘’ -- must be done separately to handle nesting |
||
| 242 | * |
||
| 243 | * @return void |
||
| 244 | */ |
||
| 245 | 76 | protected function parseSingleQuotePairs() |
|
| 256 | |||
| 257 | /** |
||
| 258 | * Parse single quotes in general |
||
| 259 | * |
||
| 260 | * Does apostrophes ’ after a letter or at the beginning of a word or a couple of digits |
||
| 261 | * |
||
| 262 | * @return void |
||
| 263 | */ |
||
| 264 | 76 | protected function parseSingleQuotes() |
|
| 279 | |||
| 280 | /** |
||
| 281 | * Parse symbols found after digits |
||
| 282 | * |
||
| 283 | * Does symbols found after a digit: |
||
| 284 | * - apostrophe ’ if it's followed by an "s" as in 80's |
||
| 285 | * - prime ′ and double prime ″ |
||
| 286 | * - multiply sign × if it's followed by an optional space and another digit |
||
| 287 | * |
||
| 288 | * @return void |
||
| 289 | */ |
||
| 290 | 77 | protected function parseSymbolsAfterDigits() |
|
| 327 | |||
| 328 | /** |
||
| 329 | * Parse symbols found in parentheses such as (c) |
||
| 330 | * |
||
| 331 | * Does symbols ©, ® and ™ |
||
| 332 | * |
||
| 333 | * @return void |
||
| 334 | */ |
||
| 335 | 77 | protected function parseSymbolsInParentheses() |
|
| 354 | } |