Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Hyphenator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Hyphenator, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 115 | final class Hyphenator |
||
| 116 | { |
||
| 117 | |||
| 118 | /** |
||
| 119 | * The highest possible hyphenation quality |
||
| 120 | * |
||
| 121 | * @const int QUALITY_HIGHEST |
||
| 122 | */ |
||
| 123 | const QUALITY_HIGHEST = 9; |
||
| 124 | |||
| 125 | /** |
||
| 126 | * A high hyphenation quality |
||
| 127 | * |
||
| 128 | * @const int QUALITY_HIGH |
||
| 129 | */ |
||
| 130 | const QUALITY_HIGH = 7; |
||
| 131 | |||
| 132 | /** |
||
| 133 | * A medium hyphenation quality |
||
| 134 | * |
||
| 135 | * @const int QUALITY_NORMAL |
||
| 136 | */ |
||
| 137 | const QUALITY_NORMAL = 5; |
||
| 138 | |||
| 139 | /** |
||
| 140 | * A low hyphenation quality |
||
| 141 | * |
||
| 142 | * @const int QUALITY_LOW |
||
| 143 | */ |
||
| 144 | const QUALITY_LOW = 3; |
||
| 145 | |||
| 146 | /** |
||
| 147 | * The lowest possible hyphenation quality |
||
| 148 | * |
||
| 149 | * @const int QUALITY_LOWEST |
||
| 150 | */ |
||
| 151 | const QUALITY_LOWEST = 1; |
||
| 152 | |||
| 153 | /** |
||
| 154 | * Storage for the Home-path. |
||
| 155 | * |
||
| 156 | * The hyphenation-files are searched in different places. |
||
| 157 | * <ol><li>Location given via the constant HYPHENATOR_HOME</li> |
||
| 158 | * <li>Location set via \Org\Heigl\Hyphenator\Hyphenator::setDefaultHomePath()</li> |
||
| 159 | * <li>Location set via \Org\Heigl\Hyphenator\Hyphenator::setHomePath()</li> |
||
| 160 | * <li>The 'share'-Folder inside the Hyphenator-Package</li> |
||
| 161 | * </ol> |
||
| 162 | * |
||
| 163 | * The configuration-object can also be obtained using the |
||
| 164 | * \Org\Heigl\Hyphenator::getConfig()-Method and can then be adapted |
||
| 165 | * according to one's needs. |
||
| 166 | * |
||
| 167 | * @var string $homePath |
||
| 168 | */ |
||
| 169 | private $homePath = ''; |
||
| 170 | |||
| 171 | /** |
||
| 172 | * Storage of the default Home-Path. |
||
| 173 | * |
||
| 174 | * @var string $defaultHomePath |
||
| 175 | */ |
||
| 176 | private static $defaultHomePath = ''; |
||
| 177 | |||
| 178 | /** |
||
| 179 | * Storage for the Options-Object. |
||
| 180 | * |
||
| 181 | * @var Options $options |
||
| 182 | */ |
||
| 183 | private $options; |
||
| 184 | |||
| 185 | /** |
||
| 186 | * Storage for the Dictionaries. |
||
| 187 | * |
||
| 188 | * @var DictionaryRegistry $dicts |
||
| 189 | */ |
||
| 190 | private $dicts; |
||
| 191 | |||
| 192 | /** |
||
| 193 | * Storage for the Filters. |
||
| 194 | * |
||
| 195 | * @var FilterRegistry $filters |
||
| 196 | */ |
||
| 197 | private $filters; |
||
| 198 | |||
| 199 | /** |
||
| 200 | * Storage for the tokenizers. |
||
| 201 | * |
||
| 202 | * @var TokenizerRegistry $tokenizers |
||
| 203 | */ |
||
| 204 | private $tokenizers; |
||
| 205 | |||
| 206 | /** |
||
| 207 | * Set the Options |
||
| 208 | * |
||
| 209 | * @param Options $options The options to set |
||
| 210 | * |
||
| 211 | * @return Hyphenator |
||
| 212 | */ |
||
| 213 | public function setOptions(Options $options) |
||
| 223 | |||
| 224 | /** |
||
| 225 | * Get the Options |
||
| 226 | * |
||
| 227 | * @return Options |
||
| 228 | */ |
||
| 229 | public function getOptions() |
||
| 233 | |||
| 234 | /** |
||
| 235 | * Add a Dictionary to the Hyphenator |
||
| 236 | * |
||
| 237 | * @param Dictionary|string $dictionary The |
||
| 238 | * Dictionary with hyphenation-Patterns to add to this Hyphenator |
||
| 239 | * |
||
| 240 | * @return Hyphenator |
||
| 241 | */ |
||
| 242 | public function addDictionary($dictionary) |
||
| 252 | |||
| 253 | /** |
||
| 254 | * Add a Filter to the Hyphenator |
||
| 255 | * |
||
| 256 | * @param Filter|string $filter The Filter with |
||
| 257 | * non-standard-hyphenation-patterns |
||
| 258 | * |
||
| 259 | * @link http://hunspell.sourceforge.net/tb87nemeth.pdf |
||
| 260 | * @return Hyphenator |
||
| 261 | */ |
||
| 262 | public function addFilter($filter) |
||
| 274 | |||
| 275 | /** |
||
| 276 | * Add a tokenizer to the tokenizer-registry |
||
| 277 | * |
||
| 278 | * @param Tokenizer|string $tokenizer The tokenizer to add |
||
| 279 | * |
||
| 280 | * @return Hyphenator |
||
| 281 | */ |
||
| 282 | public function addTokenizer($tokenizer) |
||
| 293 | |||
| 294 | /** |
||
| 295 | * Get the tokenizers |
||
| 296 | * |
||
| 297 | * @return TokenizerRegistry |
||
| 298 | */ |
||
| 299 | public function getTokenizers() |
||
| 309 | |||
| 310 | /** |
||
| 311 | * Get the dictionaries |
||
| 312 | * |
||
| 313 | * @return DictionaryRegistry |
||
| 314 | */ |
||
| 315 | public function getDictionaries() |
||
| 323 | |||
| 324 | /** |
||
| 325 | * Get the filters |
||
| 326 | * |
||
| 327 | * @return FilterRegistry |
||
| 328 | */ |
||
| 329 | public function getFilters() |
||
| 339 | |||
| 340 | public function __construct() |
||
| 349 | |||
| 350 | /** |
||
| 351 | * This method does the actual hyphenation. |
||
| 352 | * |
||
| 353 | * The given <var>$string</var> is split into chunks (i.e. Words) at |
||
| 354 | * every blank. |
||
| 355 | * |
||
| 356 | * After that every chunk is hyphenated and the array of chunks is merged |
||
| 357 | * into a single string using blanks again. |
||
| 358 | * |
||
| 359 | * This method does not take into account other word-delimiters than blanks |
||
| 360 | * (eg. returns or tabstops) and it will fail with texts containing markup |
||
| 361 | * in any way. |
||
| 362 | * |
||
| 363 | * @param string $string The string to hyphenate |
||
| 364 | * |
||
| 365 | * @return string|array<array-key, mixed> The hyphenated string |
||
| 366 | */ |
||
| 367 | public function hyphenate($string) |
||
| 379 | |||
| 380 | /** |
||
| 381 | * Get the hyphenation pattern for the contained tokens |
||
| 382 | * |
||
| 383 | * Use the dictionaries and options of the given Hyphenator-Object |
||
| 384 | * |
||
| 385 | * @param TokenRegistry $registry The registry containing the tokens to |
||
| 386 | * get the hyphenation patterns for |
||
| 387 | * |
||
| 388 | * @return TokenRegistry |
||
| 389 | */ |
||
| 390 | public function getHyphenationPattern(TokenRegistry $registry) |
||
| 405 | |||
| 406 | /** |
||
| 407 | * Filter the content of the given TokenRegistry |
||
| 408 | * |
||
| 409 | * @param TokenRegistry $registry The tokens |
||
| 410 | * to filter |
||
| 411 | * |
||
| 412 | * @return TokenRegistry |
||
| 413 | */ |
||
| 414 | public function filter(TokenRegistry $registry) |
||
| 418 | |||
| 419 | /** |
||
| 420 | * Hyphenate a Token-Object |
||
| 421 | * |
||
| 422 | * @param WordToken $token The token to hyphenate |
||
| 423 | * |
||
| 424 | * @return Token |
||
| 425 | */ |
||
| 426 | public function getPatternForToken(WordToken $token) |
||
| 432 | |||
| 433 | /** |
||
| 434 | * Set the default home-Path |
||
| 435 | * |
||
| 436 | * @param string $homePath The default Hyphenator Home-path. |
||
| 437 | * |
||
| 438 | * @throws PathNotFoundException |
||
| 439 | * @throws PathNotDirException |
||
| 440 | * @return void |
||
| 441 | */ |
||
| 442 | View Code Duplication | public static function setDefaultHomePath($homePath) |
|
| 453 | |||
| 454 | /** |
||
| 455 | * Get the default Home-Path |
||
| 456 | * |
||
| 457 | * @return string |
||
| 458 | */ |
||
| 459 | public static function getDefaultHomePath() |
||
| 475 | |||
| 476 | /** |
||
| 477 | * Set the instance-home-Path |
||
| 478 | * |
||
| 479 | * @param string $homePath This instance's home-path. |
||
| 480 | * |
||
| 481 | * @throws PathNotFoundException |
||
| 482 | * @throws PathNotDirException |
||
| 483 | * @return Hyphenator |
||
| 484 | */ |
||
| 485 | View Code Duplication | public function setHomePath($homePath) |
|
| 498 | |||
| 499 | /** |
||
| 500 | * Get this instance's Home-Path. |
||
| 501 | * |
||
| 502 | * If no homePath is set for this instance this method will return the |
||
| 503 | * result of the \Org\Heigl\Hyphenator\Hyphenator::getDefaultHomePath() |
||
| 504 | * Method |
||
| 505 | * |
||
| 506 | * @return string |
||
| 507 | */ |
||
| 508 | public function getHomePath() |
||
| 516 | |||
| 517 | /** |
||
| 518 | * Create a new Hyphenator-Object for a certain locale |
||
| 519 | * |
||
| 520 | * To determine the storage of the dictionaries we either use the set |
||
| 521 | * default configuration-file or we take the provided file and set the |
||
| 522 | * home-path from the information within that file. |
||
| 523 | * |
||
| 524 | * @param string $path The path to the configuration-file to use |
||
| 525 | * @param string $locale The locale to be used |
||
| 526 | * |
||
| 527 | * @return Hyphenator |
||
| 528 | */ |
||
| 529 | public static function factory($path = null, $locale = null) |
||
| 541 | |||
| 542 | /** |
||
| 543 | * autoload classes. |
||
| 544 | * |
||
| 545 | * @param string $className the name of the class to load |
||
| 546 | * |
||
| 547 | * @return bool |
||
| 548 | */ |
||
| 549 | public static function __autoload($className) |
||
| 566 | |||
| 567 | /** |
||
| 568 | * Register this packages autoloader with the autoload-stack |
||
| 569 | * |
||
| 570 | * @return void |
||
| 571 | */ |
||
| 572 | public static function registerAutoload() |
||
| 576 | } |
||
| 577 | |||
| 585 |