Complex classes like FulltextIndex often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use FulltextIndex and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
| 15 | class FulltextIndex extends AbstractIndex |
||
| 16 | { |
||
| 17 | /** @var FulltextIndex $instance */ |
||
| 18 | protected static $instance = null; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * Get new or existing singleton instance of the FulltextIndex |
||
| 22 | * |
||
| 23 | * @return FulltextIndex |
||
| 24 | */ |
||
| 25 | public static function getInstance() |
||
| 32 | |||
| 33 | /** |
||
| 34 | * Measure the length of a string |
||
| 35 | * Differs from strlen in handling of asian characters. |
||
| 36 | * |
||
| 37 | * @author Tom N Harris <[email protected]> |
||
| 38 | * |
||
| 39 | * @param string $w |
||
| 40 | * @return int |
||
| 41 | */ |
||
| 42 | public function wordlen($w) |
||
| 54 | |||
| 55 | /** |
||
| 56 | * Adds the contents of a page to the fulltext index |
||
| 57 | * |
||
| 58 | * The added text replaces previous words for the same page. |
||
| 59 | * An empty value erases the page. |
||
| 60 | * |
||
| 61 | * @param string $page a page name |
||
| 62 | * @param string $text the body of the page |
||
| 63 | * @param bool $requireLock should be false only if the caller is responsible for index lock |
||
| 64 | * @return bool if the function completed successfully |
||
| 65 | * |
||
| 66 | * @author Tom N Harris <[email protected]> |
||
| 67 | * @author Andreas Gohr <[email protected]> |
||
| 68 | */ |
||
| 69 | public function addPageWords($page, $text, $requireLock = true) |
||
| 134 | |||
| 135 | /** |
||
| 136 | * Split the words in a page and add them to the index |
||
| 137 | * |
||
| 138 | * @param string $text content of the page |
||
| 139 | * @return array list of word IDs and number of times used |
||
| 140 | * |
||
| 141 | * @author Andreas Gohr <[email protected]> |
||
| 142 | * @author Christopher Smith <[email protected]> |
||
| 143 | * @author Tom N Harris <[email protected]> |
||
| 144 | */ |
||
| 145 | protected function getPageWords($text) |
||
| 187 | |||
| 188 | /** |
||
| 189 | * Delete the contents of a page from the fulltext index |
||
| 190 | * |
||
| 191 | * @param string $page a page name |
||
| 192 | * @param bool $requireLock should be false only if the caller is responsible for index lock |
||
| 193 | * @return bool If deleting the page words has been successful, false on error |
||
| 194 | * |
||
| 195 | * @author Tom N Harris <[email protected]> |
||
| 196 | * @author Satoshi Sahara <[email protected]> |
||
| 197 | */ |
||
| 198 | public function deletePageWords($page, $requireLock = true) |
||
| 236 | |||
| 237 | /** |
||
| 238 | * Find pages in the fulltext index containing the words, |
||
| 239 | * |
||
| 240 | * The search words must be pre-tokenized, meaning only letters and |
||
| 241 | * numbers with an optional wildcard |
||
| 242 | * |
||
| 243 | * The returned array will have the original tokens as key. The values |
||
| 244 | * in the returned list is an array with the page names as keys and the |
||
| 245 | * number of times that token appears on the page as value. |
||
| 246 | * |
||
| 247 | * @param array $tokens list of words to search for |
||
| 248 | * @return array list of page names with usage counts |
||
| 249 | * |
||
| 250 | * @author Tom N Harris <[email protected]> |
||
| 251 | * @author Andreas Gohr <[email protected]> |
||
| 252 | */ |
||
| 253 | public function lookupWords(&$tokens) |
||
| 292 | |||
| 293 | /** |
||
| 294 | * Find the index ID of each search term |
||
| 295 | * |
||
| 296 | * The query terms should only contain valid characters, with a '*' at |
||
| 297 | * either the beginning or end of the word (or both). |
||
| 298 | * The $result parameter can be used to merge the index locations with |
||
| 299 | * the appropriate query term. |
||
| 300 | * |
||
| 301 | * @param array $words The query terms. |
||
| 302 | * @param array $result Set to word => array("length*id" ...) |
||
| 303 | * @return array Set to length => array(id ...) |
||
| 304 | * |
||
| 305 | * @author Tom N Harris <[email protected]> |
||
| 306 | */ |
||
| 307 | protected function getIndexWords(&$words, &$result) |
||
| 386 | |||
/**
 * Select the word lengths for which an index is available
 *
 * When $filter is an array, its keys are treated as candidate lengths and
 * only those that have an existing "i<length>.idx" file in the configured
 * index directory are returned. For any other value, $filter is cast to int
 * and used as a lower bound on the lengths reported by listIndexLengths().
 *
 * @author YoBoY <[email protected]>
 *
 * @param array|int $filter array keyed by word length, or a minimum length
 * @return array the matching word lengths
 */
public function getIndexLengths($filter)
{
    global $conf;

    // Scalar filter: keep every known length that is at least $filter.
    if (!is_array($filter)) {
        $atLeast = array();
        foreach ($this->listIndexLengths() as $knownLength) {
            if ((int)$knownLength < (int)$filter) {
                continue;
            }
            $atLeast[] = $knownLength;
        }
        return $atLeast;
    }

    // Array filter: only the keys matter; probe for each index file on disk.
    $prefix = $conf['indexdir'] . "/i";
    $present = array();
    foreach (array_keys($filter) as $wordLength) {
        if (!file_exists($prefix . $wordLength . '.idx')) {
            continue;
        }
        $present[] = $wordLength;
    }
    return $present;
}
||
| 421 | |||
| 422 | /** |
||
| 423 | * Get the list of lengths indexed in the wiki |
||
| 424 | * |
||
| 425 | * Read the index directory or a cache file and returns |
||
| 426 | * a sorted array of lengths of the words used in the wiki. |
||
| 427 | * |
||
| 428 | * @author YoBoY <[email protected]> |
||
| 429 | * |
||
| 430 | * @return array |
||
| 431 | */ |
||
| 432 | public function listIndexLengths() |
||
| 479 | |||
| 480 | /** |
||
| 481 | * Return a list of words sorted by number of times used |
||
| 482 | * |
||
| 483 | * @param int $min bottom frequency threshold |
||
| 484 | * @param int $max upper frequency limit. No limit if $max<$min |
||
| 485 | * @param int $minlen minimum length of words to count |
||
| 486 | * @return array list of words as the keys and frequency as value |
||
| 487 | * |
||
| 488 | * @author Tom N Harris <[email protected]> |
||
| 489 | */ |
||
| 490 | public function histogram($min=1, $max=0, $minlen=3) |
||
| 494 | |||
| 495 | /** |
||
| 496 | * Clear the Fulltext Index |
||
| 497 | * |
||
| 498 | * @param bool $requireLock should be false only if the caller is responsible for index lock |
||
| 499 | * @return bool If the index has been cleared successfully |
||
| 500 | */ |
||
| 501 | public function clear($requireLock = true) |
||
| 518 | } |
||
| 519 |