Complex classes like AbstractIndex often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields and methods that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractIndex and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
| 17 | abstract class AbstractIndex |
||
| 18 | { |
||
| 19 | /* pages will be marked as deleted in page.idx */ |
||
| 20 | const INDEX_MARK_DELETED = '#deleted:'; |
||
| 21 | |||
| 22 | /** @var array $pidCache Cache for getPID() */ |
||
| 23 | protected static $pidCache = array(); |
||
| 24 | |||
/**
 * Turn the name of a key into a string usable as a file name.
 *
 * The name is cast to string, trimmed and passed through
 * Utf8\Clean::romanize(). Each run of separator characters is then
 * collapsed into a single underscore, every remaining character that is
 * not an ASCII letter, digit or underscore is dropped, and the result
 * is lowercased.
 *
 * @author Tom N Harris
 *
 * @param string $name
 * @return string
 */
protected function cleanName($name)
{
    $romanized = Utf8\Clean::romanize(trim((string)$name));

    // separators: space, dot, slash, colon and hyphen
    // NOTE(review): inside this single-quoted literal "\\:" reaches PCRE as "\:",
    // i.e. an escaped colon, so a literal backslash is NOT turned into "_" here;
    // it is simply removed by the second pattern. Confirm whether that is intended
    // before touching it, since changing it would alter the generated names.
    $underscored = preg_replace('#[ \./\\:-]+#', '_', $romanized);

    return strtolower(preg_replace('/[^A-Za-z0-9_]/', '', $underscored));
}
||
| 43 | |||
/**
 * Get the numeric PID of a page
 *
 * The page is looked up in the 'page' index. A page that is not listed yet
 * gets registered: an entry that was marked as deleted is revived in place,
 * otherwise the page is appended to the end of the index. The index is only
 * written back when it was modified. Resolved PIDs are remembered in a small
 * static cache so that repeated requests skip locking and index access.
 *
 * Warning: The page may not exist in the filesystem.
 *
 * @param string $page The page to get the PID for
 * @return int The numeric page id
 *
 * @throws IndexAccessException when $page is null
 * @throws IndexLockException
 * @throws IndexWriteException
 */
public function getPID($page)
{
    if (!isset($page)) {
        throw new IndexAccessException('Indexer: invalid argument for getPID');
    }

    // return PID when it is in the cache; this avoids taking the lock and
    // reading the index for the most recently requested pages
    if (isset(static::$pidCache[$page])) return static::$pidCache[$page];

    $this->lock();
    try {
        $index = $this->getIndex('page', '');
        $pid = array_search($page, $index, true);

        if ($pid === false) {
            // reuse the slot of an old entry that had been marked as deleted,
            // otherwise append the page as a new line at the end of the index
            $pid = array_search(self::INDEX_MARK_DELETED.$page, $index, true);
            if ($pid === false) {
                $pid = count($index);
            }
            $index[$pid] = $page;
            $this->saveIndex('page', '', $index);
        }

        // keep the cache small (it never holds more than 11 entries) by
        // discarding the oldest element, as in DokuWiki usually only the most
        // recently added item will be requested again.
        // The oldest key is unset explicitly instead of using array_shift():
        // array_shift() renumbers integer keys, and numeric page names are
        // stored as integer keys, so they would end up mapped to a wrong PID.
        if (count(static::$pidCache) > 10) {
            reset(static::$pidCache);
            unset(static::$pidCache[key(static::$pidCache)]);
        }
        static::$pidCache[$page] = $pid;
    } finally {
        // release the lock even when reading or writing the index failed
        $this->unlock();
    }

    return $pid;
}
||
| 96 | |||
| 97 | /** |
||
| 98 | * Reset pidCache |
||
| 99 | */ |
||
| 100 | protected function resetPIDCache() |
||
| 104 | |||
| 105 | /** |
||
| 106 | * Get the page id of a numeric PID |
||
| 107 | * |
||
| 108 | * @param int $pid The PID to get the page id for |
||
| 109 | * @return string The page id |
||
| 110 | */ |
||
| 111 | public function getPageFromPID($pid) |
||
| 115 | |||
| 116 | /** |
||
| 117 | * Return a list of all pages |
||
| 118 | * Warning: pages may not exist in the filesystem. |
||
| 119 | * |
||
| 120 | * @return array list of page names |
||
| 121 | */ |
||
| 122 | public function getPages() |
||
| 130 | |||
| 131 | /** |
||
| 132 | * Lock the indexer |
||
| 133 | * |
||
| 134 | * @return true |
||
| 135 | * @throws IndexLockException |
||
| 136 | * @author Tom N Harris <[email protected]> |
||
| 137 | * |
||
| 138 | */ |
||
| 139 | protected function lock() |
||
| 161 | |||
| 162 | /** |
||
| 163 | * Release the indexer lock |
||
| 164 | * |
||
| 165 | * @return true |
||
| 166 | * @throws IndexLockException |
||
| 167 | * @author Tom N Harris <[email protected]> |
||
| 168 | * |
||
| 169 | */ |
||
| 170 | protected function unlock() |
||
| 178 | |||
| 179 | /** |
||
| 180 | * Retrieve the entire index |
||
| 181 | * |
||
| 182 | * The $suffix argument is for an index that is split into multiple parts. |
||
| 183 | * Different index files should use different base names. |
||
| 184 | * |
||
| 185 | * @param string $idx name of the index |
||
| 186 | * @param string $suffix subpart identifier |
||
| 187 | * @return array list of lines without CR or LF |
||
| 188 | * |
||
| 189 | * @author Tom N Harris <[email protected]> |
||
| 190 | */ |
||
| 191 | public function getIndex($idx, $suffix) |
||
| 198 | |||
| 199 | /** |
||
| 200 | * Replace the contents of the index with an array |
||
| 201 | * |
||
| 202 | * @param string $idx name of the index |
||
| 203 | * @param string $suffix subpart identifier |
||
| 204 | * @param array $lines list of lines without LF |
||
| 205 | * @return true |
||
| 206 | * |
||
| 207 | * @throws IndexWriteException |
||
| 208 | * @author Tom N Harris <[email protected]> |
||
| 209 | */ |
||
| 210 | protected function saveIndex($idx, $suffix, $lines) |
||
| 229 | |||
| 230 | /** |
||
| 231 | * Retrieve or insert a value in the index |
||
| 232 | * |
||
| 233 | * @param string $idx name of the index |
||
| 234 | * @param string $suffix subpart identifier |
||
| 235 | * @param string $value line to find in the index |
||
| 236 | * @return int line number of the value in the index |
||
| 237 | * |
||
| 238 | * @throws IndexWriteException |
||
| 239 | * @author Tom N Harris <[email protected]> |
||
| 240 | */ |
||
| 241 | protected function addIndexKey($idx, $suffix, $value) |
||
| 252 | |||
| 253 | /** |
||
| 254 | * Write a line into the index |
||
| 255 | * |
||
| 256 | * @param string $idx name of the index |
||
| 257 | * @param string $suffix subpart identifier |
||
| 258 | * @param int $id the line number |
||
| 259 | * @param string $line line to write |
||
| 260 | * @return true |
||
| 261 | * |
||
| 262 | * @throws IndexWriteException |
||
| 263 | * @author Tom N Harris <[email protected]> |
||
| 264 | */ |
||
| 265 | protected function saveIndexKey($idx, $suffix, $id, $line) |
||
| 303 | |||
| 304 | /** |
||
| 305 | * Retrieve a line from the index |
||
| 306 | * |
||
| 307 | * @param string $idx name of the index |
||
| 308 | * @param string $suffix subpart identifier |
||
| 309 | * @param int $id the line number |
||
| 310 | * @return string a line with trailing whitespace removed |
||
| 311 | * |
||
| 312 | * @author Tom N Harris <[email protected]> |
||
| 313 | */ |
||
| 314 | protected function getIndexKey($idx, $suffix, $id) |
||
| 328 | |||
| 329 | /** |
||
| 330 | * Insert or replace a tuple in a line |
||
| 331 | * |
||
| 332 | * @author Tom N Harris <[email protected]> |
||
| 333 | * |
||
| 334 | * @param string $line |
||
| 335 | * @param int|string $id |
||
| 336 | * @param int $count |
||
| 337 | * @return string |
||
| 338 | */ |
||
| 339 | protected function updateTuple($line, $id, $count) |
||
| 354 | |||
| 355 | /** |
||
| 356 | * Split a line into an array of tuples |
||
| 357 | * |
||
| 358 | * @author Tom N Harris <[email protected]> |
||
| 359 | * @author Andreas Gohr <[email protected]> |
||
| 360 | * |
||
| 361 | * @param array $keys |
||
| 362 | * @param string $line |
||
| 363 | * @return array |
||
| 364 | */ |
||
| 365 | protected function parseTuples($keys, $line) |
||
| 380 | |||
| 381 | /** |
||
| 382 | * Sum the counts in a list of tuples |
||
| 383 | * |
||
| 384 | * @author Tom N Harris <[email protected]> |
||
| 385 | * |
||
| 386 | * @param string $line |
||
| 387 | * @return int |
||
| 388 | */ |
||
| 389 | protected function countTuples($line) |
||
| 400 | |||
| 401 | /** |
||
| 402 | * Clear the whole index |
||
| 403 | * |
||
| 404 | * @return bool If the index has been cleared successfully |
||
| 405 | */ |
||
| 406 | abstract public function clear(); |
||
| 407 | } |
||
| 408 |