Complex classes like Zend_Search_Lucene_Document_Html often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Zend_Search_Lucene_Document_Html, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 37 | class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document |
||
|
|
|||
| 38 | { |
||
| 39 | /** |
||
| 40 | * List of document links |
||
| 41 | * |
||
| 42 | * @var array |
||
| 43 | */ |
||
| 44 | private $_links = array(); |
||
| 45 | |||
| 46 | /** |
||
| 47 | * List of document header links |
||
| 48 | * |
||
| 49 | * @var array |
||
| 50 | */ |
||
| 51 | private $_headerLinks = array(); |
||
| 52 | |||
| 53 | /** |
||
| 54 | * Stored DOM representation |
||
| 55 | * |
||
| 56 | * @var DOMDocument |
||
| 57 | */ |
||
| 58 | private $_doc; |
||
| 59 | |||
| 60 | /** |
||
| 61 | * Exclude nofollow links flag |
||
| 62 | * |
||
| 63 | * If true, links with a rel='nofollow' attribute are not included in the |
||
| 64 | * document links. |
||
| 65 | * |
||
| 66 | * @var boolean |
||
| 67 | */ |
||
| 68 | private static $_excludeNoFollowLinks = false; |
||
| 69 | |||
| 70 | /** |
||
| 71 | * |
||
| 72 | * List of inline tags |
||
| 73 | * |
||
| 74 | * @var array |
||
| 75 | */ |
||
| 76 | private $_inlineTags = array('a', 'abbr', 'acronym', 'dfn', 'em', 'strong', 'code', |
||
| 77 | 'samp', 'kbd', 'var', 'b', 'i', 'big', 'small', 'strike', |
||
| 78 | 'tt', 'u', 'font', 'span', 'bdo', 'cite', 'del', 'ins', |
||
| 79 | 'q', 'sub', 'sup'); |
||
| 80 | |||
| 81 | /** |
||
| 82 | * Object constructor |
||
| 83 | * |
||
| 84 | * @param string $data HTML string (may be an HTML fragment) |
||
| 85 | * @param boolean $isFile |
||
| 86 | * @param boolean $storeContent |
||
| 87 | * @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag. |
||
| 88 | */ |
||
| 89 | private function __construct($data, $isFile, $storeContent, $defaultEncoding = '') |
||
| 187 | |||
| 188 | /** |
||
| 189 | * Set exclude nofollow links flag |
||
| 190 | * |
||
| 191 | * @param boolean $newValue |
||
| 192 | */ |
||
| 193 | public static function setExcludeNoFollowLinks($newValue) |
||
| 197 | |||
| 198 | /** |
||
| 199 | * Get exclude nofollow links flag |
||
| 200 | * |
||
| 201 | * @return boolean |
||
| 202 | */ |
||
| 203 | public static function getExcludeNoFollowLinks() |
||
| 207 | |||
| 208 | /** |
||
| 209 | * Get node text |
||
| 210 | * |
||
| 211 | * Scripts should be excluded, since they may not be enclosed in comment tags or CDATA sections. |
||
| 212 | * |
||
| 213 | * @param DOMNode $node |
||
| 214 | * @param string &$text |
||
| 215 | */ |
||
| 216 | private function _retrieveNodeText(DOMNode $node, &$text) |
||
| 229 | |||
| 230 | /** |
||
| 231 | * Get document HREF links |
||
| 232 | * |
||
| 233 | * @return array |
||
| 234 | */ |
||
| 235 | public function getLinks() |
||
| 239 | |||
| 240 | /** |
||
| 241 | * Get document header links |
||
| 242 | * |
||
| 243 | * @return array |
||
| 244 | */ |
||
| 245 | public function getHeaderLinks() |
||
| 249 | |||
| 250 | /** |
||
| 251 | * Load HTML document from a string |
||
| 252 | * |
||
| 253 | * @param string $data |
||
| 254 | * @param boolean $storeContent |
||
| 255 | * @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag. |
||
| 256 | * @return Zend_Search_Lucene_Document_Html |
||
| 257 | */ |
||
| 258 | public static function loadHTML($data, $storeContent = false, $defaultEncoding = '') |
||
| 262 | |||
| 263 | /** |
||
| 264 | * Load HTML document from a file |
||
| 265 | * |
||
| 266 | * @param string $file |
||
| 267 | * @param boolean $storeContent |
||
| 268 | * @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag. |
||
| 269 | * @return Zend_Search_Lucene_Document_Html |
||
| 270 | */ |
||
| 271 | public static function loadHTMLFile($file, $storeContent = false, $defaultEncoding = '') |
||
| 275 | |||
| 276 | |||
| 277 | /** |
||
| 278 | * Highlight text in text node |
||
| 279 | * |
||
| 280 | * @param DOMText $node |
||
| 281 | * @param array $wordsToHighlight |
||
| 282 | * @param callback $callback Callback method, used to transform (highlighting) text. |
||
| 283 | * @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform) |
||
| 284 | * @throws Zend_Search_Lucene_Exception |
||
| 285 | */ |
||
| 286 | protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params) |
||
| 343 | |||
| 344 | |||
| 345 | /** |
||
| 346 | * Highlight words in the content of the specified node |
||
| 347 | * |
||
| 348 | * @param DOMNode $contextNode |
||
| 349 | * @param array $wordsToHighlight |
||
| 350 | * @param callback $callback Callback method, used to transform (highlighting) text. |
||
| 351 | * @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform) |
||
| 352 | */ |
||
| 353 | protected function _highlightNodeRecursive(DOMNode $contextNode, $wordsToHighlight, $callback, $params) |
||
| 377 | |||
| 378 | /** |
||
| 379 | * Standard callback method used to highlight words. |
||
| 380 | * |
||
| 381 | * @param string $stringToHighlight |
||
| 382 | * @return string |
||
| 383 | * @internal |
||
| 384 | */ |
||
| 385 | public function applyColour($stringToHighlight, $colour) |
||
| 389 | |||
| 390 | /** |
||
| 391 | * Highlight text with specified color |
||
| 392 | * |
||
| 393 | * @param string|array $words |
||
| 394 | * @param string $colour |
||
| 395 | * @return string |
||
| 396 | */ |
||
| 397 | public function highlight($words, $colour = '#66ffff') |
||
| 401 | |||
| 402 | |||
| 403 | |||
| 404 | /** |
||
| 405 | * Highlight text using specified View helper or callback function. |
||
| 406 | * |
||
| 407 | * @param string|array $words Words to highlight. Words could be organized using the array or string. |
||
| 408 | * @param callback $callback Callback method, used to transform (highlighting) text. |
||
| 409 | * @param array $params Array of additional callback parameters passed through into it |
||
| 410 | * (first non-optional parameter is an HTML fragment for highlighting) |
||
| 411 | * @return string |
||
| 412 | * @throws Zend_Search_Lucene_Exception |
||
| 413 | */ |
||
| 414 | public function highlightExtended($words, $callback, $params = array()) |
||
| 451 | |||
| 452 | |||
| 453 | /** |
||
| 454 | * Get HTML |
||
| 455 | * |
||
| 456 | * @return string |
||
| 457 | */ |
||
| 458 | public function getHTML() |
||
| 462 | |||
| 463 | /** |
||
| 464 | * Get HTML body |
||
| 465 | * |
||
| 466 | * @return string |
||
| 467 | */ |
||
| 468 | public function getHtmlBody() |
||
| 480 | } |
||
| 481 | |||
| 482 |