Complex classes like SimplePage often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SimplePage, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 335 | class SimplePage { |
||
| 336 | var $_links; |
||
| 337 | var $_title; |
||
| 338 | var $_last_widget; |
||
| 339 | var $_label; |
||
| 340 | var $_left_over_labels; |
||
| 341 | var $_open_forms; |
||
| 342 | var $_complete_forms; |
||
| 343 | var $_frameset; |
||
| 344 | var $_frames; |
||
| 345 | var $_frameset_nesting_level; |
||
| 346 | var $_transport_error; |
||
| 347 | var $_raw; |
||
| 348 | var $_text; |
||
| 349 | var $_sent; |
||
| 350 | var $_headers; |
||
| 351 | var $_method; |
||
| 352 | var $_url; |
||
| 353 | var $_base = false; |
||
| 354 | var $_request_data; |
||
| 355 | |||
| 356 | /** |
||
| 357 | * Parses a page ready to access its contents. |
||
| 358 | * @param SimpleHttpResponse $response Result of HTTP fetch. |
||
| 359 | * @access public |
||
| 360 | */ |
||
| 361 | function SimplePage($response = false) { |
||
| 362 | $this->_links = array(); |
||
| 363 | $this->_title = false; |
||
| 364 | $this->_left_over_labels = array(); |
||
| 365 | $this->_open_forms = array(); |
||
| 366 | $this->_complete_forms = array(); |
||
| 367 | $this->_frameset = false; |
||
| 368 | $this->_frames = array(); |
||
| 369 | $this->_frameset_nesting_level = 0; |
||
| 370 | $this->_text = false; |
||
| 371 | if ($response) { |
||
| 372 | $this->_extractResponse($response); |
||
| 373 | } else { |
||
| 374 | $this->_noResponse(); |
||
| 375 | } |
||
| 376 | } |
||
| 377 | |||
| 378 | /** |
||
| 379 | * Extracts all of the response information. |
||
| 380 | * @param SimpleHttpResponse $response Response being parsed. |
||
| 381 | * @access private |
||
| 382 | */ |
||
| 383 | function _extractResponse($response) { |
||
| 392 | |||
| 393 | /** |
||
| 394 | * Sets up a missing response. |
||
| 395 | * @access private |
||
| 396 | */ |
||
| 397 | function _noResponse() { |
||
| 406 | |||
| 407 | /** |
||
| 408 | * Original request as bytes sent down the wire. |
||
| 409 | * @return mixed Sent content. |
||
| 410 | * @access public |
||
| 411 | */ |
||
| 412 | function getRequest() { |
||
| 415 | |||
| 416 | /** |
||
| 417 | * Accessor for raw text of page. |
||
| 418 | * @return string Raw unparsed content. |
||
| 419 | * @access public |
||
| 420 | */ |
||
| 421 | function getRaw() { |
||
| 424 | |||
| 425 | /** |
||
| 426 | * Accessor for plain text of page as a text browser |
||
| 427 | * would see it. |
||
| 428 | * @return string Plain text of page. |
||
| 429 | * @access public |
||
| 430 | */ |
||
| 431 | function getText() { |
||
| 437 | |||
| 438 | /** |
||
| 439 | * Accessor for raw headers of page. |
||
| 440 | * @return string Header block as text. |
||
| 441 | * @access public |
||
| 442 | */ |
||
| 443 | function getHeaders() { |
||
| 449 | |||
| 450 | /** |
||
| 451 | * Original request method. |
||
| 452 | * @return string GET, POST or HEAD. |
||
| 453 | * @access public |
||
| 454 | */ |
||
| 455 | function getMethod() { |
||
| 458 | |||
| 459 | /** |
||
| 460 | * Original resource name. |
||
| 461 | * @return SimpleUrl Current url. |
||
| 462 | * @access public |
||
| 463 | */ |
||
| 464 | function getUrl() { |
||
| 467 | |||
| 468 | /** |
||
| 469 | * Base URL if set via a BASE tag, page URL otherwise. |
||
| 470 | * @return SimpleUrl Base url. |
||
| 471 | * @access public |
||
| 472 | */ |
||
| 473 | function getBaseUrl() { |
||
| 476 | |||
| 477 | /** |
||
| 478 | * Original request data. |
||
| 479 | * @return mixed Sent content. |
||
| 480 | * @access public |
||
| 481 | */ |
||
| 482 | function getRequestData() { |
||
| 485 | |||
| 486 | /** |
||
| 487 | * Accessor for last error. |
||
| 488 | * @return string Error from last response. |
||
| 489 | * @access public |
||
| 490 | */ |
||
| 491 | function getTransportError() { |
||
| 494 | |||
| 495 | /** |
||
| 496 | * Accessor for current MIME type. |
||
| 497 | * @return string MIME type as string; e.g. 'text/html' |
||
| 498 | * @access public |
||
| 499 | */ |
||
| 500 | function getMimeType() { |
||
| 506 | |||
| 507 | /** |
||
| 508 | * Accessor for HTTP response code. |
||
| 509 | * @return integer HTTP response code received. |
||
| 510 | * @access public |
||
| 511 | */ |
||
| 512 | function getResponseCode() { |
||
| 518 | |||
| 519 | /** |
||
| 520 | * Accessor for last Authentication type. Only valid |
||
| 521 | * straight after a challenge (401). |
||
| 522 | * @return string Description of challenge type. |
||
| 523 | * @access public |
||
| 524 | */ |
||
| 525 | function getAuthentication() { |
||
| 531 | |||
| 532 | /** |
||
| 533 | * Accessor for last Authentication realm. Only valid |
||
| 534 | * straight after a challenge (401). |
||
| 535 | * @return string Name of security realm. |
||
| 536 | * @access public |
||
| 537 | */ |
||
| 538 | function getRealm() { |
||
| 544 | |||
| 545 | /** |
||
| 546 | * Accessor for current frame focus. Will be |
||
| 547 | * false as no frames. |
||
| 548 | * @return array Always empty. |
||
| 549 | * @access public |
||
| 550 | */ |
||
| 551 | function getFrameFocus() { |
||
| 554 | |||
| 555 | /** |
||
| 556 | * Sets the focus by index. The integer index starts from 1. |
||
| 557 | * @param integer $choice Chosen frame. |
||
| 558 | * @return boolean Always false. |
||
| 559 | * @access public |
||
| 560 | */ |
||
| 561 | function setFrameFocusByIndex($choice) { |
||
| 564 | |||
| 565 | /** |
||
| 566 | * Sets the focus by name. Always fails for a leaf page. |
||
| 567 | * @param string $name Chosen frame. |
||
| 568 | * @return boolean False as no frames. |
||
| 569 | * @access public |
||
| 570 | */ |
||
| 571 | function setFrameFocus($name) { |
||
| 574 | |||
| 575 | /** |
||
| 576 | * Clears the frame focus. Does nothing for a leaf page. |
||
| 577 | * @access public |
||
| 578 | */ |
||
| 579 | function clearFrameFocus() { |
||
| 581 | |||
| 582 | /** |
||
| 583 | * Adds a tag to the page. |
||
| 584 | * @param SimpleTag $tag Tag to accept. |
||
| 585 | * @access public |
||
| 586 | */ |
||
| 587 | function acceptTag(&$tag) { |
||
| 601 | |||
| 602 | /** |
||
| 603 | * Opens a label for a described widget. |
||
| 604 | * @param SimpleFormTag $tag Tag to accept. |
||
| 605 | * @access public |
||
| 606 | */ |
||
| 607 | function acceptLabelStart(&$tag) { |
||
| 611 | |||
| 612 | /** |
||
| 613 | * Closes the most recently opened label. |
||
| 614 | * @access public |
||
| 615 | */ |
||
| 616 | function acceptLabelEnd() { |
||
| 627 | |||
| 628 | /** |
||
| 629 | * Tests to see if a tag is a possible form |
||
| 630 | * element. |
||
| 631 | * @param string $name HTML element name. |
||
| 632 | * @return boolean True if form element. |
||
| 633 | * @access private |
||
| 634 | */ |
||
| 635 | function _isFormElement($name) { |
||
| 638 | |||
| 639 | /** |
||
| 640 | * Opens a form. New widgets go here. |
||
| 641 | * @param SimpleFormTag $tag Tag to accept. |
||
| 642 | * @access public |
||
| 643 | */ |
||
| 644 | function acceptFormStart(&$tag) { |
||
| 647 | |||
| 648 | /** |
||
| 649 | * Closes the most recently opened form. |
||
| 650 | * @access public |
||
| 651 | */ |
||
| 652 | function acceptFormEnd() { |
||
| 657 | |||
| 658 | /** |
||
| 659 | * Opens a frameset. A frameset may contain nested |
||
| 660 | * frameset tags. |
||
| 661 | * @param SimpleFramesetTag $tag Tag to accept. |
||
| 662 | * @access public |
||
| 663 | */ |
||
| 664 | function acceptFramesetStart(&$tag) { |
||
| 670 | |||
| 671 | /** |
||
| 672 | * Closes the most recently opened frameset. |
||
| 673 | * @access public |
||
| 674 | */ |
||
| 675 | function acceptFramesetEnd() { |
||
| 680 | |||
| 681 | /** |
||
| 682 | * Takes a single frame tag and stashes it in |
||
| 683 | * the current frame set. |
||
| 684 | * @param SimpleFrameTag $tag Tag to accept. |
||
| 685 | * @access public |
||
| 686 | */ |
||
| 687 | function acceptFrame(&$tag) { |
||
| 694 | |||
| 695 | /** |
||
| 696 | * Test to see if in the middle of reading |
||
| 697 | * a frameset. |
||
| 698 | * @return boolean True if in frameset. |
||
| 699 | * @access private |
||
| 700 | */ |
||
| 701 | function _isLoadingFrames() { |
||
| 707 | |||
| 708 | /** |
||
| 709 | * Test to see if link is an absolute one. |
||
| 710 | * @param string $url Url to test. |
||
| 711 | * @return boolean True if absolute. |
||
| 712 | * @access protected |
||
| 713 | */ |
||
| 714 | function _linkIsAbsolute($url) { |
||
| 718 | |||
| 719 | /** |
||
| 720 | * Adds a link to the page. |
||
| 721 | * @param SimpleAnchorTag $tag Link to accept. |
||
| 722 | * @access protected |
||
| 723 | */ |
||
| 724 | function _addLink($tag) { |
||
| 727 | |||
| 728 | /** |
||
| 729 | * Marker for end of complete page. Any work in |
||
| 730 | * progress can now be closed. |
||
| 731 | * @access public |
||
| 732 | */ |
||
| 733 | function acceptPageEnd() { |
||
| 745 | |||
| 746 | /** |
||
| 747 | * Test for the presence of a frameset. |
||
| 748 | * @return boolean True if frameset. |
||
| 749 | * @access public |
||
| 750 | */ |
||
| 751 | function hasFrames() { |
||
| 754 | |||
| 755 | /** |
||
| 756 | * Accessor for frame name and source URL for every frame that |
||
| 757 | * will need to be loaded. Immediate children only. |
||
| 758 | * @return boolean/array False if no frameset or |
||
| 759 | * otherwise a hash of frame URLs. |
||
| 760 | * The key is either a numerical |
||
| 761 | * base one index or the name attribute. |
||
| 762 | * @access public |
||
| 763 | */ |
||
| 764 | function getFrameset() { |
||
| 776 | |||
| 777 | /** |
||
| 778 | * Fetches a list of loaded frames. |
||
| 779 | * @return array/string Just the URL for a single page. |
||
| 780 | * @access public |
||
| 781 | */ |
||
| 782 | function getFrames() { |
||
| 786 | |||
| 787 | /** |
||
| 788 | * Accessor for a list of all links. |
||
| 789 | * @return array List of urls with scheme of |
||
| 790 | * http or https and hostname. |
||
| 791 | * @access public |
||
| 792 | */ |
||
| 793 | function getUrls() { |
||
| 801 | |||
| 802 | /** |
||
| 803 | * Accessor for URLs by the link label. Label will match |
||
| 804 | * regardless of whitespace issues and case. |
||
| 805 | * @param string $label Text of link. |
||
| 806 | * @return array List of links with that label. |
||
| 807 | * @access public |
||
| 808 | */ |
||
| 809 | function getUrlsByLabel($label) { |
||
| 818 | |||
| 819 | /** |
||
| 820 | * Accessor for a URL by the id attribute. |
||
| 821 | * @param string $id Id attribute of link. |
||
| 822 | * @return SimpleUrl URL with that id or false if none. |
||
| 823 | * @access public |
||
| 824 | */ |
||
| 825 | function getUrlById($id) { |
||
| 833 | |||
| 834 | /** |
||
| 835 | * Converts a link tag into a target URL. |
||
| 836 | * @param SimpleAnchor $link Parsed link. |
||
| 837 | * @return SimpleUrl URL with frame target if any. |
||
| 838 | * @access private |
||
| 839 | */ |
||
| 840 | function _getUrlFromLink($link) { |
||
| 847 | |||
| 848 | /** |
||
| 849 | * Expands expandomatic URLs into fully qualified |
||
| 850 | * URLs. |
||
| 851 | * @param SimpleUrl $url Relative URL. |
||
| 852 | * @return SimpleUrl Absolute URL. |
||
| 853 | * @access public |
||
| 854 | */ |
||
| 855 | function expandUrl($url) { |
||
| 862 | |||
| 863 | /** |
||
| 864 | * Sets the base url for the page. |
||
| 865 | * @param SimpleTag $tag Base URL for page. |
||
| 866 | * @access protected |
||
| 867 | */ |
||
| 868 | function _setBase(&$tag) { |
||
| 872 | |||
| 873 | /** |
||
| 874 | * Sets the title tag contents. |
||
| 875 | * @param SimpleTitleTag $tag Title of page. |
||
| 876 | * @access protected |
||
| 877 | */ |
||
| 878 | function _setTitle(&$tag) { |
||
| 881 | |||
| 882 | /** |
||
| 883 | * Accessor for parsed title. |
||
| 884 | * @return string Title or false if no title is present. |
||
| 885 | * @access public |
||
| 886 | */ |
||
| 887 | function getTitle() { |
||
| 893 | |||
| 894 | /** |
||
| 895 | * Finds a held form by button label. Will only |
||
| 896 | * search correctly built forms. |
||
| 897 | * @param SimpleSelector $selector Button finder. |
||
| 898 | * @return SimpleForm Form object containing |
||
| 899 | * the button. |
||
| 900 | * @access public |
||
| 901 | */ |
||
| 902 | function &getFormBySubmit($selector) { |
||
| 911 | |||
| 912 | /** |
||
| 913 | * Finds a held form by image using a selector. |
||
| 914 | * Will only search correctly built forms. |
||
| 915 | * @param SimpleSelector $selector Image finder. |
||
| 916 | * @return SimpleForm Form object containing |
||
| 917 | * the image. |
||
| 918 | * @access public |
||
| 919 | */ |
||
| 920 | function &getFormByImage($selector) { |
||
| 929 | |||
| 930 | /** |
||
| 931 | * Finds a held form by the form ID. A way of |
||
| 932 | * identifying a specific form when we have control |
||
| 933 | * of the HTML code. |
||
| 934 | * @param string $id Form ID. |
||
| 935 | * @return SimpleForm Form object containing the matching ID. |
||
| 936 | * @access public |
||
| 937 | */ |
||
| 938 | function &getFormById($id) { |
||
| 947 | |||
| 948 | /** |
||
| 949 | * Sets a field on each form in which the field is |
||
| 950 | * available. |
||
| 951 | * @param SimpleSelector $selector Field finder. |
||
| 952 | * @param string $value Value to set field to. |
||
| 953 | * @return boolean True if value is valid. |
||
| 954 | * @access public |
||
| 955 | */ |
||
| 956 | function setField($selector, $value, $position=false) { |
||
| 965 | |||
| 966 | /** |
||
| 967 | * Accessor for a form element value within a page. |
||
| 968 | * @param SimpleSelector $selector Field finder. |
||
| 969 | * @return string/boolean A string if the field is |
||
| 970 | * present, false if unchecked |
||
| 971 | * and null if missing. |
||
| 972 | * @access public |
||
| 973 | */ |
||
| 974 | function getField($selector) { |
||
| 983 | } |
||
| 984 | |||
| 985 |