Complex classes like Base_Site_Model often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Base_Site_Model, and based on these observations, apply Extract Interface, too.
| 1 | <?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed'); |
||
| 25 | abstract class Base_Site_Model extends CI_Model { |
||
| 26 | public $site = ''; |
||
| 27 | public $titleFormat = ''; |
||
| 28 | public $chapterFormat = ''; |
||
| 29 | |||
/**
 * Sets up the model: runs the parent constructor, loads the database
 * through the loader, and stores the concrete class name in $site.
 */
public function __construct() {
	parent::__construct();

	$this->load->database();

	// The name of the instantiated (sub)class is used as the site identifier.
	$this->site = static::class;
}
|
| 37 | |||
| 38 | abstract public function getFullTitleURL(string $title_url) : string; |
||
| 39 | |||
| 40 | abstract public function getChapterData(string $title_url, string $chapter) : array; |
||
| 41 | |||
| 42 | abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array; |
||
| 43 | |||
/**
 * Tests a title URL against this site's $titleFormat regular expression.
 *
 * A non-matching value is written to the error log.
 *
 * @param string $title_url
 *
 * @return bool TRUE when the pattern matches, FALSE otherwise.
 */
final public function isValidTitleURL(string $title_url) : bool {
	if(preg_match($this->titleFormat, $title_url)) {
		return TRUE;
	}

	log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
	return FALSE;
}
||
/**
 * Tests a chapter identifier against this site's $chapterFormat regular expression.
 *
 * A non-matching value is written to the error log.
 *
 * @param string $chapter
 *
 * @return bool TRUE when the pattern matches, FALSE otherwise.
 */
final public function isValidChapter(string $chapter) : bool {
	if(preg_match($this->chapterFormat, $chapter)) {
		return TRUE;
	}

	log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
	return FALSE;
}
||
| 54 | |||
/**
 * Fetches a URL with curl and returns the parsed response.
 *
 * @param string $url             URL to request.
 * @param string $cookie_string   Optional raw cookie header value (CURLOPT_COOKIE).
 * @param string $cookiejar_path  Optional cookie file to read cookies from (CURLOPT_COOKIEFILE).
 * @param bool   $follow_redirect Whether curl should follow Location redirects.
 * @param bool   $isPost          Send the request as a POST.
 * @param array  $postFields      Fields sent as the urlencoded POST body when $isPost is TRUE.
 *
 * @return array|false ['headers' => ..., 'status_code' => ..., 'body' => ...], or FALSE if the request failed.
 */
final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
	$ch = curl_init();
	if($ch === FALSE) {
		log_message('error', "curl failed to initialise for: {$url}");
		return FALSE;
	}

	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_ENCODING , "gzip");
	//curl_setopt($ch, CURLOPT_VERBOSE, 1);
	curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned in front of the body and split off below.

	if($follow_redirect) curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

	if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
	if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

	//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
	curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

	//TODO: Check in a while if this being enabled still causes issues
	//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

	curl_setopt($ch, CURLOPT_URL, $url);

	if($isPost) {
		//CURLOPT_POST is an on/off switch, not a field count.
		curl_setopt($ch, CURLOPT_POST, TRUE);
		curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
	}

	$response = curl_exec($ch);
	if($response === FALSE) {
		log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
		//Release the handle on the failure path too, otherwise it leaks until the request ends.
		curl_close($ch);
		//FIXME: We don't always account for FALSE return
		return FALSE;
	}

	$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
	$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
	curl_close($ch);

	return [
		'headers'     => http_parse_headers(substr($response, 0, $header_size)),
		'status_code' => $status_code,
		'body'        => substr($response, $header_size)
	];
}
||
| 99 | |||
/**
 * Validates a get_content() response and extracts the title, "latest" and
 * chapter nodes from its HTML body using the given XPath expressions.
 *
 * Every failure (failed request, non-2xx status, empty body, failure string
 * present, unexpected node counts) is logged and results in FALSE.
 *
 * @param array|false $content             Response from get_content(); anything that is not an array is treated as a failed request.
 * @param string      $title_url           Only used in log messages.
 * @param string      $node_title_string   XPath for the title node (must match exactly one node).
 * @param string      $node_row_string     XPath for the row holding the latest chapter (must match exactly one node).
 * @param string      $node_latest_string  XPath, relative to the row, for the "latest" node.
 * @param string      $node_chapter_string XPath, relative to the row, for the chapter node. When empty, the row itself is used.
 * @param string      $failure_string      If non-empty and found in the body, the page is treated as a failure.
 *
 * @return array|false ['nodes_title' => DOMNode, 'nodes_latest' => DOMNode, 'nodes_chapter' => DOMNode], or FALSE on failure.
 */
final protected function parseTitleDataDOM(
	$content, string $title_url,
	string $node_title_string, string $node_row_string,
	string $node_latest_string, string $node_chapter_string,
	string $failure_string = "") {

	if(!is_array($content)) {
		log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
		return FALSE;
	}

	//The headers are not needed here, so only the status code and body are pulled out.
	list('status_code' => $status_code, 'body' => $data) = $content;

	if(!($status_code >= 200 && $status_code < 300)) {
		log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
		return FALSE;
	}
	if(empty($data)) {
		log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
		return FALSE;
	}
	if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
		log_message('error', "{$this->site} : {$title_url} | Failure string matched");
		return FALSE;
	}

	$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

	$dom = new DOMDocument();
	//Silence HTML parse warnings while loading, then drop the buffered errors
	//and restore whatever setting was active before (instead of forcing FALSE).
	$previousErrorSetting = libxml_use_internal_errors(TRUE);
	$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
	libxml_clear_errors();
	libxml_use_internal_errors($previousErrorSetting);

	$xpath       = new DOMXPath($dom);
	$nodes_title = $xpath->query($node_title_string);
	$nodes_row   = $xpath->query($node_row_string);
	//DOMXPath::query() returns FALSE for a malformed expression.
	if($nodes_title === FALSE || $nodes_row === FALSE) {
		log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (TITLE or ROW)");
		return FALSE;
	}
	if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
		log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
		return FALSE;
	}

	$firstRow     = $nodes_row->item(0);
	$nodes_latest = $xpath->query($node_latest_string, $firstRow);
	//With no chapter expression the row node itself acts as the chapter node.
	$nodes_chapter = ($node_chapter_string !== '') ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row;
	if($nodes_latest === FALSE || $nodes_chapter === FALSE) {
		log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (LATEST or CHAPTER)");
		return FALSE;
	}
	if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
		log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
		return FALSE;
	}

	return [
		'nodes_title'   => $nodes_title->item(0),
		'nodes_latest'  => $nodes_latest->item(0),
		'nodes_chapter' => $nodes_chapter->item(0)
	];
}
||
| 166 | |||
/**
 * Hook called by parseTitleDataDOM() on the raw body before it is loaded
 * into a DOMDocument. This default implementation returns the data unchanged.
 *
 * @param string $data Raw response body.
 *
 * @return string The body to parse.
 */
public function cleanTitleDataDOM(string $data) : string {
	return $data;
}
||
| 170 | |||
//This has it's own function due to FoOlSlide being used a lot by fan translation sites, and the code being pretty much the same across all of them.
/**
 * Fetches a FoOlSlide series page and extracts the title, latest chapter and
 * last-updated time.
 *
 * @param string $fullURL   Full URL of the series page.
 * @param string $title_url Title identifier, only used for logging.
 *
 * @return array|null ['title' => string, 'latest_chapter' => string, 'last_updated' => 'Y-m-d H:i:s'], or NULL on any failure.
 */
final public function parseFoolSlide(string $fullURL, string $title_url) {
	//The request is POSTed with adult=true.
	$content = $this->get_content($fullURL, "", "", FALSE, TRUE, ['adult' => 'true']);
	if(!$content) return NULL;

	//Keep only the <article> part of the page before parsing.
	$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

	$data = $this->parseTitleDataDOM(
		$content,
		$title_url,
		"//div[@class='large comic']/h1[@class='title']",
		"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
		"div[@class='meta_r']",
		"div[@class='title']/a"
	);
	if(!$data) return NULL;

	//The date is taken from the text after the first comma of the meta node.
	//A missing segment or an unparsable date would otherwise pass FALSE to date(),
	//which throws a TypeError under strict_types, so treat it as a failed parse.
	$metaSegments = explode(',', (string) $data['nodes_latest']->nodeValue);
	$timestamp    = isset($metaSegments[1]) ? strtotime(str_replace('.', '', $metaSegments[1])) : FALSE;
	if($timestamp === FALSE) {
		log_message('error', "{$this->site} : {$title_url} | Unable to parse last updated date");
		return NULL;
	}

	$link = (string) $data['nodes_chapter']->getAttribute('href');

	return [
		'title'          => trim($data['nodes_title']->textContent),
		'latest_chapter' => preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link),
		'last_updated'   => date("Y-m-d H:i:s", $timestamp)
	];
}
||
| 198 | |||
/**
 * Runs the site's handleCustomFollow() hook and reports whether the follow succeeded.
 *
 * The hook is handed a callback taking ($content, $id, $successCallback). The follow
 * counts as successful when $content is an array with status_code 200 and, if a
 * callable $successCallback is supplied, it returns a truthy value for the body.
 * Every other outcome is logged as an error.
 *
 * @param string $data  Passed through to handleCustomFollow().
 * @param array  $extra Passed through to handleCustomFollow().
 *
 * @return array ['followed' => 'Y'] on success, otherwise an empty array.
 */
final public function doCustomFollow(string $data = "", array $extra = []) : array {
	$titleData = [];

	$onResponse = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
		if(!is_array($content)) {
			log_message('error', "doCustomFollow failed (Failed request) for {$id}");
			return;
		}
		if(!array_key_exists('status_code', $content)) {
			log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
			return;
		}

		$statusCode = $content['status_code'];
		if($statusCode !== 200) {
			log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
			return;
		}

		//A callable success callback gets the final say; without one a 200 is enough.
		if(is_callable($successCallback) && !$successCallback($content['body'])) {
			log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
			return;
		}

		$titleData['followed'] = 'Y';

		log_message('info', "doCustomFollow succeeded for {$id}");
	};

	$this->handleCustomFollow($onResponse, $data, $extra);

	return $titleData;
}
||
/**
 * Hook used by doCustomFollow(). This default implementation does nothing,
 * so the callback is never invoked and doCustomFollow() returns an empty array.
 *
 * @param callable $callback Called by doCustomFollow() as ($content, $id, $successCallback).
 * @param string   $data
 * @param array    $extra
 */
public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
}
||
| 229 | 12 | final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool { |
|
| 230 | //FIXME: Make this more generic when we have more site support for it. MangaFox and Batoto have similar chapter formats. |
||
| 231 | |||
| 232 | //NOTE: We only need to check against the new chapter here, as that is what is used for confirming update. |
||
| 233 | 12 | $status = FALSE; |
|
| 234 | |||
| 235 | //Make sure we have a volume element |
||
| 236 | 12 | if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0'); |
|
| 237 | 12 | if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0'); |
|
| 281 | } |
||
| 282 |
This check looks for assignments to variables using the `list(...)` function, where not all assigned variables are subsequently used.
Consider the following code example: `list($a, $b, $c) = returnThreeValues(); print $a . " - " . $c;`
Only the variables `$a` and `$c` are used. There was no need to assign `$b`.
Instead, the list call could have been: `list($a, , $c) = returnThreeValues();`