Completed
Push — master ( 8c1715...c08a3e )
by Angus
03:23
created

Base_Site_Model   A

Complexity

Total Complexity 31

Size/Duplication

Total Lines 182
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Test Coverage

Coverage 0%

Importance

Changes 0
Metric Value
dl 0
loc 182
ccs 0
cts 0
cp 0
rs 9.8
c 0
b 0
f 0
wmc 31
lcom 1
cbo 2

13 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
getFullTitleURL() 0 1 ?
getChapterData() 0 1 ?
getTitleData() 0 1 ?
A isValidTitleURL() 0 5 2
A isValidChapter() 0 5 2
B get_content() 0 44 6
C parseTitleDataDOM() 0 58 12
A cleanTitleDataDOM() 0 3 1
B parseFoolSlide() 0 26 4
A doCustomFollow() 0 1 1
A doCustomUpdate() 0 1 1
A doCustomCheck() 0 1 1
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Lazy accessor for site models.
 *
 * Reading an undefined property whose name is a concrete subclass of
 * Base_Site_Model instantiates that class, stores the instance on this object
 * under the same name and returns it. Any other property name is forwarded to
 * the CodeIgniter super-object returned by get_instance().
 */
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolve an undefined property.
	 *
	 * @param string $name Property being read; treated as a class name when it
	 *                     refers to an instantiable Base_Site_Model subclass.
	 *
	 * @return mixed The site model instance, or whatever get_instance()->{$name} yields.
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422

		// is_subclass_of() (rather than comparing get_parent_class() to a literal)
		// also accepts site models that extend an intermediate base class.
		// The instantiability check keeps abstract intermediates away from `new`.
		$isSiteModel = class_exists($name)
			&& is_subclass_of($name, 'Base_Site_Model')
			&& (new ReflectionClass($name))->isInstantiable();

		if(!$isSiteModel) {
			return get_instance()->{$name};
		}

		$this->loadSite($name);
		return $this->{$name};
	}

	/**
	 * Instantiate a site model and store it on this object so later reads of
	 * the same property no longer go through __get().
	 *
	 * @param string $siteName Class name of the site model to create.
	 */
	private function loadSite(string $siteName) {
		$this->{$siteName} = new $siteName();
	}
}
24
25
/**
 * Shared behaviour for every tracked site.
 *
 * Concrete sites supply the URL/chapter formats and implement the abstract
 * getters; this class provides validation against those formats, an HTTP
 * fetch helper, an XPath-based extraction helper and a ready-made FoOlSlide
 * parser.
 */
abstract class Base_Site_Model extends CI_Model {
	/** @var string Class name of the concrete site; filled in by the constructor. */
	public $site          = '';
	/** @var string PCRE pattern used by isValidTitleURL(). */
	public $titleFormat   = '';
	/** @var string PCRE pattern used by isValidChapter(). */
	public $chapterFormat = '';

	public function __construct() {
		parent::__construct();

		$this->load->database();

		$this->site = get_class($this);
	}

	abstract public function getFullTitleURL(string $title_url) : string;

	abstract public function getChapterData(string $title_url, string $chapter) : array;

	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);

	/**
	 * Check a title URL against $titleFormat, logging an error when it does not match.
	 *
	 * @param string $title_url
	 *
	 * @return bool TRUE when the pattern matches.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		$success = (bool) preg_match($this->titleFormat, $title_url);
		if(!$success) log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return $success;
	}

	/**
	 * Check a chapter identifier against $chapterFormat, logging an error when it does not match.
	 *
	 * @param string $chapter
	 *
	 * @return bool TRUE when the pattern matches.
	 */
	public function isValidChapter(string $chapter) : bool {
		$success = (bool) preg_match($this->chapterFormat, $chapter);
		if(!$success) log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return $success;
	}

	/**
	 * Fetch a URL with cURL and split the response into headers, status code and body.
	 *
	 * @param string $url
	 * @param string $cookie_string   Sent via CURLOPT_COOKIE when non-empty.
	 * @param string $cookiejar_path  Passed to CURLOPT_COOKIEFILE when non-empty.
	 * @param bool   $follow_redirect Enables CURLOPT_FOLLOWLOCATION.
	 * @param bool   $isPost          Send a POST request instead of a GET.
	 * @param array  $postFields      POST fields, encoded with http_build_query().
	 *
	 * @return array|false ['headers' => ..., 'status_code' => ..., 'body' => ...], or FALSE when cURL fails.
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		if($ch === FALSE) {
			log_message('error', "curl failed to initialize");
			return FALSE;
		}

		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned inline and split off the body below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is a boolean switch; passing a field count would turn an empty POST into a GET.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			curl_close($ch); //Release the handle on the failure path as well.
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		//NOTE(review): http_parse_headers() is not a PHP core function - presumably provided by pecl_http or a project helper; confirm.
		$headers = http_parse_headers(substr($response, 0, $header_size));
		$body    = substr($response, $header_size);

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}

	/**
	 * Validate a fetched page and extract the title, "latest" and chapter nodes via XPath.
	 *
	 * Every failure is logged and reported as FALSE: a failed fetch, a non-2xx
	 * status, an empty body, a matched failure string, an invalid XPath
	 * expression, or any query not matching exactly one node.
	 *
	 * @param array|false $content             Result of get_content().
	 * @param string      $title_url           Only used in log messages.
	 * @param string      $node_title_string   XPath for the title node (document scope).
	 * @param string      $node_row_string     XPath for the row node (document scope).
	 * @param string      $node_latest_string  XPath for the "latest" node, relative to the row.
	 * @param string      $node_chapter_string XPath for the chapter node, relative to the row; '' uses the row itself.
	 * @param string      $failure_string      When non-empty and present in the body, the page is rejected.
	 *
	 * @return DOMNode[]|false Keys: 'nodes_title', 'nodes_latest', 'nodes_chapter'.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		//list('headers' => $headers, 'status_code' => $status_code, 'body' => $data) = $content; //TODO: PHP 7.1

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.
		if($data === '') {
			//loadHTML() rejects an empty document, so stop here if an override cleaned everything away.
			log_message('error', "{$this->site} : {$title_url} | Data is empty after cleaning");
			return FALSE;
		}

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then drop the buffered errors and restore the caller's libxml setting.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if($nodes_title === FALSE || $nodes_row === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (TITLE or ROW)");
			return FALSE;
		}
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query($node_latest_string, $firstRow);
		$nodes_chapter = ($node_chapter_string !== '') ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row;
		if($nodes_latest === FALSE || $nodes_chapter === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (LATEST or CHAPTER)");
			return FALSE;
		}
		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}

	/**
	 * Hook for trimming the raw HTML before it is parsed by parseTitleDataDOM().
	 * The base implementation returns the input unchanged.
	 *
	 * @param string $data Raw response body.
	 *
	 * @return string
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}

	/**
	 * Fetch and parse a FoOlSlide title page.
	 *
	 * This has its own function due to FoOlSlide being used a lot by fan translation sites, and the code being pretty much the same across all of them.
	 *
	 * @param string $fullURL   Page to fetch (POSTed with adult=true).
	 * @param string $title_url Only used in log messages.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when fetching or parsing fails.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$content = $this->get_content($fullURL, "", "", FALSE, TRUE, ['adult' => 'true']);
		if(!$content) {
			return NULL;
		}

		//Only the <article> element is needed, so cut everything else away before parsing.
		$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$data) {
			return NULL;
		}

		//The date is taken from the text after the first comma of the meta node, with dots stripped.
		//Guard both the missing segment and an unparsable date: date() would throw a TypeError on FALSE under strict_types.
		$metaParts = explode(',', (string) $data['nodes_latest']->nodeValue);
		$timestamp = isset($metaParts[1]) ? strtotime(str_replace('.', '', $metaParts[1])) : FALSE;
		if($timestamp === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unable to parse last updated date");
			return NULL;
		}

		$link = (string) $data['nodes_chapter']->getAttribute('href');

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}

	//Optional per-site hooks; the base implementations intentionally do nothing.
	public function doCustomFollow(string $data = "", array $extra = []) {}
	public function doCustomUpdate() {}
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
}
207