Completed
Push — master ( 55092a...068893 )
by Angus
02:34
created

Base_FoolSlide_Site_Model   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 48
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 0%

Importance

Changes 0
Metric Value
dl 0
loc 48
ccs 0
cts 23
cp 0
rs 10
c 0
b 0
f 0
wmc 11
lcom 1
cbo 1

3 Methods

Rating   Name   Duplication   Size   Complexity  
A getFullTitleURL() 0 3 1
A getChapterData() 0 7 3
C getTitleData() 0 29 7
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Entry point for site models: accessing a property named after a site model
 * class instantiates that class on first use and stores it on this object.
 */
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolve an undefined property.
	 *
	 * When $name is an existing class whose direct parent is Base_Site_Model or
	 * Base_FoolSlide_Site_Model, the class is instantiated, stored as a property of
	 * this object and returned. Anything else is read from the object returned by
	 * get_instance().
	 *
	 * @param string $name Property (and potential site model class) name
	 *
	 * @return mixed
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$isSiteModel = class_exists($name)
			&& in_array(get_parent_class($name), ['Base_Site_Model', 'Base_FoolSlide_Site_Model']);

		if($isSiteModel) {
			$this->loadSite($name);
			return $this->{$name};
		}

		return get_instance()->{$name};
	}

	/**
	 * Instantiate the given site model class and keep it as a property of the same name.
	 */
	private function loadSite(string $siteName) {
		$this->{$siteName} = new $siteName();
	}
}
24
25
abstract class Base_Site_Model extends CI_Model {
26
	public $site          = '';
27
	public $titleFormat   = '';
28
	public $chapterFormat = '';
29
30 16
	/**
	 * Run the parent constructor, load the database through the loader and
	 * record the concrete class name in $site (used as a prefix in log messages).
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//Late static binding resolves to the runtime class of this instance.
		$this->site = static::class;
	}
37
38
	/**
	 * Turn a site-specific title URL segment into a string for that title.
	 *
	 * Presumably the full URL of the title's page: Base_FoolSlide_Site_Model
	 * returns "{baseURL}/series/{title_url}". Confirm for other implementations.
	 *
	 * @param string $title_url Site-specific title identifier
	 *
	 * @return string
	 */
	abstract public function getFullTitleURL(string $title_url) : string;
39
40
	/**
	 * Describe a single chapter of a title.
	 *
	 * Base_FoolSlide_Site_Model returns an array with the keys 'url' and 'number';
	 * other implementations presumably use the same shape (confirm).
	 *
	 * @param string $title_url Site-specific title identifier
	 * @param string $chapter   Site-specific chapter identifier
	 *
	 * @return array
	 */
	abstract public function getChapterData(string $title_url, string $chapter) : array;
41
42
	/**
	 * Fetch the current data for a title.
	 *
	 * Base_FoolSlide_Site_Model returns an array with the keys 'title',
	 * 'latest_chapter' and 'last_updated', or NULL when nothing could be retrieved;
	 * other implementations presumably follow the same contract (confirm).
	 *
	 * @param string $title_url Site-specific title identifier
	 * @param bool   $firstGet  Meaning is not visible in this file; presumably TRUE on the first fetch of a title (confirm against callers)
	 *
	 * @return array|null
	 */
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
43
44 2
	/**
	 * Check a title URL against the $titleFormat regular expression.
	 *
	 * A non-matching value (or a regex failure) is logged as an error.
	 *
	 * @param string $title_url Value to validate
	 *
	 * @return bool TRUE when $titleFormat matches
	 */
	final public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
49 2
	/**
	 * Check a chapter identifier against the $chapterFormat regular expression.
	 *
	 * A non-matching value (or a regex failure) is logged as an error.
	 *
	 * @param string $chapter Value to validate
	 *
	 * @return bool TRUE when $chapterFormat matches
	 */
	final public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
54
55
	/**
	 * Perform an HTTP request with cURL and split the response into headers and body.
	 *
	 * @param string $url             URL to request
	 * @param string $cookie_string   Value for CURLOPT_COOKIE; skipped when empty
	 * @param string $cookiejar_path  File passed to CURLOPT_COOKIEFILE (cookies are read from it); skipped when empty
	 * @param bool   $follow_redirect Whether to follow Location redirects
	 * @param bool   $isPost          Send a POST request instead of a GET
	 * @param array  $postFields      Fields for the POST body (URL-encoded with http_build_query)
	 *
	 * @return array|false ['headers' => ..., 'status_code' => int, 'body' => string], or FALSE when the request failed.
	 *                     'headers' is whatever http_parse_headers() returns (not defined in this file).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		if($ch === FALSE) {
			log_message('error', "curl failed to initialize");
			return FALSE;
		}

		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned in front of the body and split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is an on/off switch, not a field count.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//Release the handle on the failure path as well.
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
99
100
	/**
	 * Validate a get_content() result and extract the title, latest-update and
	 * chapter nodes from its HTML body using XPath.
	 *
	 * Every failure is logged as an error and yields FALSE.
	 *
	 * @param array|false $content             Result of get_content(): array with 'status_code' and 'body', or a non-array on request failure
	 * @param string      $title_url           Title identifier, only used in log messages
	 * @param string      $node_title_string   XPath (document-wide) that must match exactly one title node
	 * @param string      $node_row_string     XPath (document-wide) that must match exactly one row node
	 * @param string      $node_latest_string  XPath evaluated relative to the row; must match exactly one node
	 * @param string      $node_chapter_string XPath evaluated relative to the row; when '' the row itself is used as the chapter node
	 * @param string      $failure_string      When non-empty and present in the body, the response is rejected
	 *
	 * @return DOMNode[]|false Array with the keys 'nodes_title', 'nodes_latest' and 'nodes_chapter', or FALSE
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		//Only the status code and the body are needed; the headers are not used here.
		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		//Silence libxml warnings for the (usually malformed) HTML, then drop the
		//buffered errors and restore whatever error mode was active before.
		$dom = new DOMDocument();
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		if($node_chapter_string !== '') {
			$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
		} else {
			//No separate chapter expression: the row node doubles as the chapter node.
			$nodes_chapter = $nodes_row;
		}

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
166
167
	/**
	 * Pre-process the raw HTML before parseTitleDataDOM() parses it.
	 *
	 * This base implementation hands the input back untouched. Being public and
	 * non-final, it can be overridden to trim the markup down first.
	 *
	 * @param string $data Raw response body
	 *
	 * @return string The HTML to parse
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}
170
171
	/**
	 * Run the site-specific follow logic and report whether it succeeded.
	 *
	 * handleCustomFollow() receives a callback which it is expected to invoke with
	 * the response of its request. The callback marks the follow as successful when
	 * the response is an array with status code 200 and, if a success callback was
	 * supplied, that callback returns a truthy value for the response body.
	 *
	 * @param string $data  Passed through to handleCustomFollow()
	 * @param array  $extra Passed through to handleCustomFollow()
	 *
	 * @return array ['followed' => 'Y'] on success, otherwise an empty array
	 */
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		$responseHandler = function($content, $id, ?callable $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}
			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			//Without a success callback any 200 response counts as a successful follow.
			if($successCallback !== NULL && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($responseHandler, $data, $extra);
		return $titleData;
	}
198
	/**
	 * Hook called by doCustomFollow(); does nothing in this base class.
	 *
	 * doCustomFollow() passes a callback accepting ($content, $id, $successCallback = NULL)
	 * and forwards its own $data and $extra arguments unchanged. Presumably site
	 * models override this to perform the request and invoke the callback (confirm).
	 *
	 * @param callable $callback Response handler supplied by doCustomFollow()
	 * @param string   $data
	 * @param array    $extra
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {}
199
	/**
	 * Empty hook; does nothing in this base class.
	 *
	 * NOTE(review): presumably overridden by site models that need a custom update routine - confirm against callers.
	 */
	public function doCustomUpdate() {}
200
	/**
	 * Empty hook; does nothing and returns nothing in this base class.
	 *
	 * NOTE(review): presumably overridden by site models to compare an old and a new
	 * chapter (possibly via doCustomCheckCompare()) - confirm against callers.
	 *
	 * @param string $oldChapter
	 * @param string $newChapter
	 */
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
201 12
	/**
	 * Decide whether the new chapter counts as an update over the old chapter.
	 *
	 * Both arguments are segment lists such as ['v2', 'c15'] or ['c15']; the first
	 * character of each segment is stripped and the rest is read as a number.
	 * A single segment is treated as a chapter without a volume (volume 0).
	 *
	 * The result is TRUE when both lists have the same number of segments and either
	 *  - the chapter number increased, or
	 *  - the volume increased and the chapter number did not decrease, or
	 *  - the volume increased and both chapter numbers are below 10 (covers sites
	 *    that restart chapter numbering with every volume).
	 *
	 * @param array $oldChapterSegments Segments of the currently stored chapter
	 * @param array $newChapterSegments Segments of the candidate chapter
	 *
	 * @return bool
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//FIXME: Make this more generic when we have more site support for it. MangaFox and Batoto have similar chapter formats.

		//Volume labels which carry no usable number and are therefore read as volume 0.
		$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];

		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) {
			array_unshift($oldChapterSegments, 'v0');
		}
		if(count($newChapterSegments) === 1) {
			array_unshift($newChapterSegments, 'v0');
		}

		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			return FALSE;
		}

		$oldVolume = 0.0;
		$newVolume = 0.0;
		if($segmentCount === 2) {
			$rawOldVolume = substr(array_shift($oldChapterSegments), 1);
			$rawNewVolume = substr(array_shift($newChapterSegments), 1);

			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$oldVolume = in_array($rawOldVolume, $placeholderVolumes) ? 0.0 : floatval($rawOldVolume);
			$newVolume = in_array($rawNewVolume, $placeholderVolumes) ? 0.0 : floatval($rawNewVolume);
		}

		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		$chapterIncreased  = $newChapter > $oldChapter;
		$volumeIncreased   = $newVolume > $oldVolume;
		$bothEarlyChapters = $oldChapter < 10 && $newChapter < 10;

		//NOTE: We only need to check against the new chapter here, as that is what is used for confirming update.
		return $chapterIncreased
			|| ($volumeIncreased && ($bothEarlyChapters || $newChapter >= $oldChapter));
	}
253
}
254
255
/**
 * Shared implementation for sites exposing FoolSlide-style URLs
 * ("{baseURL}/series/...", "{baseURL}/read/...") and the JSON endpoint
 * "{baseURL}/api/reader/comic/stub/{title}/format/json".
 *
 * Concrete site models are expected to set $baseURL.
 */
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	//Scheme + host of the site, without a trailing slash (it is concatenated with "/series/..." etc.).
	public $baseURL = '';

	/**
	 * @param string $title_url Title stub
	 *
	 * @return string "{baseURL}/series/{title_url}"
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * Build the reader URL and a display number ("v2/c15.5", "c15", ...) for a chapter.
	 *
	 * @param string $title_url Title stub
	 * @param string $chapter   Chapter path in the form #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat also accepts shorter paths such as "en/1", so the volume and
		//chapter segments may be absent; fall back to '' instead of reading an undefined index.
		$volume  = $chapter_parts[1] ?? '';
		$number  = $chapter_parts[2] ?? '';

		$volumePrefix = ($volume !== '0' ? "v{$volume}/" : '');
		$extraSuffix  = (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '');

		return [
			'url'    => "{$this->baseURL}/read/{$title_url}/{$chapter}/",
			'number' => "{$volumePrefix}c{$number}{$extraSuffix}"
		];
	}

	/**
	 * Fetch title name, latest chapter and last update time from the JSON endpoint.
	 *
	 * The last entry of the 'chapters' list is taken as the latest chapter.
	 *
	 * @param string $title_url Title stub
	 * @param bool   $firstGet  Unused by this implementation
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the
	 *                    request failed or the response lacks the expected fields
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$jsonURL = "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";

		$content = $this->get_content($jsonURL);
		if(!is_array($content)) {
			return NULL;
		}

		//Anything that is not a JSON object with a non-empty chapter list and a comic name is unusable.
		$json = json_decode($content['body'], TRUE);
		if(!is_array($json)
			|| !isset($json['chapters'], $json['comic']['name'])
			|| !is_array($json['chapters'])
			|| count($json['chapters']) === 0) {
			return NULL;
		}

		$latestChapter = end($json['chapters'])['chapter'] ?? NULL;
		if(!is_array($latestChapter) || !isset($latestChapter['chapter'], $latestChapter['updated'])) {
			return NULL;
		}

		//Cast before comparing so that numeric JSON values behave like the string form.
		$volume     = (string) ($latestChapter['volume'] ?? '0');
		$subchapter = (string) ($latestChapter['subchapter'] ?? '0');

		$latestChapterString = '';
		if($volume !== '0') {
			$latestChapterString .= "v{$volume}/";
		}
		$latestChapterString .= "c{$latestChapter['chapter']}";
		if($subchapter !== '0') {
			$latestChapterString .= ".{$subchapter}";
		}

		return [
			'title'          => trim((string) $json['comic']['name']),
			'latest_chapter' => $latestChapterString,
			//No need to use date() here since this is already formatted as such.
			'last_updated'   => $latestChapter['updated']
		];
	}
}
303