Completed
Pull Request — master (#186)
by
unknown
02:31
created

Base_FoolSlide_Site_Model::doCustomUpdate()   C

Complexity

Conditions 7
Paths 2

Size

Total Lines 40
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
cc 7
eloc 23
nc 2
nop 0
dl 0
loc 40
ccs 0
cts 21
cp 0
crap 56
rs 6.7272
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Lazy gateway to the individual site models.
 *
 * Reading an undefined property (e.g. `$this->SomeSite`) goes through __get():
 * if the name is a class whose direct parent is one of the site base classes,
 * it is instantiated on demand and stored on this object; every other name is
 * forwarded to the CodeIgniter super-object.
 */
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolves reads of undefined properties.
	 *
	 * @param string $name Property (and potential site model class) name.
	 *
	 * @return mixed The freshly created site model, or whatever get_instance() exposes under $name.
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422

		//get_parent_class() returns string|false, so compare strictly against the known base class names.
		$isSiteModel = class_exists($name)
			&& in_array(get_parent_class($name), ['Base_Site_Model', 'Base_FoolSlide_Site_Model'], TRUE);

		if(!$isSiteModel) {
			return get_instance()->{$name};
		}

		$this->loadSite($name);
		return $this->{$name};
	}

	/**
	 * Instantiates the site model class and stores it on this object under its own name,
	 * so later reads of the same property no longer reach __get().
	 */
	private function loadSite(string $siteName) {
		$this->{$siteName} = new $siteName();
	}
}
24
25
abstract class Base_Site_Model extends CI_Model {
26
	public $site          = '';
27
	public $titleFormat   = '';
28
	public $chapterFormat = '';
29
30
	/**
31
	 * 0: No custom updater.
32
	 * 1: Uses following page.
33
	 * 2: Uses latest releases page.
34
	 */
35
	public $customType = 0;
36
37 16
	/**
	 * Runs the CI_Model constructor, loads the database through the CodeIgniter
	 * loader and records the concrete class name in $site.
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//The concrete (sub)class name doubles as the site identifier.
		$this->site = static::class;
	}
44
45
	/**
	 * Returns the URL for the given title.
	 *
	 * @param string $title_url Site-specific title identifier (presumably one accepted by isValidTitleURL()).
	 *
	 * @return string
	 */
	abstract public function getFullTitleURL(string $title_url) : string;
46
47
	/**
	 * Returns display data for a chapter of a title.
	 *
	 * The FoolSlide implementation in this file returns the keys 'url' and 'number';
	 * other implementations presumably follow the same shape - verify per site.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param string $chapter   Site-specific chapter identifier.
	 *
	 * @return array
	 */
	abstract public function getChapterData(string $title_url, string $chapter) : array;
48
49
	/**
	 * Fetches the current data for a title.
	 *
	 * The FoolSlide implementation in this file returns the keys 'title', 'latest_chapter'
	 * and 'last_updated', or NULL when nothing could be retrieved.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param bool   $firstGet  Presumably TRUE on the first fetch of a title; unused by the FoolSlide implementation - confirm against other sites.
	 *
	 * @return array|null
	 */
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
50
51 2
	/**
	 * Checks a title identifier against the site's $titleFormat regex.
	 * A mismatch (or regex failure) is logged as an error.
	 *
	 * @param string $title_url
	 *
	 * @return bool TRUE when the pattern matches.
	 */
	final public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
56 2
	/**
	 * Checks a chapter identifier against the site's $chapterFormat regex.
	 * A mismatch (or regex failure) is logged as an error.
	 *
	 * @param string $chapter
	 *
	 * @return bool TRUE when the pattern matches.
	 */
	final public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
61
62
	/**
	 * Performs an HTTP request with curl and splits the response into headers and body.
	 *
	 * @param string $url
	 * @param string $cookie_string   Sent via CURLOPT_COOKIE when non-empty.
	 * @param string $cookiejar_path  Passed to CURLOPT_COOKIEFILE when non-empty.
	 * @param bool   $follow_redirect Enables CURLOPT_FOLLOWLOCATION.
	 * @param bool   $isPost          Send a POST request instead of a GET.
	 * @param array  $postFields      POST fields, encoded with http_build_query().
	 *
	 * @return array|false ['headers' => ..., 'status_code' => int, 'body' => string], or FALSE if curl failed (the error is logged).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are part of the response so they can be split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is a boolean switch; passing count($postFields) disabled POST whenever no fields were given.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//Release the handle on the failure path too (it used to leak here).
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
106
107
	/**
	 * Validates a get_content() response and extracts the title / latest / chapter nodes via XPath.
	 *
	 * Every failure (failed request, non-2xx status, empty body, failure string found,
	 * unexpected node counts) is logged as an error and yields FALSE.
	 *
	 * @param array|false $content             Result of get_content().
	 * @param string      $title_url           Only used in log messages.
	 * @param string      $node_title_string   XPath which must match exactly one title node.
	 * @param string      $node_row_string     XPath which must match exactly one row node.
	 * @param string      $node_latest_string  XPath, relative to the row, which must match exactly one node.
	 * @param string      $node_chapter_string XPath, relative to the row, which must match exactly one node. When empty, the row itself is used.
	 * @param string      $failure_string      If non-empty and found in the body, the page is treated as a failure.
	 *
	 * @return array|false ['nodes_title' => DOMNode, 'nodes_latest' => DOMNode, 'nodes_chapter' => DOMNode], or FALSE on failure.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		//The headers are not needed here, so only pull out what is actually used.
		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Collect (and then drop) markup warnings instead of emitting them, then put
		//the previous libxml error mode back rather than forcing it to FALSE.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		//Without a dedicated chapter XPath, the row itself is the chapter node.
		$nodes_chapter = ($node_chapter_string !== '' ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row);

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
173
174
	/**
	 * Hook applied to the raw HTML before parseTitleDataDOM() loads it into a DOMDocument.
	 * The default implementation returns the markup untouched.
	 *
	 * @param string $data Raw response body.
	 *
	 * @return string Markup to parse.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}
177
178
	/**
	 * Runs the site's handleCustomFollow() and reports whether following succeeded.
	 *
	 * handleCustomFollow() receives a callback taking ($content, $id, $successCallback);
	 * the follow counts as successful when $content is an array with status code 200 and
	 * the optional $successCallback accepts the response body. Every outcome is logged.
	 *
	 * @param string $data  Passed through to handleCustomFollow().
	 * @param array  $extra Passed through to handleCustomFollow().
	 *
	 * @return array ['followed' => 'Y'] on success, otherwise an empty array.
	 */
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		$onResponse = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}
			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			//A success callback is optional; when present it gets the final say.
			if(is_callable($successCallback) && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($onResponse, $data, $extra);
		return $titleData;
	}
205
	/**
	 * Hook for sites with a custom updater; the default does nothing.
	 *
	 * doCustomFollow() calls this with a callback that expects ($content, $id) and
	 * optionally a third success-check closure.
	 *
	 * @param callable $callback
	 * @param string   $data
	 * @param array    $extra
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {}
206
	/**
	 * Hook for sites with a custom updater; the default does nothing (and returns nothing).
	 * The FoolSlide override in this file returns a list of title data keyed by title stub.
	 */
	public function doCustomUpdate() {}
207
	/**
	 * Hook for comparing an old and a new chapter; the default does nothing (and returns nothing).
	 * The FoolSlide override in this file returns the result of doCustomCheckCompare().
	 *
	 * @param string $oldChapter
	 * @param string $newChapter
	 */
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
208 12
	/**
	 * Decides whether the new chapter counts as an update over the old one.
	 *
	 * Both arguments are chapter strings split on '/': either ['c12'] or ['v3', 'c12'].
	 * A lone chapter segment gets a 'v0' volume prepended. The first character of each
	 * segment is dropped and the rest is read as a float.
	 *
	 * @param array $oldChapterSegments
	 * @param array $newChapterSegments
	 *
	 * @return bool TRUE when the new chapter should be treated as newer.
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//FIXME: Make this more generic when we have more site support for it. MangaFox and Batoto have similar chapter formats.

		//NOTE: We only need to check against the new chapter here, as that is what is used for confirming update.

		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			//Differently shaped chapter strings can't be compared.
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//FIXME: Can we loop this?
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];
			if(in_array($oldVolume, $placeholderVolumes)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		//A higher chapter number is always an update, whatever the volume says.
		if($newChapter > $oldChapter) {
			return TRUE;
		}

		//Otherwise the volume must have gone up, and either:
		// - both chapters are below 10 (one-off case where a site restarts chapter numbering per volume, e.g. v1/c1..v1/c5, v2/c1..), or
		// - the chapter didn't go backwards.
		$bothChaptersLow = ($oldChapter < 10 && $newChapter < 10);
		return $newVolume > $oldVolume && ($bothChaptersLow || $newChapter >= $oldChapter);
	}
260
}
261
262
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
263
	public $titleFormat   = '/^[a-z0-9_-]+$/';
264
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
265
	public $customType    = 2;
266
267
	public $baseURL = '';
268
269
	/**
	 * Builds the series page URL: <baseURL>/series/<title_url>.
	 *
	 * @param string $title_url Series stub.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/series/' . $title_url;
	}
272
273
	/**
	 * Builds the reader URL and a display number for a chapter string.
	 *
	 * The display number has the form "v<volume>/c<chapter>[.<extra>]"; the volume part
	 * is omitted when the volume is '0'.
	 *
	 * @param string $title_url Series stub.
	 * @param string $chapter   Chapter string, e.g. "en/0/12" or "en/3/12/5".
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter); //returns #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)

		$volumePrefix = ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '');
		//$chapterFormat also accepts "en/<n>", which has no chapter segment; fall back to an
		//empty string (same output as before) instead of hitting an undefined offset.
		$chapterNumber = $chapter_parts[2] ?? '';
		$chapterExtra  = (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '');

		return [
			'url'    => "{$this->baseURL}/read/{$title_url}/{$chapter}/",
			'number' => "{$volumePrefix}c{$chapterNumber}{$chapterExtra}"
		];
	}
280
281
	/**
	 * Fetches a series from the FoolSlide JSON API and returns its title and latest chapter.
	 *
	 * @param string $title_url Series stub.
	 * @param bool   $firstGet  Unused by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the request
	 *                    failed or the response doesn't contain a comic name and at least one chapter.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$jsonURL = "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";

		$content = $this->get_content($jsonURL);
		if(!$content) {
			return NULL;
		}

		$json = json_decode($content['body'], TRUE);
		//Bail out on anything that isn't the expected shape instead of erroring further down.
		if(!is_array($json) || !isset($json['comic']['name']) || !isset($json['chapters']) || !is_array($json['chapters']) || count($json['chapters']) === 0) {
			return NULL;
		}

		//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does, so we need to sort ourselves..
		//Compare chapter and subchapter as separate numbers (newest first); gluing them together as
		//"chapter.subchapter" made subchapter 10 (x.10 => x.1) sort below subchapter 2 (x.2).
		usort($json['chapters'], function($a, $b) {
			return [floatval($b['chapter']['chapter']), floatval($b['chapter']['subchapter'])]
			   <=> [floatval($a['chapter']['chapter']), floatval($a['chapter']['subchapter'])];
		});
		$latestChapter = reset($json['chapters'])['chapter'];

		$latestChapterString = "en/{$latestChapter['volume']}/{$latestChapter['chapter']}";
		//Cast so that a numeric 0 from the API is treated the same as the string '0'.
		if((string) $latestChapter['subchapter'] !== '0') {
			$latestChapterString .= "/{$latestChapter['subchapter']}";
		}

		return [
			'title'          => trim($json['comic']['name']),
			'latest_chapter' => $latestChapterString,
			//No need to use date() here since this is already formatted as such.
			'last_updated'   => ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created'])
		];
	}
309
310
	//Since we're just checking the latest updates page and not a following page, we just need to simulate a follow.
	//TODO: It would probably be better to have some kind of var which says that the custom update uses a following page..
	/**
	 * Simulates a successful follow: invokes the callback with a bare 200 response
	 * and the id taken from $extra['id']. No request is made.
	 *
	 * @param callable $callback Called as $callback($content, $id).
	 * @param string   $data     Unused.
	 * @param array    $extra    Must contain the key 'id'.
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		$callback(['status_code' => 200], $extra['id']);
	}
316
	/**
	 * Reads the site's latest-chapters API and returns the newest chapter per series.
	 *
	 * Chapters are requested newest-created first, so only the first chapter seen for
	 * each series is kept.
	 *
	 * @return array Map of series stub => ['title', 'latest_chapter', 'last_updated'];
	 *               empty (with an error logged) when the request fails or the response is unusable.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		//NOTE: desc_created might not work well for sites that delay their releases (HelveticaScans for example)
		//      There shouldn't be much issue unless a site has a delay on a chapter long than it takes for 20 chapters to release (unlikely).
		$jsonURL = "{$this->baseURL}/api/reader/chapters/orderby/desc_created/format/json";

		$content = $this->get_content($jsonURL);
		$json    = NULL;
		if(is_array($content) && (int) $content['status_code'] === 200) {
			$json = json_decode($content['body'], TRUE);
		}

		//Covers failed requests, bad status codes, invalid JSON and a missing chapter list.
		if(!is_array($json) || !isset($json['chapters']) || !is_array($json['chapters'])) {
			log_message('error', "Custom updating failed for {$this->baseURL}.");
			return $titleDataList;
		}

		foreach($json['chapters'] as $chapterData) {
			$stub = $chapterData['comic']['stub'];
			if(isset($titleDataList[$stub])) {
				//We already have title data for this title.
				continue;
			}

			$latestChapter = $chapterData['chapter'];

			$latestChapterString = "en/{$latestChapter['volume']}/{$latestChapter['chapter']}";
			//Cast so that a numeric 0 from the API is treated the same as the string '0'.
			if((string) $latestChapter['subchapter'] !== '0') {
				$latestChapterString .= "/{$latestChapter['subchapter']}";
			}

			$titleDataList[$stub] = [
				'title'          => trim($chapterData['comic']['name']),
				'latest_chapter' => $latestChapterString,
				//No need to use date() here since this is already formatted as such.
				'last_updated'   => ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created'])
			];
		}

		return $titleDataList;
	}
356
	/**
	 * Compares two FoolSlide chapter strings.
	 *
	 * Each one is converted to its display number via getChapterData(), split on '/',
	 * and the resulting segments are handed to doCustomCheckCompare().
	 *
	 * @param string $oldChapterString
	 * @param string $newChapterString
	 *
	 * @return bool Result of doCustomCheckCompare().
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		//The title isn't needed to build the chapter number, hence the empty title_url.
		$oldNumber = $this->getChapterData('', $oldChapterString)['number'];
		$newNumber = $this->getChapterData('', $newChapterString)['number'];

		return $this->doCustomCheckCompare(explode('/', $oldNumber), explode('/', $newNumber));
	}
364
}
365