Completed
Push — master ( fd57b4...81f48e )
by Angus
06:22
created

Sites_Model   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 44
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
dl 0
loc 44
ccs 22
cts 22
cp 1
rs 10
c 0
b 0
f 0
wmc 2
lcom 1
cbo 1

2 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 20 1
A loadSite() 0 3 1
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
abstract class Site_Model extends CI_Model {
4
	public $site          = '';
5
	public $titleFormat   = '';
6
	public $chapterFormat = '';
7
8 112
	/**
	 * Runs the CI_Model constructor, then asks the CodeIgniter loader
	 * to set up the database connection.
	 */
	public function __construct() {
		parent::__construct();
		$this->load->database();
	}
13
14
	/**
	 * Builds the full URL of a title's page on the site.
	 *
	 * @param string $title_url Site-specific title identifier.
	 *
	 * @return string
	 */
	abstract public function getFullTitleURL(string $title_url) : string;
15
16
	/**
	 * Builds the data needed to link to a single chapter.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param string $chapter   Site-specific chapter identifier.
	 *
	 * @return array The site classes in this file return the keys 'url' and 'number'.
	 */
	abstract public function getChapterData(string $title_url, string $chapter) : array;
17
18
	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
19
	/**
	 * Fetches and parses the latest data for a title.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param bool   $firstGet  Site classes in this file use it to trigger doCustomFollow();
	 *                          presumably TRUE the first time a title is fetched - confirm against callers.
	 *
	 * @return array|null The site classes in this file return the keys 'title',
	 *                    'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);
20
21
	/**
	 * Checks a title URL against this site's $titleFormat regex.
	 *
	 * A failed match is written to the error log together with the site name.
	 *
	 * @param string $title_url
	 *
	 * @return bool TRUE when the regex matches, FALSE otherwise.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		$isValid = (bool) preg_match($this->titleFormat, $title_url);
		if($isValid) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
26
	/**
	 * Checks a chapter identifier against this site's $chapterFormat regex.
	 *
	 * A failed match is written to the error log together with the site name.
	 *
	 * @param string $chapter
	 *
	 * @return bool TRUE when the regex matches, FALSE otherwise.
	 */
	public function isValidChapter(string $chapter) : bool {
		$isValid = (bool) preg_match($this->chapterFormat, $chapter);
		if($isValid) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
31
32 10
	/**
	 * Fetches a URL with cURL and splits the response into headers, status code and body.
	 *
	 * The request asks for gzip encoding, sends a fixed desktop Chrome user-agent and
	 * includes the response headers in the transfer so they can be split off afterwards.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Raw cookie header value ("a=b; c=d"); skipped when empty.
	 * @param string $cookiejar_path  File to read cookies from (CURLOPT_COOKIEFILE); skipped when empty.
	 * @param bool   $follow_redirect Whether to follow Location redirects.
	 * @param bool   $isPost          Send a POST request instead of a GET.
	 * @param array  $postFields      Fields sent (URL-encoded) as the POST body when $isPost is TRUE.
	 *
	 * @return array|false ['headers' => ..., 'status_code' => ..., 'body' => ...],
	 *                     or FALSE when curl_exec() fails (the cURL error is logged).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1);

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is a boolean switch, not a field count.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			//The error details must be read before the handle is closed.
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			curl_close($ch);

			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
76
77
	/**
	 * Validates a get_content() response and extracts the title, latest-update and
	 * chapter nodes from its body.
	 *
	 * Every failure path logs an error prefixed with the site name and title URL,
	 * then returns FALSE.
	 *
	 * @param array|false $content             Result of get_content(); FALSE when the request itself failed.
	 * @param string      $title_url           Only used to identify the title in log messages.
	 * @param string      $node_title_string   Document-wide XPath that must match exactly one node.
	 * @param string      $node_row_string     Document-wide XPath that must match exactly one row node.
	 * @param string      $node_latest_string  XPath, relative to the row, that must match exactly one node.
	 * @param string      $node_chapter_string XPath, relative to the row, that must match exactly one node.
	 *                                         When empty, the row itself is used as the chapter node.
	 * @param string      $failure_string      When non-empty and present in the body, parsing is aborted.
	 *
	 * @return DOMElement[]|false Keys: 'nodes_title', 'nodes_latest', 'nodes_chapter'.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		//This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.
		$data = $this->cleanTitleDataDOM($data);

		//Scraped HTML is rarely valid; keep libxml's parse warnings out of the PHP error stream.
		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		if($node_chapter_string !== '') {
			$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
		} else {
			$nodes_chapter = $nodes_row;
		}

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
146
147 9
	/**
	 * Hook for trimming the raw HTML before parseTitleDataDOM() loads it into a DOMDocument.
	 *
	 * This base implementation returns the markup untouched.
	 *
	 * @param string $data Raw HTML body.
	 *
	 * @return string
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}
150
151
	/**
	 * Shared title parser for sites running FoOlSlide.
	 *
	 * This has its own function due to FoOlSlide being used a lot by fan translation sites,
	 * and the code being pretty much the same across all of them.
	 *
	 * @param string $fullURL   URL of the title page to fetch.
	 * @param string $title_url Title identifier, used for log messages.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated'; NULL when fetching or parsing fails.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$content = $this->get_content($fullURL);
		if(!$content) {
			return NULL;
		}

		//Only the <article> element is needed; cut everything else before building the DOM.
		$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$data) {
			return NULL;
		}

		$title = trim($data['nodes_title']->textContent);

		$link          = (string) $data['nodes_chapter']->getAttribute('href');
		$latestChapter = preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link);

		//The date is whatever follows the first comma in the meta text, with dots stripped.
		$rawDate     = explode(',', $data['nodes_latest']->nodeValue)[1];
		$lastUpdated = date("Y-m-d H:i:s", strtotime((string) str_replace('.', '', $rawDate)));

		return [
			'title'          => $title,
			'latest_chapter' => $latestChapter,
			'last_updated'   => $lastUpdated
		];
	}
178
179
	/**
	 * Default no-op hook; does nothing and returns NULL.
	 *
	 * MangaFox and Batoto override it to add the title to the tracker account's
	 * bookmark/follow list on the remote site.
	 *
	 * @param string $data  Raw HTML of the title page.
	 * @param array  $extra Site-specific extra values.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {}
180
	/**
	 * Default no-op hook; does nothing and returns NULL.
	 *
	 * MangaFox overrides it to scrape the remote bookmark list and return title data keyed by title URL.
	 */
	public function doCustomUpdate() {}
181
	/**
	 * Default no-op hook; does nothing and returns NULL.
	 *
	 * MangaFox overrides it to report whether $newChapter is numerically newer than $oldChapter.
	 *
	 * @param string $oldChapter
	 * @param string $newChapter
	 */
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
182
}
183
/**
 * Holds one instance of every supported site class, each reachable as a
 * public property named after the class (e.g. $this->MangaFox).
 */
class Sites_Model extends CI_Model {
	//FIXME: Is it possible to automatically generate this in some way or another?
	public $MangaFox;
	public $MangaHere;
	public $Batoto;
	public $DynastyScans;
	public $MangaPanda;
	public $MangaStream;
	public $WebToons;
	public $KissManga;
	public $KireiCake;
	public $GameOfScanlation;
	public $MangaCow;
	public $SeaOtterScans;
	public $HelveticaScans;
	public $SenseScans;
	public $JaiminisBox;
	public $DokiFansubs;

	/**
	 * Instantiates every site class, in the same order as the properties above.
	 */
	public function __construct() {
		parent::__construct();

		$siteNames = [
			'MangaFox',
			'MangaHere',
			'Batoto',
			'DynastyScans',
			'MangaPanda',
			'MangaStream',
			'WebToons',
			'KissManga',
			'KireiCake',
			'GameOfScanlation',
			'MangaCow',
			'SeaOtterScans',
			'HelveticaScans',
			'SenseScans',
			'JaiminisBox',
			'DokiFansubs'
		];
		foreach($siteNames as $siteName) {
			$this->loadSite($siteName);
		}
	}

	/**
	 * Creates an instance of the class called $siteName and stores it on the property of the same name.
	 *
	 * @param string $siteName Class name of the site model.
	 */
	private function loadSite(string $siteName) {
		$this->{$siteName} = new $siteName();
	}
}
227
228
class MangaFox extends Site_Model {
229
	public $site          = 'MangaFox';
230
	public $titleFormat   = '/^[a-z0-9_]+$/';
231
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';
232
233 1
	/**
	 * Builds the MangaFox series page URL for a title.
	 *
	 * @param string $title_url Title slug.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://mangafox.me/manga/' . $title_url . '/';
	}
236
237
	/**
	 * Builds the reader URL (first page) and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter identifier; also returned unchanged as the number.
	 *
	 * @return array Keys 'url' and 'number'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$readerURL = 'http://mangafox.me/manga/' . $title_url . '/' . $chapter . '/1.html';

		return [
			'url'    => $readerURL,
			'number' => $chapter
		];
	}
243
244 1
	/**
	 * Fetches the MangaFox series page and extracts the title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  When TRUE, the series is also bookmarked via doCustomFollow().
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated'; NULL when parsing fails.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$content = $this->get_content($this->getFullTitleURL($title_url));

		//parseTitleDataDOM() logs and returns FALSE itself when $content is FALSE.
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
			"div/span[@class='date']",
			"div/h3/a"
		);
		if(!$data) {
			return NULL;
		}

		$titleData = [];

		//The last 6 characters of og:title are dropped before decoding entities.
		$titleData['title'] = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));

		//Strip a trailing "N.html" page name, then read the chapter (and optional extra segment) from the path.
		$href     = (string) $data['nodes_chapter']->getAttribute('href');
		$segments = explode('/', preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', $href));
		$titleData['latest_chapter'] = $segments[5] . (!empty($segments[6]) ? "/{$segments[6]}" : "");

		$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));

		if($firstGet) {
			$this->doCustomFollow($content['body']);
		}

		return $titleData;
	}
273
274
275
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
276
	/**
	 * Bookmarks a series on the MangaFox account configured for the tracker.
	 *
	 * The series ID is read from the "var sid=...;" snippet in the title page HTML
	 * and POSTed to MangaFox's bookmark endpoint together with the account cookies.
	 *
	 * @param string $data  Raw HTML of the title page.
	 * @param array  $extra Unused by this site.
	 *
	 * @return bool TRUE only when the bookmark request came back with HTTP 200.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		//Without a series ID the request would be meaningless, so bail out instead of sending it.
		if(preg_match('/var sid=(?<id>[0-9]+);/', $data, $matches) !== 1) {
			log_message('error', "{$this->site} | Unable to find series ID, bookmark was not added");
			return FALSE;
		}

		$formData = [
			'action' => 'add',
			'sid'    => $matches['id']
		];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter"
		];
		$content = $this->get_content('http://mangafox.me/ajax/bookmark.php', implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//'status_code' is a key of the response array; in_array() would search the values instead.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
293
	/**
	 * Scrapes the account's "currently reading" bookmark list and collects the latest
	 * chapter data of every series that has an "updated" marker.
	 *
	 * Failures are logged; whatever was collected up to that point is still returned.
	 *
	 * @return array Title data (keys 'title', 'latest_chapter', 'last_updated') keyed by title URL slug.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter",
			"bmorder=za"
		];
		$content = $this->get_content('http://mangafox.me/bookmark/?status=currentreading&sort=last_chapter&order=za', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /bookmark | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /bookmark | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /bookmark | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to the bookmark list before building the DOM.
		$data = preg_replace('/^[\s\S]+<ul id="bmlist">/', '<ul id="bmlist">', $data);
		$data = preg_replace('/<!-- end of bookmark -->[\s\S]+$/', '<!-- end of bookmark -->', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//ul[@id='bmlist']/li/div[@class='series_grp' and h2[@class='title']/span[@class='updatedch'] and dl]");
		if(!($nodes_rows->length > 0)) {
			//Double quotes are required here so the site name is actually interpolated.
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_title   = $xpath->query("h2[@class='title']/a[contains(@class, 'title')]", $row);
			$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
			$nodes_latest  = $xpath->query("dl/dt[1]/em/span[@class='timing']", $row);

			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title     = $nodes_title->item(0);
			$titleData = [];

			$titleData['title'] = trim($title->textContent);

			//Strip a trailing "N.html" page name, then read the chapter (and optional extra segment) from the path.
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
			$chapterURLSegments = explode('/', $link);
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");

			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));

			$title_url = explode('/', $title->getAttribute('href'))[4];
			$titleDataList[$title_url] = $titleData;
		}

		return $titleDataList;
	}
359
	/**
	 * Decides whether $newChapterString is newer than $oldChapterString.
	 *
	 * Both strings are split on "/" into segments. With exactly two segments the first
	 * is treated as the volume and the second as the chapter; otherwise the volume is 0
	 * and the first segment is the chapter. The leading character of each segment is
	 * dropped before it is compared as a float.
	 *
	 * @param string $oldChapterString
	 * @param string $newChapterString
	 *
	 * @return bool TRUE when the new volume or the new chapter is higher. FALSE otherwise,
	 *              including when the two strings have a different number of segments.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		$oldSegments = explode('/', $oldChapterString);
		$newSegments = explode('/', $newChapterString);

		//Although it's rare, it's possible for new chapters to have a different amount of segments to the oldChapter (or vice versa).
		//Since this can cause errors, we just throw a fail.
		$segmentCount = count($newSegments);
		if($segmentCount !== count($oldSegments)) {
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//FIXME: This feels like a mess.
			$placeholders = ['TBD', 'TBA', 'NA'];

			//Volumes without a number yet are ranked as 999.
			$oldRaw    = substr(array_shift($oldSegments), 1);
			$newRaw    = substr(array_shift($newSegments), 1);
			$oldVolume = floatval(in_array($oldRaw, $placeholders) ? 999 : $oldRaw);
			$newVolume = floatval(in_array($newRaw, $placeholders) ? 999 : $newRaw);
		}

		$oldChapter = floatval(substr(array_shift($oldSegments), 1));
		$newChapter = floatval(substr(array_shift($newSegments), 1));

		//A higher volume wins outright; otherwise the chapter number decides.
		return ($newVolume > $oldVolume) || ($newChapter > $oldChapter);
	}
397
}
398
399
class MangaHere extends Site_Model {
400
	public $site          = 'MangaHere';
401
	public $titleFormat   = '/^[a-z0-9_]+$/';
402
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';
403
404 1
	/**
	 * Builds the MangaHere series page URL for a title.
	 *
	 * @param string $title_url Title slug.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://www.mangahere.co/manga/' . $title_url . '/';
	}
407
408
	/**
	 * Builds the reader URL and display number for a chapter.
	 *
	 * @param string $title   Title slug.
	 * @param string $chapter Chapter identifier; also returned unchanged as the number.
	 *
	 * @return array Keys 'url' and 'number'.
	 */
	public function getChapterData(string $title, string $chapter) : array {
		$readerURL = 'http://www.mangahere.co/manga/' . $title . '/' . $chapter . '/';

		return [
			'url'    => $readerURL,
			'number' => $chapter
		];
	}
414
415 1
	/**
	 * Fetches the MangaHere series page and extracts the title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this site.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated'; NULL when parsing fails.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$content = $this->get_content($this->getFullTitleURL($title_url));

		//parseTitleDataDOM() logs and returns FALSE itself when $content is FALSE.
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
			"span[@class='right']",
			"span[@class='left']/a",
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
		);
		if(!$data) {
			return NULL;
		}

		//Strip a trailing "N.html" page name, then read the chapter (and optional extra segment) from the path.
		$href     = (string) $data['nodes_chapter']->getAttribute('href');
		$segments = explode('/', preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', $href));

		$titleData = [];
		$titleData['title']          = $data['nodes_title']->textContent;
		$titleData['latest_chapter'] = $segments[5] . (!empty($segments[6]) ? "/{$segments[6]}" : "");
		$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));

		return $titleData;
	}
441
}
442
443
class Batoto extends Site_Model {
444
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
445
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
446
	//title_url is stored like: "ID:--:LANGUAGE"
447
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"
448
449
	public $site          = 'Batoto';
450
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
451
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
452
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
453
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';
454
455
	/**
	 * Builds the generic Batoto comic URL from the ID part of an "ID:--:LANGUAGE" title string.
	 *
	 * @param string $title_string Title string in "ID:--:LANGUAGE" form.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_string) : string {
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
		$titleID = explode(':--:', $title_string)[0];

		return "http://bato.to/comic/_/comics/-r{$titleID}";
	}
461
462 View Code Duplication
	/**
	 * Splits a "CHAPTER_ID:--:CHAPTER_NUMBER" string into the reader URL and the display number.
	 *
	 * @param string $title_string Not used by this site.
	 * @param string $chapter      Chapter string in "CHAPTER_ID:--:CHAPTER_NUMBER" form.
	 *
	 * @return array Keys 'url' and 'number'.
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$parts = explode(':--:', $chapter);

		$chapterData           = [];
		$chapterData['url']    = "http://bato.to/reader#" . $parts[0];
		$chapterData['number'] = $parts[1];

		return $chapterData;
	}
471
472
	/**
	 * Fetches the Batoto comic page (authenticated through cookies) and extracts the
	 * title, latest chapter and last-updated time for the requested language.
	 *
	 * @param string $title_url Title string in "ID:--:LANGUAGE" form.
	 * @param bool   $firstGet  When TRUE and the language is English, the comic is also followed via doCustomFollow().
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated'; NULL when parsing fails.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$title_parts = explode(':--:', $title_url);
		$fullURL     = $this->getFullTitleURL($title_url);
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?

		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
		$cookieString = implode("; ", [
			"lang_option={$lang}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		]);
		$content = $this->get_content($fullURL, $cookieString, "", TRUE);

		//parseTitleDataDOM() logs and returns FALSE itself when $content is FALSE.
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//h1[@class='ipsType_pagetitle']",
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
			"td[last()]",
			"td/a[contains(@href,'reader')]",
			">Register now<"
		);
		if(!$data) {
			return NULL;
		}

		$titleData = [];
		$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));

		//Link text looks like "Vol.X Ch.Y - Z: name"; volume and extra are optional.
		preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
		$chapterID     = substr($data['nodes_chapter']->getAttribute('href'), 22);
		$volumePart    = !empty($text['volume']) ? 'v'.$text['volume'].'/' : '';
		$chapterNumber = $volumePart . 'c'.$text['chapter'] . (!empty($text['extra']) ? '-'.$text['extra'] : '');
		$titleData['latest_chapter'] = $chapterID . ':--:' . $chapterNumber;

		//strtotime() does not understand "An hour ago".
		$dateString = $data['nodes_latest']->nodeValue;
		if($dateString == 'An hour ago') {
			$dateString = '1 hour ago';
		}
		$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

		if($firstGet && $lang == 'English') {
			//FIXME: English is forced due for now. See #78.
			$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
		}

		return $titleData;
	}
518
519
	/**
	 * Trims the Batoto page down to its main content block before it is parsed.
	 *
	 * Drops everything before the CONTENT marker, everything after the
	 * "end mainContent" marker, and the comments section.
	 *
	 * @param string $data Raw HTML body.
	 *
	 * @return string
	 */
	public function cleanTitleDataDOM(string $data) : string {
		//preg_replace() applies the patterns one after another, each on the previous result.
		$patterns = [
			'/^[\s\S]+<!-- ::: CONTENT ::: -->/',
			'/<!-- end mainContent -->[\s\S]+$/',
			'/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/'
		];
		$replacements = [
			'<!-- ::: CONTENT ::: -->',
			'<!-- end mainContent -->',
			'</div></div><!-- end mainContent -->'
		];

		return preg_replace($patterns, $replacements, $data);
	}
526
527
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
528
	/**
	 * Follows a comic on the Batoto account configured for the tracker.
	 *
	 * The secure hash and session ID are read from the ipb.vars assignments in the
	 * title page HTML and sent to the forum's "like" endpoint together with the account cookies.
	 *
	 * @param string $data  Raw HTML of the title page.
	 * @param array  $extra Must contain 'id' (comic ID) and 'lang' (language cookie value).
	 *
	 * @return bool TRUE only when the follow request came back with HTTP 200.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!isset($extra['id'], $extra['lang'])) {
			log_message('error', "{$this->site} | Missing id/lang, title was not followed");
			return FALSE;
		}

		//Without both tokens the request cannot succeed, so bail out instead of sending it.
		if(preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text) !== 1) {
			log_message('error', "{$this->site} | Unable to find secure_hash/session_id, title was not followed");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//'status_code' is a key of the response array; in_array() would search the values instead.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
557
	/**
	 * Currently disabled: always returns FALSE.
	 *
	 * The code after the early return is kept for when the linked issue is resolved.
	 * It scrapes the account's /myfollows page and collects the latest chapter data
	 * of each followed English title, keyed by "ID:--:English".
	 *
	 * @return array|false FALSE while disabled.
	 */
	public function doCustomUpdate() {
		return FALSE; /* FIXME: Bato.to is disabled for custom updates until we can fix https://github.com/DakuTree/manga-tracker/issues/78#issuecomment-269833624 */

		$titleDataList = [];

		$cookies = [
			"lang_option=English", //FIXME: English is forced due for now. See #78.
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
		if(!is_array($content)) {
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to its main content block before building the DOM.
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
		if(!($nodes_rows->length > 0)) {
			//Double quotes are required here so the site name is actually interpolated.
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_title   = $xpath->query("td[2]/a[1]", $row);
			$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
			$nodes_lang    = $xpath->query("td[3]/div", $row);
			$nodes_latest  = $xpath->query("td[5]", $row);

			//Only English rows are tracked for now.
			if(!($nodes_lang->length === 1 && $nodes_lang->item(0)->getAttribute('title') == 'English')) {
				continue;
			}
			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title = $nodes_title->item(0);

			preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr);
			$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78

			//Only the first row seen for a title is used.
			if(array_key_exists($title_url, $titleDataList)) {
				continue;
			}

			$titleData = [];
			$titleData['title'] = trim($title->textContent);

			$chapter = $nodes_chapter->item(0);
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text);
			$titleData['latest_chapter'] = substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : ''));

			//strtotime() does not understand "An hour ago".
			$dateString = $nodes_latest->item(0)->nodeValue;
			if($dateString == 'An hour ago') {
				$dateString = '1 hour ago';
			}
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

			$titleDataList[$title_url] = $titleData;
		}

		return $titleDataList;
	}
634
}
635
636
class DynastyScans extends Site_Model {
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58

	public $site          = 'DynastyScans';
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
	public $chapterFormat = '/^[0-9a-z_]+$/';

	/**
	 * Build the page URL for a title.
	 *
	 * @param string $title_string "SLUG:--:TYPE". TYPE '0' maps to /series/, anything else to /chapters/.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_string) : string {
		$title_parts = explode(':--:', $title_string);
		$url_type = ($title_parts[1] == '0' ? 'series' : 'chapters');

		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
	}

	/**
	 * Build the chapter URL and the display number for a chapter.
	 *
	 * @param string $title_string "SLUG:--:TYPE" (only SLUG is used here).
	 * @param string $chapter      Chapter segment, or the literal "oneshot".
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$title_parts = explode(':--:', $title_string);
		/* Known chapter url formats, after the underscore is inserted below (# is numbers):
		       chapters_#A_#B - Ch#A-#B
		       ch_#A          - Ch#A
		       ch_#A_#B       - Ch#A.#B
		       <NOTHING>      - Oneshot (This is passed as "oneshot")
		*/

		//The URL always uses the chapter exactly as it was passed in.
		$chapterData = [
			'url'    => 'http://dynasty-scans.com/chapters/' . $title_parts[0].'_'.$chapter,
			'number' => ''
		];

		if($chapter == 'oneshot') {
			$chapterData['number'] = 'oneshot';
		} else {
			//Separate the leading alphabetic prefix from the numbers ("ch10_5" -> "ch_10_5") so it can be split.
			$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
			$chapterSegments = explode('_', $chapter);
			switch($chapterSegments[0]) {
				case 'ch':
					$chapterData['number'] = 'c'.($chapterSegments[1] ?? '').(!empty($chapterSegments[2]) ? '.'.$chapterSegments[2] : '');
					break;

				case 'chapters':
					//This is barely ever used, but I have seen it.
					$chapterData['number'] = 'c'.($chapterSegments[1] ?? '').'-'.($chapterSegments[2] ?? '');
					break;

				default:
					//TODO: FALLBACK, ALERT ADMIN?
					$chapterData['number'] = $chapter;
					break;
			}
		}
		return $chapterData;
	}

	/**
	 * Fetch a title page and extract its title, latest chapter and last-updated time.
	 *
	 * @param string $title_url "SLUG:--:TYPE"; TYPE '0' is parsed as a series, '1' as a oneshot.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when nothing could be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		//get_content() can return FALSE; only read the body when one is really there.
		$body = (is_array($content) && isset($content['body']) && is_string($content['body']) ? $content['body'] : NULL);

		$title_parts = explode(':--:', $title_url);
		switch($title_parts[1] ?? '') {
			case '0':
				//Normal series.
				$data = $this->parseTitleDataDOM(
					$content,
					$title_url,
					"//h2[@class='tag-title']/b[1]",
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
					"small",
					"a[@class='name']"
				);
				if($data) {
					$titleData['title'] = $data['nodes_title']->textContent;

					//In cases where the series is a doujin, try and prepend the copyright.
					$matchesD = [];
					if($body !== NULL) {
						preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $body, $matchesD);
					}
					//The capture group is optional, so index 1 is only set when the link has text.
					if(!empty($matchesD[1])) {
						//Drops the last 7 characters of the link text (presumably a fixed suffix - TODO confirm).
						$copyright = (string) substr($matchesD[1], 0, -7);
						if($copyright !== '' && $copyright !== 'Original') {
							$titleData['title'] = $copyright.' - '.$titleData['title'];
						}
					}

					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
					$chapterSegment     = $chapterURLSegments[2] ?? '';
					if (strpos($chapterSegment, $title_parts[0]) !== false) {
						//Strip "<slug>_" so only the chapter part is stored.
						$titleData['latest_chapter'] = substr($chapterSegment, strlen($title_parts[0]) + 1);
					} else {
						$titleData['latest_chapter'] = $chapterSegment;
					}

					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
				}
				break;

			case '1':
				//Oneshot.
				if($body === NULL) {
					log_message('error', "{$this->site} : {$title_url} | Failed to retrieve page");
					break;
				}

				$hasTitle = preg_match('/<b>.*<\/b>/', $body, $matchesT);
				$hasDate  = preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $body, $matches);
				if(!$hasTitle || !$hasDate) {
					log_message('error', "{$this->site} : {$title_url} | Unable to find title/date on page");
					break;
				}

				preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $body, $matchesD);
				$prefix = (!empty($matchesD[1]) && $matchesD[1] !== 'Original' ? $matchesD[1].' - ' : '');
				//substr(..., 3, -4) removes the surrounding <b> and </b>.
				$titleData['title'] = $prefix . substr($matchesT[0], 3, -4);

				$titleData['latest_chapter'] = 'oneshot'; //This will never change

				$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime($matches[1]));

				//Oneshots are special, and really shouldn't need to be re-tracked
				$titleData['status'] = '2';
				break;

			default:
				//something went wrong
				break;
		}
		return (!empty($titleData) ? $titleData : NULL);
	}
}
749
750
class MangaPanda extends Site_Model {
	public $site          = 'MangaPanda';
	//NOTE: MangaPanda has manga pages under the root URL, so we need to filter out pages we know that aren't manga.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "http://www.mangapanda.com/{$title_url}";
		return $url;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = "http://www.mangapanda.com/{$title_url}/{$chapter}/";
		$chapterData['number'] = 'c'.$chapter;

		return $chapterData;
	}

	/**
	 * Fetch the title page and pull out title, latest chapter and last-updated time.
	 *
	 * NOTE(review): static analysis flags that get_content() may return FALSE; whether
	 * parseTitleDataDOM() copes with that is not visible from here - confirm.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$response = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$response,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$title       = $nodes['nodes_title']->textContent;
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$latestText  = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $title,
			//Keep only the trailing chapter number of the link.
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($latestText))
		];
	}
}
792
793
class MangaStream extends Site_Model {
	public $site          = 'MangaStream';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "https://mangastream.com/manga/{$title_url}/";
		return $url;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   "CHAPTER/ID"; the part before the first slash becomes the number.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = "https://mangastream.com/r/{$title_url}/{$chapter}";
		$chapterData['number'] = 'c'.explode('/', $chapter)[0];

		return $chapterData;
	}

	/**
	 * Fetch the title page and pull out title, latest chapter and last-updated time.
	 *
	 * NOTE(review): static analysis flags that get_content() may return FALSE; whether
	 * parseTitleDataDOM() copes with that is not visible from here - confirm.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$response = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$response,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$nodes) {
			return NULL;
		}

		$title       = $nodes['nodes_title']->textContent;
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$latestText  = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $title,
			//Keep "CHAPTER/ID" and drop the trailing page number of the link.
			'latest_chapter' => preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($latestText))
		];
	}
}
835
836
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $site          = 'WebToons';
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * Build the listing URL for a title.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		//NOTE(review): the trailing "/" ends up inside the title_no query value; kept as-is, confirm it is intended.
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/**
	 * Build the viewer URL and display number for a chapter.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param string $chapter   "ID:--:CHAPTER_N"
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Fetch the RSS feed for a title and extract title, latest chapter and last-updated time.
	 *
	 * Failures (no response, missing series, unparsable XML, unexpected link layout) are logged
	 * and reported as NULL instead of terminating the process.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when nothing could be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		//get_content() can return FALSE, so make sure there is a body to parse.
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "Failed to retrieve RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect libxml errors internally so malformed feeds don't emit PHP warnings, then restore the old setting.
		$previousErrorSetting = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorSetting);

		//Compare strictly: an empty root element is a valid object that evaluates as falsy.
		if($xml === FALSE) {
			log_message('error', "Unable to parse RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		if(isset($xml->{'channel'}->item[0])) {
			$chapterURLSegments = explode('/', ((string) $xml->{'channel'}->item[0]->link));
			if(isset($chapterURLSegments[6], $chapterURLSegments[7])) {
				$titleData['title'] = trim((string) $xml->{'channel'}->title);

				//Segment 7 ends with the episode number, segment 6 is the chapter name.
				$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $xml->{'channel'}->item[0]->pubDate));
			} else {
				log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
899
900
class KissManga extends Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6 hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $site          = 'KissManga';
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Build the chapter URL and display number.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   "CHAPTER_NAME:--:ID"
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse.
			'number' => $chapter_parts[0]
		];
	}

	/**
	 * Fetch the title page (using the externally generated cookiejar) and extract title,
	 * latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when the cookiejar is missing/stale or the page could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
		//Only stat the file when it exists; filemtime() on a missing file raises a warning.
		$cookie_last_updated = (is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE);
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			return NULL;
		}

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL, '', $cookiejar_path);
		//get_content() can return FALSE, so make sure there is a body to parse.
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "{$this->site} : {$title_url} | Failed to retrieve page");
			return NULL;
		}

		$data = $content['body'];
		if(strpos($data, 'containerRoot') === FALSE) {
			//TODO: Throw ERRORS;
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		//preg_replace() returns NULL on a PCRE error, which loadHTML() can't accept.
		if(!is_string($data) || $data === '') {
			log_message('error', "{$this->site} : {$title_url} | Failed to isolate page content");
			return NULL;
		}

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then put the previous libxml setting back.
		$previousErrorSetting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorSetting);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			return NULL;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query("td[2]",   $firstRow);
		$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
		//item(0) is NULL on an empty list; bail out instead of calling a method on NULL.
		if($nodes_latest->length < 1 || $nodes_chapter->length < 1) {
			log_message('error', "{$this->site} : {$title_url} | Latest chapter row is missing expected cells");
			return NULL;
		}

		$link = (string) $nodes_chapter->item(0)->getAttribute('href');
		$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
		if(!isset($chapterURLSegments[3])) {
			log_message('error', "{$this->site} : {$title_url} | Unexpected chapter link format");
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			//Chapter name from the path, then the numeric id at the end of the link.
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent))
		];
	}
}
979
980
class GameOfScanlation extends Site_Model {
	public $site          = 'GameOfScanlation';
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/**
	 * Build the title page URL.
	 *
	 * GoS has two separate title URL formats, /projects/ and /forums/. Titles that use /forums/
	 * seem to carry a ".%ID%" suffix, so the presence of a dot decides which one is used.
	 *
	 * @param string $title_url Title slug, optionally suffixed with ".ID".
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		if(strpos($title_url, '.') === FALSE) {
			return "https://gameofscanlation.moe/projects/{$title_url}/";
		}

		return "https://gameofscanlation.moe/forums/{$title_url}/";
	}

	/**
	 * Build the chapter URL and display number.
	 *
	 * @param string $title_url Title slug; a trailing ".ID" is removed for the URL.
	 * @param string $chapter   Chapter slug; used verbatim in the URL, normalised for the number.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$idSuffixPattern = "/\\.[0-9]+$/";

		$titleSlug   = preg_replace($idSuffixPattern, "", $title_url);
		$chapterSlug = preg_replace($idSuffixPattern, "", $chapter);

		return [
			'url'    => "https://gameofscanlation.moe/projects/".$titleSlug.'/'.$chapter.'/',
			'number' => preg_replace("/chapter-/", "c", $chapterSlug)
		];
	}

	/**
	 * Fetch the title page and pull out title, latest chapter and last-updated time.
	 *
	 * NOTE(review): static analysis flags that get_content() may return FALSE; whether
	 * parseTitleDataDOM() copes with that is not visible from here - confirm.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$response = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$response,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$title       = trim(html_entity_decode($nodes['nodes_title']->getAttribute('content')));
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		//The "title" attribute is cast to int and handed to date() as the timestamp.
		$timestamp   = (int) $nodes['nodes_latest']->getAttribute('title');

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
}
1031
1032
class MangaCow extends Site_Model {
	public $site          = 'MangaCow';
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "http://mngcow.co/{$title_url}/";
		return $url;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = $this->getFullTitleURL($title_url).$chapter.'/';
		$chapterData['number'] = "c{$chapter}";

		return $chapterData;
	}

	/**
	 * Fetch the title page and pull out title, latest chapter and last-updated time.
	 *
	 * NOTE(review): static analysis flags that get_content() may return FALSE; whether
	 * parseTitleDataDOM() copes with that is not visible from here - confirm.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$response = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$response,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$nodes) {
			return NULL;
		}

		$title       = trim($nodes['nodes_title']->textContent);
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		//The first 13 characters of the "title" attribute are skipped before date parsing.
		$latestText  = (string) substr($nodes['nodes_latest']->getAttribute('title'), 13);

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($latestText))
		];
	}
}
1075
1076
/*** FoolSlide sites ***/
1077
1078 View Code Duplication
class KireiCake extends Site_Model {
	public $site          = 'KireiCake';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "https://reader.kireicake.com/series/{$title_url}";
		return $url;
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume prefix is left out when the volume is '0'.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "https://reader.kireicake.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegate title parsing to the shared FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1101
1102 View Code Duplication
class SeaOtterScans extends Site_Model {
	public $site          = 'SeaOtterScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "https://reader.seaotterscans.com/series/{$title_url}";
		return $url;
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume prefix is left out when the volume is '0'.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "https://reader.seaotterscans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegate title parsing to the shared FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1125
1126 View Code Duplication
class HelveticaScans extends Site_Model {
	public $site          = 'HelveticaScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "http://helveticascans.com/reader/series/{$title_url}";
		return $url;
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume prefix is left out when the volume is '0'.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "http://helveticascans.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegate title parsing to the shared FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1149
1150 View Code Duplication
class SenseScans extends Site_Model {
	public $site          = 'SenseScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$url = "http://reader.sensescans.com/series/{$title_url}";
		return $url;
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume prefix is left out when the volume is '0'.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "http://reader.sensescans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegate title parsing to the shared FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1173
1174 View Code Duplication
/**
 * Site model for JaiminisBox (jaiminisbox.com/reader).
 *
 * Title data is produced by parseFoolSlide(), which is defined outside this class.
 */
class JaiminisBox extends Site_Model {
	public $site          = 'JaiminisBox';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title.
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 *
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://jaiminisbox.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and a display number for a chapter.
	 *
	 * $chapter is split on '/' and read as LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/).
	 * The volume prefix is omitted when the volume segment is exactly '0'; the
	 * extra segment, when present, is appended after a dot.
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param string $chapter   Slash-separated chapter string; it must contain at least
	 *                          the language and volume segments.
	 *
	 * @return array Array with the keys 'url' and 'number'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		$volume = ($chapter_parts[1] !== '0') ? "v{$chapter_parts[1]}/" : '';
		// $chapterFormat makes the chapter segment optional, so guard the offset
		// instead of triggering an undefined-offset diagnostic; the output is unchanged.
		$number = $chapter_parts[2] ?? '';
		$extra  = isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '';

		return [
			'url'    => "https://jaiminisbox.com/reader/read/{$title_url}/{$chapter}/",
			'number' => "{$volume}c{$number}{$extra}"
		];
	}

	/**
	 * Fetch the title data for a series by delegating to parseFoolSlide().
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param bool   $firstGet  Part of the abstract signature; not used by this implementation.
	 *
	 * @return mixed Result of parseFoolSlide().
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);

		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1197
1198 View Code Duplication
/**
 * Site model for DokiFansubs (kobato.hologfx.com/reader).
 *
 * Title data is produced by parseFoolSlide(), which is defined outside this class.
 */
class DokiFansubs extends Site_Model {
	public $site          = 'DokiFansubs';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title.
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 *
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://kobato.hologfx.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and a display number for a chapter.
	 *
	 * $chapter is split on '/' and read as LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/).
	 * The volume prefix is omitted when the volume segment is exactly '0'; the
	 * extra segment, when present, is appended after a dot.
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param string $chapter   Slash-separated chapter string; it must contain at least
	 *                          the language and volume segments.
	 *
	 * @return array Array with the keys 'url' and 'number'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		$volume = ($chapter_parts[1] !== '0') ? "v{$chapter_parts[1]}/" : '';
		// $chapterFormat makes the chapter segment optional, so guard the offset
		// instead of triggering an undefined-offset diagnostic; the output is unchanged.
		$number = $chapter_parts[2] ?? '';
		$extra  = isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '';

		return [
			'url'    => "https://kobato.hologfx.com/reader/read/{$title_url}/{$chapter}/",
			'number' => "{$volume}c{$number}{$extra}"
		];
	}

	/**
	 * Fetch the title data for a series by delegating to parseFoolSlide().
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param bool   $firstGet  Part of the abstract signature; not used by this implementation.
	 *
	 * @return mixed Result of parseFoolSlide().
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);

		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1221