Completed
Push — master ( d30a44...39481e )
by Angus
03:37
created

DemonicScans::getTitleData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 3

Duplication

Lines 4
Ratio 100 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 3
nc 1
nop 2
dl 4
loc 4
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
abstract class Site_Model extends CI_Model {
4
	public $site          = '';
5
	public $titleFormat   = '';
6
	public $chapterFormat = '';
7
8 11
	/**
	 * Boots the CodeIgniter model, opens the database connection and records
	 * the concrete class name of this site in $site (used in log messages).
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//static::class resolves to the runtime (child) class, same as get_class($this).
		$this->site = static::class;
	}
15
16
	/**
	 * Build the URL of the title page for a stored title_url.
	 *
	 * @param string $title_url Site-specific title identifier.
	 *
	 * @return string
	 */
	abstract public function getFullTitleURL(string $title_url) : string;

	/**
	 * Build the chapter information for a stored title_url / chapter pair.
	 * The implementations in this file return the keys 'url' and 'number'.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param string $chapter   Site-specific chapter identifier.
	 *
	 * @return array
	 */
	abstract public function getChapterData(string $title_url, string $chapter) : array;

	/**
	 * Fetch the title page and extract the current title data.
	 * The implementations in this file return an array with the keys
	 * 'title', 'latest_chapter' and 'last_updated', or NULL on failure.
	 *
	 * @param string $title_url Site-specific title identifier.
	 * @param bool   $firstGet  Passed as TRUE to enable first-time-only work (some sites call doCustomFollow()).
	 *
	 * @return array|null
	 */
	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);
22
23
	/**
	 * Check a title_url against this site's $titleFormat regex.
	 * A rejected value is written to the error log.
	 *
	 * @param string $title_url
	 *
	 * @return bool TRUE when the regex matches.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
28
	/**
	 * Check a chapter string against this site's $chapterFormat regex.
	 * A rejected value is written to the error log.
	 *
	 * @param string $chapter
	 *
	 * @return bool TRUE when the regex matches.
	 */
	public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
33
34 11
	/**
	 * Request a URL with cURL and split the raw response into its parts.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Value for CURLOPT_COOKIE ("a=b; c=d"); skipped when empty.
	 * @param string $cookiejar_path  Value for CURLOPT_COOKIEFILE; skipped when empty.
	 * @param bool   $follow_redirect Enables CURLOPT_FOLLOWLOCATION.
	 * @param bool   $isPost          Send the request as a POST.
	 * @param array  $postFields      Fields that are url-encoded into the POST body.
	 *
	 * @return array|false ['headers' => parsed headers, 'status_code' => HTTP code, 'body' => string],
	 *                     or FALSE when curl_exec() fails (the curl error is logged).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers come back in front of the body and are split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is a boolean flag, so pass TRUE explicitly instead of a field count (which is 0 for an empty field list).
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//The error details must be read before the handle is released.
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
78
79
	/**
	 * Validate a get_content() response and pull the title / latest-update / chapter nodes out of it.
	 *
	 * Every failure is written to the error log and reported as FALSE.
	 *
	 * @param array|false $content             Result of get_content(); anything that is not an array is treated as a failed request.
	 * @param string      $title_url           Only used in log messages.
	 * @param string      $node_title_string   XPath (document-wide) that must match exactly one title node.
	 * @param string      $node_row_string     XPath (document-wide) that must match exactly one row node.
	 * @param string      $node_latest_string  XPath evaluated relative to the row; must match exactly one node.
	 * @param string      $node_chapter_string XPath evaluated relative to the row; when '' the row itself is used as the chapter node.
	 * @param string      $failure_string      When not '', the response is rejected if the body contains this text.
	 *
	 * @return DOMNode[]|false Array with the keys 'nodes_title', 'nodes_latest' and 'nodes_chapter', or FALSE.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		//list('headers' => $headers, 'status_code' => $status_code, 'body' => $data) = $content; //TODO: PHP 7.1

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then drop the buffered errors and
		//put the libxml error mode back to whatever it was before this call.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);
		//An empty chapter expression means the row node itself carries the chapter data.
		$nodes_chapter = ($node_chapter_string !== '' ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row);

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
148
149 10
	/**
	 * Hook called by parseTitleDataDOM() on the response body before it is parsed.
	 * The default implementation hands the markup back unchanged; sites can
	 * override it to trim the markup down first.
	 *
	 * @param string $data Raw response body.
	 *
	 * @return string Markup to parse.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		$cleaned = $data;

		return $cleaned;
	}
152
153
	/**
	 * Shared title-page parser for sites that run FoOlSlide.
	 *
	 * This has its own function due to FoOlSlide being used a lot by fan translation sites,
	 * and the code being pretty much the same across all of them.
	 *
	 * @param string $fullURL   URL of the title page to fetch.
	 * @param string $title_url Site-specific title identifier (used in log messages).
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the page
	 *                    could not be fetched or parsed.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$content = $this->get_content($fullURL);
		if(!$content) {
			return NULL;
		}

		//Only the <article> element is needed, so strip everything around it before parsing.
		$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$data) {
			return NULL;
		}

		//The date is taken from the text after the first comma of the meta node.
		$latestParts = explode(',', $data['nodes_latest']->nodeValue);
		if(!isset($latestParts[1])) {
			log_message('error', "{$this->site} : {$title_url} | Unable to find the update date");
			return NULL;
		}

		//strtotime() returns FALSE for text it cannot parse; with strict_types that FALSE would make date() throw.
		$timestamp = strtotime(str_replace('.', '', $latestParts[1]));
		if($timestamp === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unable to parse the update date");
			return NULL;
		}

		$link = (string) $data['nodes_chapter']->getAttribute('href');

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
180
181
	/**
	 * Optional hook; the default does nothing. MangaFox and Batoto override it
	 * to follow/bookmark a title on the remote site.
	 *
	 * @param string $data  Page body handed over by the caller.
	 * @param array  $extra Additional site-specific values.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
	}

	/**
	 * Optional hook; the default does nothing. MangaFox and Batoto override it
	 * to read title data from the remote site's own follow list.
	 */
	public function doCustomUpdate() {
	}

	/**
	 * Optional hook; the default does nothing. MangaFox overrides it to compare
	 * two chapter strings.
	 *
	 * @param string $oldChapter
	 * @param string $newChapter
	 */
	public function doCustomCheck(string $oldChapter, string $newChapter) {
	}
184
}
185
/**
 * Lazy loader for the site models: reading $sitesModel->MangaFox creates the
 * MangaFox instance on first access and stores it as a property.
 */
class Sites_Model extends CI_Model {
	/**
	 * Resolve an undefined property.
	 *
	 * @param string $name Property name; treated as a site when it is an existing class whose direct parent is Site_Model.
	 *
	 * @return Site_Model|false The site instance, or FALSE for any other name
	 *                          (parent::__get() is still invoked, but its result is not returned).
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$isSite = class_exists($name) && get_parent_class($name) === 'Site_Model';
		if($isSite) {
			$this->loadSite($name);
			return $this->{$name};
		}

		parent::__get($name);
		return FALSE;
	}

	/**
	 * Instantiate the named site class and keep it as a property of this model.
	 *
	 * @param string $siteName Class name of the site.
	 */
	private function loadSite(string $siteName) {
		$instance = new $siteName();

		$this->{$siteName} = $instance;
	}
}
203
204
class MangaFox extends Site_Model {
205
	public $titleFormat   = '/^[a-z0-9_]+$/';
206
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';
207
208 1
	/**
	 * @param string $title_url Title slug as used in the MangaFox URL.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$fullURL = "http://mangafox.me/manga/{$title_url}/";

		return $fullURL;
	}
211
212
	/**
	 * @param string $title_url Title slug as used in the MangaFox URL.
	 * @param string $chapter   Chapter string; it is returned unchanged as the chapter number.
	 *
	 * @return array ['url' => URL of the chapter's first page, 'number' => $chapter]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];

		$chapterData['url']    = "http://mangafox.me/manga/{$title_url}/{$chapter}/1.html";
		$chapterData['number'] = $chapter;

		return $chapterData;
	}
218
219 1
	/**
	 * Fetch the MangaFox title page and extract title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  When TRUE the title is also bookmarked via doCustomFollow().
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parsing failed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//get_content() may return FALSE; parseTitleDataDOM() logs that case and reports FALSE itself.
		$content = $this->get_content($this->getFullTitleURL($title_url));

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
			"div/span[@class='date']",
			"div/h3/a"
		);
		if(!$data) {
			return NULL;
		}

		//Drops the last six characters of the og:title value (presumably a " Manga" suffix - TODO confirm).
		$title = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));

		//Strip a trailing "<page>.html", then read the chapter (and optional second) segment from the URL path.
		$chapterHref = (string) $data['nodes_chapter']->getAttribute('href');
		$segments    = explode('/', preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', $chapterHref));
		$latest      = $segments[5];
		if(!empty($segments[6])) {
			$latest .= "/{$segments[6]}";
		}

		$titleData = [
			'title'          => $title,
			'latest_chapter' => $latest,
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue))
		];

		if($firstGet) {
			$this->doCustomFollow($content['body']);
		}

		return $titleData;
	}
248
249
250
	/**
	 * Bookmark a title on MangaFox through its ajax endpoint, using the configured account cookies.
	 *
	 * FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	 *
	 * @param string $data  Body of the title page; the series id is read from its "var sid=...;" script line.
	 * @param array  $extra Not used by this site.
	 *
	 * @return bool TRUE when the bookmark request answered with HTTP 200.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		//Without a series id there is nothing to bookmark.
		if(!preg_match('/var sid=(?<id>[0-9]+);/', $data, $matches)) {
			log_message('error', "{$this->site} | Unable to find series ID for custom follow");
			return FALSE;
		}

		$formData = [
			'action' => 'add',
			'sid'    => $matches['id']
		];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter"
		];
		$content = $this->get_content('http://mangafox.me/ajax/bookmark.php', implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searches values, not keys, so it cannot be used to test for the 'status_code' key.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
268
	/**
	 * Read the account's MangaFox bookmark list and collect the latest chapter of every updated series.
	 *
	 * Failures are written to the error log; whatever was collected up to that point is returned.
	 *
	 * @return array Map of title_url => ['title', 'latest_chapter', 'last_updated'].
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter",
			"bmorder=za"
		];
		$content = $this->get_content('http://mangafox.me/bookmark/?status=currentreading&sort=last_chapter&order=za', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /bookmark | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /bookmark | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /bookmark | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to the bookmark list before handing it to the DOM parser.
		$data = preg_replace('/^[\s\S]+<ul id="bmlist">/', '<ul id="bmlist">', $data);
		$data = preg_replace('/<!-- end of bookmark -->[\s\S]+$/', '<!-- end of bookmark -->', $data);

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then restore the previous libxml error mode.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//ul[@id='bmlist']/li/div[@class='series_grp' and h2[@class='title']/span[@class='updatedch'] and dl]");
		if(!($nodes_rows->length > 0)) {
			//Double quotes are required here, otherwise the site name is logged as the literal text "{$this->site}".
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_title   = $xpath->query("h2[@class='title']/a[contains(@class, 'title')]", $row);
			$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
			$nodes_latest  = $xpath->query("dl/dt[1]/em/span[@class='timing']", $row);

			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length} | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title     = $nodes_title->item(0);
			$titleData = [];

			$titleData['title'] = trim($title->textContent);

			//Strip a trailing "<page>.html", then read the chapter (and optional second) segment from the URL path.
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
			$chapterURLSegments = explode('/', $link);
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (!empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");

			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));

			//The fifth "/"-separated piece of the title link is used as the title_url key.
			$title_url = explode('/', $title->getAttribute('href'))[4];
			$titleDataList[$title_url] = $titleData;
		}

		return $titleDataList;
	}
334
	/**
	 * Decide whether $newChapterString is newer than $oldChapterString.
	 *
	 * Chapter strings are split on "/". Two segments are read as "<volume>/<chapter>",
	 * any other count uses the first segment as the chapter and a volume of 0.
	 * The first character of each segment is dropped before the numeric comparison.
	 *
	 * @param string $oldChapterString
	 * @param string $newChapterString
	 *
	 * @return bool TRUE when the new volume or the new chapter number is higher; FALSE otherwise,
	 *              including when both strings have a different number of segments.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		$oldParts = explode('/', $oldChapterString);
		$newParts = explode('/', $newChapterString);

		//Although it's rare, it's possible for new chapters to have a different amount of segments to the oldChapter (or vice versa).
		//Since this can cause errors, we just throw a fail.
		$partCount = count($newParts);
		if($partCount !== count($oldParts)) {
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($partCount === 2) {
			//FIXME: This feels like a mess.
			//Placeholder volume names are ranked as volume 999.
			$placeholders = ['TBD', 'TBA', 'NA'];

			$oldVolume = substr(array_shift($oldParts), 1);
			$newVolume = substr(array_shift($newParts), 1);

			$oldVolume = floatval(in_array($oldVolume, $placeholders) ? 999 : $oldVolume);
			$newVolume = floatval(in_array($newVolume, $placeholders) ? 999 : $newVolume);
		}

		$oldChapter = floatval(substr(array_shift($oldParts), 1));
		$newChapter = floatval(substr(array_shift($newParts), 1));

		//A higher volume wins outright; otherwise a higher chapter number is enough.
		return ($newVolume > $oldVolume) || ($newChapter > $oldChapter);
	}
372
}
373
374
/**
 * Site model for mangahere.co.
 */
class MangaHere extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';

	/**
	 * @param string $title_url Title slug as used in the MangaHere URL.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$fullURL = "http://www.mangahere.co/manga/{$title_url}/";

		return $fullURL;
	}

	/**
	 * @param string $title   Title slug as used in the MangaHere URL.
	 * @param string $chapter Chapter string; it is returned unchanged as the chapter number.
	 *
	 * @return array ['url' => chapter URL, 'number' => $chapter]
	 */
	public function getChapterData(string $title, string $chapter) : array {
		$chapterData = [];

		$chapterData['url']    = "http://www.mangahere.co/manga/{$title}/{$chapter}/";
		$chapterData['number'] = $chapter;

		return $chapterData;
	}

	/**
	 * Fetch the MangaHere title page and extract title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this site.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parsing failed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//get_content() may return FALSE; parseTitleDataDOM() logs that case and reports FALSE itself.
		$content = $this->get_content($this->getFullTitleURL($title_url));

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
			"span[@class='right']",
			"span[@class='left']/a",
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
		);
		if(!$data) {
			return NULL;
		}

		$title = $data['nodes_title']->textContent;

		//Strip a trailing "<page>.html", then read the chapter (and optional second) segment from the URL path.
		$chapterHref = (string) $data['nodes_chapter']->getAttribute('href');
		$segments    = explode('/', preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', $chapterHref));
		$latest      = $segments[5];
		if(!empty($segments[6])) {
			$latest .= "/{$segments[6]}";
		}

		return [
			'title'          => $title,
			'latest_chapter' => $latest,
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue))
		];
	}
}
416
417
class Batoto extends Site_Model {
418
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
419
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
420
	//title_url is stored like: "ID:--:LANGUAGE"
421
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"
422
423
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
424
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
425
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
426
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';
427
428
	/**
	 * @param string $title_string Stored as "ID:--:LANGUAGE"; only the ID part is used here.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_string) : string {
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
		$titleID = explode(':--:', $title_string)[0];

		return "http://bato.to/comic/_/comics/-r".$titleID;
	}
434
435 View Code Duplication
	/**
	 * @param string $title_string Not used here.
	 * @param string $chapter      Stored as "CHAPTER_ID:--:CHAPTER_NUMBER".
	 *
	 * @return array ['url' => reader URL built from the chapter id, 'number' => chapter number part]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$parts = explode(':--:', $chapter);

		$chapterData = [];
		$chapterData['url']    = "http://bato.to/reader#" . $parts[0];
		$chapterData['number'] = $parts[1];

		return $chapterData;
	}
444
445
	/**
	 * Fetch the Batoto title page (with the configured account cookies) and extract
	 * title, latest chapter and update time.
	 *
	 * @param string $title_url Stored as "ID:--:LANGUAGE".
	 * @param bool   $firstGet  When TRUE and the language is English, the title is also followed via doCustomFollow().
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parsing failed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$title_parts = explode(':--:', $title_url);
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?

		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
		$cookieString = implode("; ", [
			"lang_option={$lang}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		]);
		//get_content() may return FALSE; parseTitleDataDOM() logs that case and reports FALSE itself.
		$content = $this->get_content($this->getFullTitleURL($title_url), $cookieString, "", TRUE);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//h1[@class='ipsType_pagetitle']",
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
			"td[last()]",
			"td/a[contains(@href,'reader')]",
			">Register now<"
		);
		if(!$data) {
			return NULL;
		}

		$titleData = [];
		$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));

		//Split the link text into its optional volume, chapter and optional "-N" parts.
		preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
		$volumePart = (!empty($text['volume']) ? 'v'.$text['volume'].'/' : '');
		$extraPart  = (!empty($text['extra']) ? '-'.$text['extra'] : '');
		//Dropping the first 22 characters of the href presumably removes "http://bato.to/reader#" - TODO confirm.
		$chapterID  = substr($data['nodes_chapter']->getAttribute('href'), 22);
		$titleData['latest_chapter'] = $chapterID . ':--:' . ($volumePart . 'c'.$text['chapter'] . $extraPart);

		//Rewrite "An hour ago" into a form with an explicit number before handing it to strtotime().
		$dateString = $data['nodes_latest']->nodeValue;
		if($dateString == 'An hour ago') {
			$dateString = '1 hour ago';
		}
		$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

		if($firstGet && $lang == 'English') {
			//FIXME: English is forced due for now. See #78.
			$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
		}

		return $titleData;
	}
491
492
	/**
	 * Trim the Batoto page down to its main content block before it is parsed:
	 * everything before the CONTENT marker, everything after the mainContent end
	 * marker, and the comments box are removed.
	 *
	 * @param string $data Raw response body.
	 *
	 * @return string Trimmed markup.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		//Applied one after another, in this order.
		$replacements = [
			'/^[\s\S]+<!-- ::: CONTENT ::: -->/'                       => '<!-- ::: CONTENT ::: -->',
			'/<!-- end mainContent -->[\s\S]+$/'                       => '<!-- end mainContent -->',
			'/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/' => '</div></div><!-- end mainContent -->'
		];
		foreach($replacements as $pattern => $replacement) {
			$data = preg_replace($pattern, $replacement, $data);
		}

		return $data;
	}
499
500
	/**
	 * Follow a title on Batoto through the forum's ajax "like" endpoint, using the configured account cookies.
	 *
	 * FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	 *
	 * @param string $data  Body of the title page; the secure_hash and session_id are read from its ipb.vars script lines.
	 * @param array  $extra Must contain 'id' (title id) and 'lang' (language cookie value).
	 *
	 * @return bool TRUE when the follow request answered with HTTP 200.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!isset($extra['id'], $extra['lang'])) {
			log_message('error', "{$this->site} | Missing id/lang for custom follow");
			return FALSE;
		}

		//Both tokens are needed to build the request; give up if the page does not contain them.
		if(!preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text)) {
			log_message('error', "{$this->site} | Unable to find secure_hash/session_id for custom follow");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searches values, not keys, so it cannot be used to test for the 'status_code' key.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
530
	/**
	 * Read the account's Batoto follow list and collect the latest English chapter of every followed title.
	 *
	 * Currently disabled: the method returns FALSE immediately and everything after
	 * that return statement does not run.
	 *
	 * @return array|false FALSE while disabled; otherwise a map of title_url => ['title', 'latest_chapter', 'last_updated'].
	 */
	public function doCustomUpdate() {
		return FALSE; /* FIXME: Bato.to is disabled for custom updates until we can fix https://github.com/DakuTree/manga-tracker/issues/78#issuecomment-269833624 */

		$titleDataList = [];

		$cookies = [
			"lang_option=English", //FIXME: English is forced due for now. See #78.
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
		if(!is_array($content)) {
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to its main content block before handing it to the DOM parser.
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then restore the previous libxml error mode.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
		if(!($nodes_rows->length > 0)) {
			//Double quotes are required here, otherwise the site name is logged as the literal text "{$this->site}".
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_title   = $xpath->query("td[2]/a[1]", $row);
			$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
			$nodes_lang    = $xpath->query("td[3]/div", $row);
			$nodes_latest  = $xpath->query("td[5]", $row);

			//Rows in any other language are skipped silently.
			if(!($nodes_lang->length === 1 && $nodes_lang->item(0)->getAttribute('title') == 'English')) {
				continue;
			}
			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length} | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title = $nodes_title->item(0);

			//The trailing digits of the title link are used as the title id.
			preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr);
			$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78

			//Only the first row seen for a title is kept.
			if(array_key_exists($title_url, $titleDataList)) {
				continue;
			}

			$titleData = [];
			$titleData['title'] = trim($title->textContent);

			$chapter = $nodes_chapter->item(0);
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text);
			//Dropping the first 8 characters of the href presumably removes "/reader#" - TODO confirm.
			$titleData['latest_chapter'] = substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : ''));

			//Rewrite "An hour ago" into a form with an explicit number before handing it to strtotime().
			$dateString = $nodes_latest->item(0)->nodeValue;
			if($dateString == 'An hour ago') {
				$dateString = '1 hour ago';
			}
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

			$titleDataList[$title_url] = $titleData;
		}

		return $titleDataList;
	}
607
}
608
609
class DynastyScans extends Site_Model {
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58

	/* Title strings are stored as "SLUG:--:TYPE".
	   TYPE "0" is a series page (/series/SLUG), anything else is treated as a standalone chapter page (/chapters/SLUG). */
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
	public $chapterFormat = '/^[0-9a-z_]+$/';

	/**
	 * Build the page URL for a stored title string.
	 *
	 * @param string $title_string Title in "SLUG:--:TYPE" form.
	 * @return string URL of the series page (TYPE "0") or the chapter page (any other TYPE).
	 */
	public function getFullTitleURL(string $title_string) : string {
		$title_parts = explode(':--:', $title_string);
		$url_type = ($title_parts[1] == '0' ? 'series' : 'chapters');

		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_string Title in "SLUG:--:TYPE" form.
	 * @param string $chapter      Chapter slug, or "oneshot".
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$title_parts = explode(':--:', $title_string);
		/* Known chapter url formats (# is numbers):
		       chapters_#A_#B - Ch#A-#B
		       ch_#A          - Ch#A
		       ch_#A_#B       - Ch#A.#B
		       <NOTHING>      - Oneshot (This is passed as "oneshot")
		*/

		$chapterData = [
			'url'    => 'http://dynasty-scans.com/chapters/' . $title_parts[0].'_'.$chapter,
			'number' => ''
		];

		if($chapter == 'oneshot') {
			$chapterData['number'] = 'oneshot';
		} else {
			//Insert a separator after the leading alphabetic prefix so it can be split off as its own segment.
			$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
			$chapterSegments = explode('_', $chapter);
			switch($chapterSegments[0]) {
				case 'ch':
					$chapterData['number'] = 'c'.$chapterSegments[1].(isset($chapterSegments[2]) && !empty($chapterSegments[2]) ? '.'.$chapterSegments[2] : '');
					break;

				case 'chapters':
					//This is barely ever used, but I have seen it.
					$chapterData['number'] = 'c'.$chapterSegments[1].'-'.$chapterSegments[2];
					break;

				default:
					//TODO: FALLBACK, ALERT ADMIN?
					$chapterData['number'] = $chapter;
					break;
			}
		}
		return $chapterData;
	}

	/**
	 * Fetch the title page and extract title, latest chapter and last update time.
	 *
	 * @param string $title_url Title in "SLUG:--:TYPE" form.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when nothing could be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$title_parts = explode(':--:', $title_url);
		switch($title_parts[1]) {
			case '0':
				//Normal series.
				$data = $this->parseTitleDataDOM(
					$content,
					$title_url,
					"//h2[@class='tag-title']/b[1]",
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
					"small",
					"a[@class='name']"
				);
				if($data) {
					$titleData['title'] = $data['nodes_title']->textContent;

					//In cases where the series is a doujin, try and prepend the copyright.
					//The capture group is optional, so it may be absent from the match array even when the pattern matches.
					if(preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $content['body'], $matchesD) && isset($matchesD[1])) {
						//NOTE(review): the last 7 characters of the link text are dropped; presumably a fixed suffix on the site - confirm.
						$copyright = (string) substr($matchesD[1], 0, -7);
						if($copyright !== '' && $copyright !== 'Original') {
							$titleData['title'] = $copyright.' - '.$titleData['title'];
						}
					}

					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
					$chapterSlug = $chapterURLSegments[2];
					//Only strip the series slug when it really is the prefix; a match elsewhere in the slug would cut off the wrong characters.
					if(strpos($chapterSlug, $title_parts[0]) === 0) {
						$titleData['latest_chapter'] = substr($chapterSlug, strlen($title_parts[0]) + 1);
					} else {
						$titleData['latest_chapter'] = $chapterSlug;
					}

					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
				}
				break;

			case '1':
				//Oneshot.
				if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
					log_message('error', "{$this->site} | Unable to load oneshot page: {$title_url}");
					break;
				}
				$data = $content['body'];

				$hasTitle = preg_match('/<b>.*<\/b>/', $data, $matchesT);
				$hasDate  = preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $data, $matches);
				if(!$hasTitle || !$hasDate) {
					log_message('error', "{$this->site} | Unable to parse oneshot page: {$title_url}");
					break;
				}

				$timestamp = strtotime($matches[1]);
				if($timestamp === FALSE) {
					log_message('error', "{$this->site} | Unable to parse oneshot date: {$title_url}");
					break;
				}

				//Prepend the doujin copyright when there is one and it isn't "Original".
				$copyrightPrefix = '';
				if(preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $data, $matchesD) && isset($matchesD[1]) && $matchesD[1] !== 'Original') {
					$copyrightPrefix = $matchesD[1].' - ';
				}
				//substr(…, 3, -4) removes the surrounding <b> and </b> tags from the match.
				$titleData['title'] = $copyrightPrefix . substr($matchesT[0], 3, -4);

				$titleData['latest_chapter'] = 'oneshot'; //This will never change

				$titleData['last_updated']   = date("Y-m-d H:i:s", $timestamp);

				//Oneshots are special, and really shouldn't need to be re-tracked
				$titleData['status'] = '2';
				break;

			default:
				//something went wrong
				break;
		}
		return (!empty($titleData) ? $titleData : NULL);
	}
}
721
722
class MangaPanda extends Site_Model {
	//NOTE: MangaPanda serves manga pages directly under the root URL, so known non-manga pages are excluded by the title pattern.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$pageURL = "http://www.mangapanda.com/{$title_url}";
		return $pageURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = "http://www.mangapanda.com/{$title_url}/{$chapter}/";
		$chapterData['number'] = 'c'.$chapter;
		return $chapterData;
	}

	/**
	 * Fetch the title page and read title, latest chapter and update time from it.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$info = [];
		$info['title'] = $nodes['nodes_title']->textContent;

		//The chapter number is the trailing numeric path segment of the link.
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$info['latest_chapter'] = preg_replace('/^.*\/([0-9]+)$/', '$1', $chapterHref);

		$updatedAt = strtotime((string) $nodes['nodes_latest']->nodeValue);
		$info['last_updated'] = date("Y-m-d H:i:s", $updatedAt);

		return $info;
	}
}
763
764
class MangaStream extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$pageURL = "https://mangastream.com/manga/{$title_url}/";
		return $pageURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter string; the part before the first "/" is used as the display number.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url'] = "https://mangastream.com/r/{$title_url}/{$chapter}";

		$chapterSegments = explode('/', $chapter);
		$chapterData['number'] = 'c'.$chapterSegments[0];

		return $chapterData;
	}

	/**
	 * Fetch the title page and read title, latest chapter and update time from it.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$nodes) {
			return NULL;
		}

		$info = [];
		$info['title'] = $nodes['nodes_title']->textContent;

		//Keep the two path segments before the trailing numeric segment of the link.
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$info['latest_chapter'] = preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', $chapterHref);

		$updatedAt = strtotime((string) $nodes['nodes_latest']->nodeValue);
		$info['last_updated'] = date("Y-m-d H:i:s", $updatedAt);

		return $info;
	}
}
805
806
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but RSS is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * @param string $title_url Title in "ID:--:LANG:--:TITLE:--:GENRE" form.
	 * @return string URL of the title's chapter list page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/**
	 * @param string $title_url Title in "ID:--:LANG:--:TITLE:--:GENRE" form.
	 * @param string $chapter   Chapter in "ID:--:CHAPTER_N" form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Fetch the title's RSS feed and read title, latest chapter and update time from its first item.
	 *
	 * A failed fetch or an unparsable feed is logged and reported as NULL rather than terminating the
	 * script, so one bad series cannot abort the caller.
	 *
	 * @param string $title_url Title in "ID:--:LANG:--:TITLE:--:GENRE" form.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when the feed is missing, invalid or has no items.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "Unable to load RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect libxml parse errors internally instead of emitting warnings, then restore the previous mode.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);
		if($xml === FALSE) {
			log_message('error', "Invalid RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		if(isset($xml->{'channel'}->item[0])) {
			$chapterURLSegments = explode('/', ((string) $xml->{'channel'}->item[0]->link));
			//Segment 6 is the chapter name and segment 7 the "viewer?...episode_no=N" part of the chapter URL.
			if(isset($chapterURLSegments[6], $chapterURLSegments[7])) {
				$titleData['title'] = trim((string) $xml->{'channel'}->title);
				$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $xml->{'channel'}->item[0]->pubDate));
			} else {
				log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
868
869
class KissManga extends Site_Model {
	/* This site is painful to support. The only reason it is supported is that it's one of the few aggregator sites which carry more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to go down.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6 hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in "NAME:--:ID" form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse, so the raw chapter name is used as-is.
			'number' => $chapter_parts[0]
		];
	}

	/**
	 * Fetch the title page (using the stored cookiejar) and read title, latest chapter and update time.
	 *
	 * Every failure path is logged and yields NULL; partial data is never returned.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when the cookiejar is missing/stale or the page can't be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
		//filemtime() raises a warning on a missing file, so only call it when the cookiejar exists.
		$cookie_last_updated = (is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE);
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing else, wait until next update.
			//TODO: NAG ADMIN??
			log_message('error', "{$this->site} | Cookiejar is missing or more than a day old, skipping: {$title_url}");
			return NULL;
		}

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL, '', $cookiejar_path);
		$data = (is_array($content) && isset($content['body']) ? (string) $content['body'] : '');
		if(strpos($data, 'containerRoot') === FALSE) {
			log_message('error', "{$this->site} | Unexpected or empty page content: {$title_url}");
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);

		$dom = new DOMDocument();
		//Suppress HTML parse warnings while loading, then restore whatever error mode was active before.
		$previousErrorMode = libxml_use_internal_errors(true);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			log_message('error', "{$this->site} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length}): {$title_url}");
			return NULL;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query("td[2]",   $firstRow);
		$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
		if($nodes_latest->length === 0 || $nodes_chapter->length === 0) {
			log_message('error', "{$this->site} | Chapter row is missing its link or date: {$title_url}");
			return NULL;
		}

		$link = (string) $nodes_chapter->item(0)->getAttribute('href');
		//Drop the query string before splitting; segment 3 of the remaining path is the chapter name.
		$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
		if(!isset($chapterURLSegments[3])) {
			log_message('error', "{$this->site} | Unexpected chapter link format: {$title_url}");
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			//The chapter id is the trailing number of the full link (i.e. the "id" query value).
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent))
		];
	}
}
947
948
class GameOfScanlation extends Site_Model {
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/**
	 * Build the listing URL for a title.
	 *
	 * NOTE: GoS has two separate title URL formats, one under /projects/ and one under /forums/,
	 *       which show the same listing page. Titles that use /forums/ appear to have ".%ID%"
	 *       appended to their title_url, so the presence of a "." decides which format is used.
	 *
	 * @param string $title_url Title slug, optionally suffixed with ".ID".
	 * @return string URL of the listing page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return (strpos($title_url, '.') === FALSE
			? "https://gameofscanlation.moe/projects/{$title_url}/"
			: "https://gameofscanlation.moe/forums/{$title_url}/");
	}

	/**
	 * @param string $title_url Title slug, optionally suffixed with ".ID".
	 * @param string $chapter   Chapter slug, optionally suffixed with ".ID".
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Both slugs may carry a trailing ".<digits>" id which is stripped where noted below.
		$idSuffix = "/\\.[0-9]+$/";

		$titleSlug   = preg_replace($idSuffix, "", $title_url);
		$chapterSlug = preg_replace($idSuffix, "", $chapter);

		$chapterData = [];
		//The URL keeps the chapter string untouched; only the title has its id removed.
		$chapterData['url']    = "https://gameofscanlation.moe/projects/".$titleSlug.'/'.$chapter.'/';
		$chapterData['number'] = preg_replace("/chapter-/", "c", $chapterSlug);
		return $chapterData;
	}

	/**
	 * Fetch the listing page and read title, latest chapter and update time from it.
	 *
	 * @param string $title_url Title slug, optionally suffixed with ".ID".
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$info = [];

		$rawTitle = $nodes['nodes_title']->getAttribute('content');
		$info['title'] = trim(html_entity_decode($rawTitle));

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$info['latest_chapter'] = preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', $chapterHref);

		//The "title" attribute is cast to int and used directly as the timestamp.
		$updatedAt = (int) $nodes['nodes_latest']->getAttribute('title');
		$info['last_updated'] = date("Y-m-d H:i:s", $updatedAt);

		return $info;
	}
}
998
999
class MangaCow extends Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the title page (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		$pageURL = "http://mngcow.co/{$title_url}/";
		return $pageURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		//Chapter pages live directly beneath the title page.
		$chapterData['url']    = $this->getFullTitleURL($title_url).$chapter.'/';
		$chapterData['number'] = "c{$chapter}";
		return $chapterData;
	}

	/**
	 * Fetch the title page and read title, latest chapter and update time from it.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return array|null Title data, or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$nodes) {
			return NULL;
		}

		$info = [];
		$info['title'] = trim($nodes['nodes_title']->textContent);

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$info['latest_chapter'] = preg_replace('/^.*\/([0-9]+)\/$/', '$1', $chapterHref);

		//The first 13 characters of the "title" attribute are skipped before the rest is parsed as a date.
		$dateText = (string) substr($nodes['nodes_latest']->getAttribute('title'), 13);
		$info['last_updated'] = date("Y-m-d H:i:s", strtotime($dateText));

		return $info;
	}
}
1041
1042
/*** FoolSlide sites ***/
1043
1044 View Code Duplication
class KireiCake extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "https://reader.kireicake.com/series/{$title_url}";
		return $seriesURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "https://reader.kireicake.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1066
1067 View Code Duplication
class SeaOtterScans extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "https://reader.seaotterscans.com/series/{$title_url}";
		return $seriesURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "https://reader.seaotterscans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1089
1090 View Code Duplication
class HelveticaScans extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "http://helveticascans.com/reader/series/{$title_url}";
		return $seriesURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "http://helveticascans.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1112
1113 View Code Duplication
class SenseScans extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "http://reader.sensescans.com/series/{$title_url}";
		return $seriesURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "http://reader.sensescans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1135
1136 View Code Duplication
class JaiminisBox extends Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "https://jaiminisbox.com/reader/series/{$title_url}";
		return $seriesURL;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter in LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//Volume "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => "https://jaiminisbox.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused here.
	 * @return mixed Whatever parseFoolSlide() returns.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1158
1159 View Code Duplication
/**
 * Site model for the reader hosted at kobato.hologfx.com.
 *
 * Provides the validation patterns for title slugs and chapter strings, builds
 * series and chapter URLs under https://kobato.hologfx.com/reader/, and
 * delegates title scraping to parseFoolSlide().
 */
class DokiFansubs extends Site_Model {
0 ignored issues
show
Duplication introduced by
This class seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1160
	/** Title slug pattern: one or more lowercase letters, digits, underscores or hyphens. */
	public $titleFormat   = '/^[a-z0-9_-]+$/';
1161
	/** Chapter pattern: the literal prefix en/ followed by one to four slash-separated numeric segments. */
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
1162
1163
	/**
	 * Builds the series page URL for a title slug.
	 *
	 * @param string $title_url Title slug; interpolated as-is, no validation happens here.
	 *
	 * @return string Absolute series URL on kobato.hologfx.com.
	 */
	public function getFullTitleURL(string $title_url) : string {
1164
		return "https://kobato.hologfx.com/reader/series/{$title_url}";
1165
	}
1166
1167
	/**
	 * Builds the reader URL and a display number for one chapter.
	 *
	 * @param string $title_url Title slug; interpolated as-is into the reader URL.
	 * @param string $chapter   Slash-separated chapter string (see the layout comment below).
	 *
	 * @return array Map with two keys: url (reader link) and number (display label).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
1168
		//LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
1169
		// Index 0 is the language, 1 the volume, 2 the chapter, 3 the optional extra part.
		$chapter_parts = explode('/', $chapter);
1170
		return [
1171
			'url'    => "https://kobato.hologfx.com/reader/read/{$title_url}/{$chapter}/",
1172
			// Label is an optional v<volume>/ prefix (omitted when the volume segment is the
			// string 0), then c<chapter>, then .<extra> only when a fourth segment exists.
			// NOTE(review): chapterFormat also accepts a single numeric segment (en/1), which
			// would leave index 2 unset here - confirm callers never pass that shape.
			'number' => ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '') . "c{$chapter_parts[2]}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')/*)*/
1173
		];
1174
	}
1175
1176
	/**
	 * Returns title data for a title slug by delegating to parseFoolSlide().
	 *
	 * @param string $title_url Title slug, expanded via getFullTitleURL().
	 * @param bool   $firstGet  Part of the abstract Site_Model signature; not read in this body.
	 *
	 * @return mixed Whatever parseFoolSlide() returns (helper defined outside this excerpt).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
1177
		$fullURL = $this->getFullTitleURL($title_url);
1178
		return $this->parseFoolSlide($fullURL, $title_url);
1179
	}
1180
}
1181
1182 View Code Duplication
/**
 * Site model for the reader hosted at www.demonicscans.com.
 *
 * Provides the validation patterns for title slugs and chapter strings, builds
 * series and chapter URLs under http://www.demonicscans.com/FoOlSlide/, and
 * delegates title scraping to parseFoolSlide().
 */
class DemonicScans extends Site_Model {
0 ignored issues
show
Duplication introduced by
This class seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1183
	/** Title slug pattern: one or more lowercase letters, digits, underscores or hyphens. */
	public $titleFormat   = '/^[a-z0-9_-]+$/';
1184
	/** Chapter pattern: the literal prefix en/ followed by one to four slash-separated numeric segments. */
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
1185
1186 1
	/**
	 * Builds the series page URL for a title slug.
	 *
	 * @param string $title_url Title slug; interpolated as-is, no validation happens here.
	 *
	 * @return string Absolute series URL on www.demonicscans.com (plain http, mixed-case FoOlSlide path).
	 */
	public function getFullTitleURL(string $title_url) : string {
1187 1
		return "http://www.demonicscans.com/FoOlSlide/series/{$title_url}";
1188
	}
1189
1190
	/**
	 * Builds the reader URL and a display number for one chapter.
	 *
	 * @param string $title_url Title slug; interpolated as-is into the reader URL.
	 * @param string $chapter   Slash-separated chapter string (see the layout comment below).
	 *
	 * @return array Map with two keys: url (reader link) and number (display label).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
1191
		//LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
1192
		// Index 0 is the language, 1 the volume, 2 the chapter, 3 the optional extra part.
		$chapter_parts = explode('/', $chapter);
1193
		return [
1194
			'url'    => "http://www.demonicscans.com/FoOlSlide/read/{$title_url}/{$chapter}/",
1195
			// Label is an optional v<volume>/ prefix (omitted when the volume segment is the
			// string 0), then c<chapter>, then .<extra> only when a fourth segment exists.
			// NOTE(review): chapterFormat also accepts a single numeric segment (en/1), which
			// would leave index 2 unset here - confirm callers never pass that shape.
			'number' => ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '') . "c{$chapter_parts[2]}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')/*)*/
1196
		];
1197
	}
1198
1199 1
	/**
	 * Returns title data for a title slug by delegating to parseFoolSlide().
	 *
	 * @param string $title_url Title slug, expanded via getFullTitleURL().
	 * @param bool   $firstGet  Part of the abstract Site_Model signature; not read in this body.
	 *
	 * @return mixed Whatever parseFoolSlide() returns (helper defined outside this excerpt).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
1200 1
		$fullURL = $this->getFullTitleURL($title_url);
1201 1
		return $this->parseFoolSlide($fullURL, $title_url);
1202
	}
1203
}
1204