Completed
Push — master ( 81f48e...d30a44 )
by Angus
03:06
created

MangaFox::doCustomFollow()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 17
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 11
nc 3
nop 2
dl 0
loc 17
ccs 0
cts 0
cp 0
crap 12
rs 9.4285
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
abstract class Site_Model extends CI_Model {
4
	public $site          = '';
5
	public $titleFormat   = '';
6
	public $chapterFormat = '';
7
8 10
	public function __construct() {
9 10
		parent::__construct();
10
11 10
		$this->load->database();
12 10
	}
13
14
	abstract public function getFullTitleURL(string $title_url) : string;
15
16
	abstract public function getChapterData(string $title_url, string $chapter) : array;
17
18
	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
19
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);
20
21
	/**
	 * Checks a title URL segment against this site's $titleFormat pattern.
	 * A mismatch is written to the error log.
	 *
	 * @param string $title_url
	 *
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
26
	/**
	 * Checks a chapter string against this site's $chapterFormat pattern.
	 * A mismatch is written to the error log.
	 *
	 * @param string $chapter
	 *
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
31
32 10
	/**
	 * Requests a URL with curl and splits the raw response into headers, status code and body.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Optional cookie string, sent via CURLOPT_COOKIE when not empty.
	 * @param string $cookiejar_path  Optional path handed to CURLOPT_COOKIEFILE when not empty.
	 * @param bool   $follow_redirect Whether curl should follow redirects.
	 * @param bool   $isPost          Send $postFields as a url-encoded POST body.
	 * @param array  $postFields      Fields for the POST body (only used when $isPost is TRUE).
	 *
	 * @return array|false ['headers' => result of http_parse_headers(), 'status_code' => int, 'body' => string],
	 *                     or FALSE when curl_exec() fails (the curl error is logged).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are part of the response so they can be split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is an on/off switch, not a field count.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//The handle must be released on the failure path too (after the error has been read from it).
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		//The first $header_size bytes of the response are the header block, the remainder is the body.
		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
76
77
	/**
	 * Validates a get_content() result and extracts the title, "latest" and chapter nodes from its HTML body.
	 *
	 * Every failure (non-array content, non-2xx status, empty body, failure string found, unexpected
	 * node counts) is logged and reported as FALSE.
	 *
	 * @param array|false $content             Result of get_content(); anything that is not an array is treated as a failed request.
	 * @param string      $title_url           Only used to identify the title in log messages.
	 * @param string      $node_title_string   XPath (document-wide) that must match exactly one title node.
	 * @param string      $node_row_string     XPath (document-wide) that must match exactly one row node.
	 * @param string      $node_latest_string  XPath, relative to the row, that must match exactly one node.
	 * @param string      $node_chapter_string XPath, relative to the row, that must match exactly one node. When '', the row itself is used.
	 * @param string      $failure_string      When not "", a body containing this string is treated as a failure.
	 *
	 * @return array|false ['nodes_title' => DOMNode, 'nodes_latest' => DOMNode, 'nodes_chapter' => DOMNode] or FALSE on failure.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		//list('headers' => $headers, 'status_code' => $status_code, 'body' => $data) = $content; //TODO: PHP 7.1

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		//Parse warnings are collected internally instead of being emitted while loading the HTML.
		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		//An empty chapter query means the row node itself carries the chapter data.
		$nodes_chapter = ($node_chapter_string !== '' ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row);

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
146
147 9
	public function cleanTitleDataDOM(string $data) : string {
148 9
		return $data;
149
	}
150
151
	//This has its own function due to FoOlSlide being used a lot by fan translation sites, and the code being pretty much the same across all of them.
	/**
	 * Fetches a FoOlSlide title page and extracts the title, latest chapter and last-updated date.
	 *
	 * @param string $fullURL   URL of the title page.
	 * @param string $title_url Passed to parseTitleDataDOM() and used in log messages.
	 *
	 * @return array|null ['title' => string, 'latest_chapter' => string, 'last_updated' => "Y-m-d H:i:s"],
	 *                    or NULL when the page could not be fetched or parsed.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$content = $this->get_content($fullURL);
		if(!$content) {
			return NULL;
		}

		//Only the <article> element is needed, strip everything around it before parsing.
		$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$data) {
			return NULL;
		}

		//The date is taken from the part after the first comma of the meta text, with dots removed.
		//Both a missing comma and an unparseable date must be caught here: with strict_types enabled,
		//handing strtotime()'s FALSE to date() would throw a TypeError.
		$metaParts = explode(',', (string) $data['nodes_latest']->nodeValue);
		$timestamp = (isset($metaParts[1]) ? strtotime(str_replace('.', '', $metaParts[1])) : FALSE);
		if($timestamp === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unable to parse last updated date");
			return NULL;
		}

		$link = (string) $data['nodes_chapter']->getAttribute('href');

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
178
179
	public function doCustomFollow(string $data = "", array $extra = []) {}
180
	public function doCustomUpdate() {}
181
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
182
}
183
/**
 * Lazy loader for site models: reading $this->SiteName creates that Site_Model subclass on first
 * access and stores it as a property, so later reads no longer go through __get().
 */
class Sites_Model extends CI_Model {
	/**
	 * @param string $name Property being read.
	 *
	 * @return mixed The site model when $name is a direct subclass of Site_Model; otherwise whatever
	 *               the parent __get() resolves, or FALSE when that is NULL.
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		if(class_exists($name) && get_parent_class($name) === 'Site_Model') {
			$this->loadSite($name);
			return $this->{$name};
		}

		//The parent's result used to be thrown away, which made every non-site property read as FALSE.
		//FALSE is kept as the "nothing found" value for existing callers.
		$value = parent::__get($name);
		return ($value !== NULL ? $value : FALSE);
	}

	/**
	 * Instantiates the named site class and stores it on this object under the same name.
	 *
	 * @param string $siteName Class name of the site model.
	 */
	private function loadSite(string $siteName) {
		$this->{$siteName} = new $siteName();
	}
}
200
201
class MangaFox extends Site_Model {
202
	public $site          = 'MangaFox';
203
	public $titleFormat   = '/^[a-z0-9_]+$/';
204
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';
205
206 1
	public function getFullTitleURL(string $title_url) : string {
207 1
		return "http://mangafox.me/manga/{$title_url}/";
208
	}
209
210
	public function getChapterData(string $title_url, string $chapter) : array {
211
		return [
212
			'url'    => "http://mangafox.me/manga/{$title_url}/{$chapter}/1.html",
213
			'number' => $chapter
214
		];
215
	}
216
217 1
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
218 1
		$titleData = [];
219
220 1
		$fullURL = $this->getFullTitleURL($title_url);
221 1
		$content = $this->get_content($fullURL);
222
223 1
		$data = $this->parseTitleDataDOM(
224
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 221 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
225
			$title_url,
226 1
			"//meta[@property='og:title']/@content",
227 1
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
228 1
			"div/span[@class='date']",
229 1
			"div/h3/a"
230
		);
231 1
		if($data) {
232 1
			$titleData['title'] = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));
233
234 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
235 1
			$chapterURLSegments = explode('/', $link);
236 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
237 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
238
239 1
			if($firstGet) {
240
				$this->doCustomFollow($content['body']);
241
			}
242
		}
243
244 1
		return (!empty($titleData) ? $titleData : NULL);
245
	}
246
247
248
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	/**
	 * Adds a series to the configured MangaFox account's bookmarks.
	 *
	 * @param string $data  HTML of the title page; the series ID is read from its "var sid=...;" script line.
	 * @param array  $extra Unused by this site.
	 *
	 * @return bool TRUE when the bookmark request answered with status 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!preg_match('/var sid=(?<id>[0-9]+);/', $data, $matches)) {
			log_message('error', "{$this->site} | Unable to find series ID, follow request skipped");
			return FALSE;
		}

		$formData = [
			'action' => 'add',
			'sid'    => $matches['id']
		];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter"
		];
		$content = $this->get_content('http://mangafox.me/ajax/bookmark.php', implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//'status_code' is a key of the result; in_array() only searches values and could never find it.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
266
	/**
	 * Reads the configured MangaFox account's bookmark page and collects the latest chapter of
	 * every bookmarked series that has an "updatedch" marker.
	 *
	 * Rows that cannot be parsed are logged and skipped; a failed request yields an empty list.
	 *
	 * @return array Map of title_url => ['title' => string, 'latest_chapter' => string, 'last_updated' => "Y-m-d H:i:s"].
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter",
			"bmorder=za"
		];
		$content = $this->get_content('http://mangafox.me/bookmark/?status=currentreading&sort=last_chapter&order=za', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /bookmark | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /bookmark | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /bookmark | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to the bookmark list before parsing.
		$data = preg_replace('/^[\s\S]+<ul id="bmlist">/', '<ul id="bmlist">', $data);
		$data = preg_replace('/<!-- end of bookmark -->[\s\S]+$/', '<!-- end of bookmark -->', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//ul[@id='bmlist']/li/div[@class='series_grp' and h2[@class='title']/span[@class='updatedch'] and dl]");
		if($nodes_rows->length === 0) {
			//Double quotes are required here, otherwise the site name is logged as literal text.
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_title   = $xpath->query("h2[@class='title']/a[contains(@class, 'title')]", $row);
			$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
			$nodes_latest  = $xpath->query("dl/dt[1]/em/span[@class='timing']", $row);

			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length} | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title = $nodes_title->item(0);

			//With strict_types enabled, passing strtotime()'s FALSE to date() throws a TypeError,
			//so a single unparseable date would otherwise abort the whole update.
			$timestamp = strtotime((string) $nodes_latest->item(0)->nodeValue);
			if($timestamp === FALSE) {
				log_message('error', "{$this->site}/Custom | Unable to parse last updated date (TITLE: ".trim($title->textContent).")");
				continue;
			}

			//Strip a trailing "<page>.html", then read the chapter (and optional second part) from the URL segments.
			$link               = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
			$chapterURLSegments = explode('/', $link);

			$title_url = explode('/', $title->getAttribute('href'))[4];
			$titleDataList[$title_url] = [
				'title'          => trim($title->textContent),
				'latest_chapter' => $chapterURLSegments[5] . (!empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : ""),
				'last_updated'   => date("Y-m-d H:i:s", $timestamp)
			];
		}

		return $titleDataList;
	}
332
	/**
	 * Decides whether $newChapterString is newer than $oldChapterString.
	 *
	 * Both strings are split on '/'. When the segment counts differ the check fails. With exactly
	 * two segments the first one is read as the volume (its first character is dropped; 'TBD',
	 * 'TBA' and 'NA' count as volume 999), otherwise both volumes are 0. The next segment is read
	 * as the chapter number, again without its first character.
	 *
	 * @param string $oldChapterString
	 * @param string $newChapterString
	 *
	 * @return bool TRUE when the new volume or the new chapter number is higher than the old one.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		$oldParts = explode('/', $oldChapterString);
		$newParts = explode('/', $newChapterString);

		//Although it's rare, the old and new chapter can have a different amount of segments.
		//Since this can cause errors, the check simply fails.
		$segmentCount = count($newParts);
		if($segmentCount !== count($oldParts)) {
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//FIXME: This feels like a mess.
			$placeholders = ['TBD', 'TBA', 'NA'];

			$oldVolumeRaw = substr(array_shift($oldParts), 1);
			$newVolumeRaw = substr(array_shift($newParts), 1);

			$oldVolume = floatval(in_array($oldVolumeRaw, $placeholders) ? 999 : $oldVolumeRaw);
			$newVolume = floatval(in_array($newVolumeRaw, $placeholders) ? 999 : $newVolumeRaw);
		}

		$oldChapter = floatval(substr(array_shift($oldParts), 1));
		$newChapter = floatval(substr(array_shift($newParts), 1));

		//A higher volume is enough on its own; otherwise the chapter number decides.
		return ($newVolume > $oldVolume) || ($newChapter > $oldChapter);
	}
370
}
371
372
class MangaHere extends Site_Model {
373
	public $site          = 'MangaHere';
374
	public $titleFormat   = '/^[a-z0-9_]+$/';
375
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';
376
377 1
	public function getFullTitleURL(string $title_url) : string {
378 1
		return "http://www.mangahere.co/manga/{$title_url}/";
379
	}
380
381
	public function getChapterData(string $title, string $chapter) : array {
382
		return [
383
			'url'    => "http://www.mangahere.co/manga/{$title}/{$chapter}/",
384
			'number' => $chapter
385
		];
386
	}
387
388 1
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
389 1
		$titleData = [];
390
391 1
		$fullURL = $this->getFullTitleURL($title_url);
392 1
		$content = $this->get_content($fullURL);
393
394 1
		$data = $this->parseTitleDataDOM(
395
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 392 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
396
			$title_url,
397 1
			"//meta[@property='og:title']/@content",
398 1
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
399 1
			"span[@class='right']",
400 1
			"span[@class='left']/a",
401 1
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
402
		);
403 1
		if($data) {
404 1
			$titleData['title'] = $data['nodes_title']->textContent;
405
406 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
407 1
			$chapterURLSegments = explode('/', $link);
408 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
409 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
410
		}
411
412 1
		return (!empty($titleData) ? $titleData : NULL);
413
	}
414
}
415
416
class Batoto extends Site_Model {
417
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
418
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
419
	//title_url is stored like: "ID:--:LANGUAGE"
420
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"
421
422
	public $site          = 'Batoto';
423
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
424
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
425
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
426
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';
427
428
	public function getFullTitleURL(string $title_string) : string {
429
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
430
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
431
		$title_parts = explode(':--:', $title_string);
432
		return "http://bato.to/comic/_/comics/-r".$title_parts[0];
433
	}
434
435 View Code Duplication
	public function getChapterData(string $title_string, string $chapter) : array {
436
		//$title_string isn't used here.
437
438
		$chapter_parts = explode(':--:', $chapter);
439
		return [
440
			'url'    => "http://bato.to/reader#" . $chapter_parts[0],
441
			'number' => $chapter_parts[1]
442
		];
443
	}
444
445
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
446
		$titleData = [];
447
448
		$title_parts = explode(':--:', $title_url);
449
		$fullURL     = $this->getFullTitleURL($title_url);
450
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?
451
452
453
		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
454
		$cookies = [
455
			"lang_option={$lang}",
456
			"member_id={$this->config->item('batoto_cookie_member_id')}",
457
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
458
		];
459
		$content = $this->get_content($fullURL, implode("; ", $cookies), "", TRUE);
460
461
		$data = $this->parseTitleDataDOM(
462
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($full...', $cookies), '', TRUE) on line 459 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
463
			$title_url,
464
			"//h1[@class='ipsType_pagetitle']",
465
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
466
			"td[last()]",
467
			"td/a[contains(@href,'reader')]",
468
			">Register now<"
469
		);
470
		if($data) {
471
			$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));
472
473
			///^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/
474
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
475
			$titleData['latest_chapter'] = substr($data['nodes_chapter']->getAttribute('href'), 22) . ':--:' . ((!empty($text['volume']) ? 'v'.$text['volume'].'/' : '') . 'c'.$text['chapter'] . (!empty($text['extra']) ? '-'.$text['extra'] : ''));
476
477
			$dateString = $data['nodes_latest']->nodeValue;
478
			if($dateString == 'An hour ago') {
479
				$dateString = '1 hour ago';
480
			}
481
			$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
482
483
			if($firstGet && $lang == 'English') {
484
				//FIXME: English is forced due for now. See #78.
485
				$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
486
			}
487
		}
488
489
		return (!empty($titleData) ? $titleData : NULL);
490
	}
491
492
	public function cleanTitleDataDOM(string $data) : string {
493
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
494
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);
495
		$data = preg_replace('/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/', '</div></div><!-- end mainContent -->', $data);
496
497
		return $data;
498
	}
499
500
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	/**
	 * Follows a series on Bato.to with the configured account.
	 *
	 * @param string $data  HTML of the title page; the secure hash and session ID are read from its ipb.vars script lines.
	 * @param array  $extra Must contain 'id' (sent as f_relid) and 'lang' (sent as the lang_option cookie).
	 *
	 * @return bool TRUE when the follow request answered with status 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!isset($extra['id'], $extra['lang'])) {
			log_message('error', "{$this->site} | Missing id/lang, follow request skipped");
			return FALSE;
		}
		if(!preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text)) {
			log_message('error', "{$this->site} | Unable to find secure hash/session ID, follow request skipped");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//'status_code' is a key of the result; in_array() only searches values and could never find it.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
530
	public function doCustomUpdate() {
531
		return FALSE; /* FIXME: Bato.to is disabled for custom updates until we can fix https://github.com/DakuTree/manga-tracker/issues/78#issuecomment-269833624 */
532
533
		$titleDataList = [];
0 ignored issues
show
Unused Code introduced by
/* FIXME: Bato.to is dis...itleDataList = array(); does not seem to be reachable.

This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.

Unreachable code is most often the result of return, die or exit statements that have been added for debug purposes.

function fx() {
    try {
        doSomething();
        return true;
    }
    catch (\Exception $e) {
        return false;
    }

    return false;
}

In the above example, the last return false will never be executed, because a return statement has already been met in every possible execution path.

Loading history...
534
535
		$cookies = [
536
			"lang_option=English", //FIXME: English is forced due for now. See #78.
537
			"member_id={$this->config->item('batoto_cookie_member_id')}",
538
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
539
		];
540
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
541
		if(!is_array($content)) {
542
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
543
		} else {
544
			$headers     = $content['headers'];
0 ignored issues
show
Unused Code introduced by
$headers is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
545
			$status_code = $content['status_code'];
546
			$data        = $content['body'];
547
548
			if(!($status_code >= 200 && $status_code < 300)) {
549
				log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
550
			} else if(empty($data)) {
551
				log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
552
			} else {
553
				$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
554
				$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);
555
556
				$dom = new DOMDocument();
557
				libxml_use_internal_errors(TRUE);
558
				$dom->loadHTML($data);
559
				libxml_use_internal_errors(FALSE);
560
561
				$xpath      = new DOMXPath($dom);
562
				$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
563
				if($nodes_rows->length > 0) {
564
					foreach($nodes_rows as $row) {
565
						$titleData = [];
566
567
						$nodes_title   = $xpath->query("td[2]/a[1]", $row);
568
						$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
569
						$nodes_lang    = $xpath->query("td[3]/div", $row);
570
						$nodes_latest  = $xpath->query("td[5]", $row);
571
572
						if($nodes_lang->length === 1 && $nodes_lang->item(0)->getAttribute('title') == 'English') {
573
							if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
574
								$title = $nodes_title->item(0);
575
576
								preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr);
577
								$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78
578
579
								if(!array_key_exists($title_url, $titleDataList)) {
580
									$titleData['title'] = trim($title->textContent);
581
582
									$chapter = $nodes_chapter->item(0);
583
									preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text);
584
									$titleData['latest_chapter'] = substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : ''));
585
586
									$dateString = $nodes_latest->item(0)->nodeValue;
587
									if($dateString == 'An hour ago') {
588
										$dateString = '1 hour ago';
589
									}
590
									$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
591
592
593
									$titleDataList[$title_url] = $titleData;
594
								}
595
							} else {
596
								log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
597
							}
598
						}
599
					}
600
				} else {
601
					log_message('error', '{$this->site} | Following list is empty?');
602
				}
603
			}
604
		}
605
		return $titleDataList;
606
	}
607
}
608
609
class DynastyScans extends Site_Model {
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58

	public $site          = 'DynastyScans';
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
	public $chapterFormat = '/^[0-9a-z_]+$/';

	/**
	 * Builds the page URL for a title string of the form "SLUG:--:TYPE".
	 *
	 * TYPE "0" maps to the /series/ listing, anything else (including a missing TYPE) to /chapters/.
	 */
	public function getFullTitleURL(string $title_string) : string {
		$title_parts = explode(':--:', $title_string);
		$url_type = (($title_parts[1] ?? '') == '0' ? 'series' : 'chapters');

		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
	}

	/**
	 * Builds the chapter URL and a display number for a stored chapter string.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$title_parts = explode(':--:', $title_string);
		/* Known chapter url formats (# is numbers):
		       chapters_#A_#B - Ch#A-#B
		       ch_#A          - Ch#A
		       ch_#A_#B       - Ch#A.#B
		       <NOTHING>      - Oneshot (This is passed as "oneshot")
		*/

		$chapterData = [
			'url'    => 'http://dynasty-scans.com/chapters/' . $title_parts[0].'_'.$chapter,
			'number' => ''
		];

		if($chapter === 'oneshot') {
			$chapterData['number'] = 'oneshot';
			return $chapterData;
		}

		//Insert a "_" after the leading alphabetic prefix so the prefix becomes its own segment.
		$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
		$chapterSegments = explode('_', $chapter);
		switch($chapterSegments[0]) {
			case 'ch':
				$chapterData['number'] = 'c'.($chapterSegments[1] ?? '').(!empty($chapterSegments[2]) ? '.'.$chapterSegments[2] : '');
				break;

			case 'chapters':
				//This is barely ever used, but I have seen it.
				//The second number may be absent, so avoid reading an undefined offset.
				$chapterData['number'] = 'c'.($chapterSegments[1] ?? '').'-'.($chapterSegments[2] ?? '');
				break;

			default:
				//TODO: FALLBACK, ALERT ADMIN?
				$chapterData['number'] = $chapter;
				break;
		}

		return $chapterData;
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last update time.
	 *
	 * @return array|null Title data, or NULL when nothing could be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$title_parts = explode(':--:', $title_url);
		switch($title_parts[1] ?? '') {
			case '0':
				//Normal series.
				$data = $this->parseTitleDataDOM(
					$content,
					$title_url,
					"//h2[@class='tag-title']/b[1]",
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
					"small",
					"a[@class='name']"
				);
				if($data) {
					$titleData['title'] = $data['nodes_title']->textContent;

					//In cases where the series is a doujin, try and prepend the copyright.
					//NOTE: This previously tested a misspelt variable ($matchedD), so the prefix was never applied.
					$body = (is_array($content) && isset($content['body']) ? (string) $content['body'] : '');
					preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $body, $matchesD);
					if(isset($matchesD[1]) && substr($matchesD[1], 0, -7) !== 'Original') {
						$titleData['title'] = substr($matchesD[1], 0, -7).' - '.$titleData['title'];
					}

					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
					$chapterSlug        = $chapterURLSegments[2] ?? '';
					if (strpos($chapterSlug, $title_parts[0]) !== false) {
						//Drop the title slug and the "_" that follows it.
						$titleData['latest_chapter'] = substr($chapterSlug, strlen($title_parts[0]) + 1);
					} else {
						$titleData['latest_chapter'] = $chapterSlug;
					}

					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
				}
				break;

			case '1':
				//Oneshot.
				if(!is_array($content) || !isset($content['body'])) {
					//get_content failed; indexing a non-array here would pass NULL into preg_match.
					log_message('error', "{$this->site} : {$title_url} | Failed to grab URL");
					break;
				}
				$data = (string) $content['body'];

				$hasTitle = preg_match('/<b>.*<\/b>/', $data, $matchesT);
				$hasDate  = preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $data, $matches);
				if(!$hasTitle || !$hasDate) {
					log_message('error', "{$this->site} : {$title_url} | Failed to parse oneshot page");
					break;
				}

				preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $data, $matchesD);
				$copyright = (isset($matchesD[1]) && $matchesD[1] !== 'Original' ? $matchesD[1].' - ' : '');
				//substr(.., 3, -4) strips the surrounding <b> and </b> tags.
				$titleData['title'] = $copyright . substr($matchesT[0], 3, -4);

				$titleData['latest_chapter'] = 'oneshot'; //This will never change

				$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime($matches[1]));

				//Oneshots are special, and really shouldn't need to be re-tracked
				$titleData['status'] = '2';
				break;

			default:
				//something went wrong
				break;
		}
		return (!empty($titleData) ? $titleData : NULL);
	}
}
722
723
class MangaPanda extends Site_Model {
	public $site          = 'MangaPanda';
	//NOTE: MangaPanda has manga pages under the root URL, so we need to filter out pages we know that aren't manga.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://www.mangapanda.com/' . $title_url;
	}

	/** Returns the reader URL and the "c"-prefixed chapter number. */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = 'http://www.mangapanda.com/' . $title_url . '/' . $chapter . '/';
		$chapterData['number'] = 'c' . $chapter;

		return $chapterData;
	}

	/**
	 * Fetches the series page and reads title, latest chapter and update time
	 * from the nodes returned by parseTitleDataDOM.
	 *
	 * @return array|null NULL when parseTitleDataDOM returns a falsy value.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$title = $nodes['nodes_title']->textContent;

		//The chapter number is the trailing numeric path segment of the link.
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$updatedText = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
765
766
class MangaStream extends Site_Model {
	public $site          = 'MangaStream';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://mangastream.com/manga/' . $title_url . '/';
	}

	/** Returns the reader URL; the number is "c" plus the part of $chapter before the first "/". */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterSegments = explode('/', $chapter);

		$chapterData = [];
		$chapterData['url']    = 'https://mangastream.com/r/' . $title_url . '/' . $chapter;
		$chapterData['number'] = 'c' . $chapterSegments[0];

		return $chapterData;
	}

	/**
	 * Fetches the series page and reads title, latest chapter and update time
	 * from the nodes returned by parseTitleDataDOM.
	 *
	 * @return array|null NULL when parseTitleDataDOM returns a falsy value.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$nodes) {
			return NULL;
		}

		$title = $nodes['nodes_title']->textContent;

		//Keep the "<chapter>/<id>" part of the link and drop the trailing page number.
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$updatedText = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
808
809
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $site          = 'WebToons';
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/** Builds the listing page URL from a "ID:--:LANG:--:TITLE:--:GENRE" title string. */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		//NOTE(review): the trailing "/" ends up inside the title_no query value - confirm this is intended.
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/** Builds the viewer URL from the title string and a "ID:--:CHAPTER_N" chapter string. */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Fetches the RSS feed for the title and reads title, latest chapter and update time
	 * from the first feed item.
	 *
	 * @return array|null NULL when the feed is missing, cannot be fetched or cannot be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "Failed to grab RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect libxml errors internally rather than emitting warnings, then restore the previous setting.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);
		if($xml === FALSE) {
			//A single malformed feed must not terminate the whole process (this used to call die()).
			log_message('error', "Unable to parse RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		if(isset($xml->{'channel'}->item[0])) {
			$chapterURLSegments = explode('/', ((string) $xml->{'channel'}->item[0]->link));
			//Segment 6 is the chapter name and segment 7 the "viewer?..." part, per the CHAPTER URL format above.
			if(isset($chapterURLSegments[6], $chapterURLSegments[7])) {
				$titleData['title'] = trim((string) $xml->{'channel'}->title);

				$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $xml->{'channel'}->item[0]->pubDate));
			} else {
				log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
872
873
class KissManga extends Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $site          = 'KissManga';
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/** Builds the reader URL from a "CHAPTER_NAME:--:ID" chapter string. */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse.
			'number' => $chapter_parts[0]
		];
	}

	/**
	 * Fetches the series page using the saved cookiejar and reads title, latest chapter and update time.
	 *
	 * @return array|null NULL when the cookiejar is missing/stale, or the page could not be fetched or parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
		//filemtime() raises a warning on a missing file, so check existence first.
		$cookie_last_updated = (file_exists($cookiejar_path) ? filemtime($cookiejar_path) : FALSE);
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			log_message('error', "{$this->site} : {$title_url} | Cookiejar is missing or more than a day old, skipping");
			return NULL;
		}

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL, '', $cookiejar_path);
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL");
			return NULL;
		}

		$data = $content['body'];
		if(strpos($data, 'containerRoot') === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Page is missing 'containerRoot'");
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(true);
		$dom->loadHTML($data);
		libxml_use_internal_errors(false);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length === 1 && $nodes_row->length === 1) {
			$firstRow      = $nodes_row->item(0);
			$nodes_latest  = $xpath->query("td[2]",   $firstRow);
			$nodes_chapter = $xpath->query("td[1]/a", $firstRow);

			//item(0) returns NULL on an empty list, so make sure both cells exist before dereferencing.
			if($nodes_latest->length > 0 && $nodes_chapter->length > 0) {
				$titleData['title'] = $nodes_title->item(0)->textContent;

				$link = (string) $nodes_chapter->item(0)->getAttribute('href');
				$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
				$titleData['latest_chapter'] = ($chapterURLSegments[3] ?? '') . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link);
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent));
			} else {
				log_message('error', "{$this->site} : {$title_url} | Latest chapter row is missing its link or date cell");
			}
		} else {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
952
953
class GameOfScanlation extends Site_Model {
	public $site          = 'GameOfScanlation';
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/** Returns the listing URL for a title, picking the /forums/ or /projects/ section. */
	public function getFullTitleURL(string $title_url) : string {
		/* NOTE: GoS has two separate title URL formats, one under /projects/ and one under /forums/.
		         Titles which use /forums/ seem to have ".%ID%" appended to the title_url,
		         so the presence of a "." is used to tell the two apart. */
		$section = (strpos($title_url, '.') !== FALSE ? 'forums' : 'projects');

		return "https://gameofscanlation.moe/{$section}/{$title_url}/";
	}

	/** Returns the chapter URL (always under /projects/) and a shortened chapter number. */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Strip any trailing ".<digits>" suffix from both the title and the chapter.
		$titleSlug   = preg_replace("/\\.[0-9]+$/", "", $title_url);
		$chapterSlug = preg_replace("/\\.[0-9]+$/", "", $chapter);

		$chapterData = [];
		$chapterData['url']    = "https://gameofscanlation.moe/projects/" . $titleSlug . '/' . $chapter . '/';
		$chapterData['number'] = preg_replace("/chapter-/", "c", $chapterSlug);

		return $chapterData;
	}

	/**
	 * Fetches the listing page and reads title, latest chapter and update time
	 * from the nodes returned by parseTitleDataDOM.
	 *
	 * @return array|null NULL when parseTitleDataDOM returns a falsy value.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$title = trim(html_entity_decode($nodes['nodes_title']->getAttribute('content')));

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');

		//The "title" attribute is cast to int and passed to date() as the timestamp.
		$timestamp = (int) $nodes['nodes_latest']->getAttribute('title');

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
}
1004
1005
class MangaCow extends Site_Model {
	public $site          = 'MangaCow';
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://mngcow.co/' . $title_url . '/';
	}

	/** Returns the chapter URL (series URL plus chapter) and the "c"-prefixed chapter number. */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = $this->getFullTitleURL($title_url) . $chapter . '/';
		$chapterData['number'] = 'c' . $chapter;

		return $chapterData;
	}

	/**
	 * Fetches the series page and reads title, latest chapter and update time
	 * from the nodes returned by parseTitleDataDOM.
	 *
	 * @return array|null NULL when parseTitleDataDOM returns a falsy value.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$nodes) {
			return NULL;
		}

		$title = trim($nodes['nodes_title']->textContent);

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');

		//The first 13 characters of the "title" attribute are skipped before parsing the date.
		$updatedText = (string) substr($nodes['nodes_latest']->getAttribute('title'), 13);

		return [
			'title'          => $title,
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
1048
1049
/*** FoolSlide sites ***/
1050
1051 View Code Duplication
class KireiCake extends Site_Model {
	public $site          = 'KireiCake';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://reader.kireicake.com/series/' . $title_url;
	}

	/** Returns the reader URL and a "v#/c#.#" style number built from the chapter segments. */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Chapter strings are LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$segments = explode('/', $chapter);

		$url = 'https://reader.kireicake.com/read/' . $title_url . '/' . $chapter . '/';

		//The volume is only shown when it is not "0"; the extra part only when present.
		$number = '';
		if($segments[1] !== '0') {
			$number .= 'v' . $segments[1] . '/';
		}
		$number .= 'c' . $segments[2];
		if(isset($segments[3])) {
			$number .= '.' . $segments[3];
		}

		return [
			'url'    => $url,
			'number' => $number
		];
	}

	/** Delegates to parseFoolSlide with the series URL. */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1074
1075 View Code Duplication
class SeaOtterScans extends Site_Model {
	public $site          = 'SeaOtterScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://reader.seaotterscans.com/series/' . $title_url;
	}

	/** Returns the reader URL and a "v#/c#.#" style number built from the chapter segments. */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Chapter strings are LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$segments = explode('/', $chapter);

		$url = 'https://reader.seaotterscans.com/read/' . $title_url . '/' . $chapter . '/';

		//The volume is only shown when it is not "0"; the extra part only when present.
		$number = '';
		if($segments[1] !== '0') {
			$number .= 'v' . $segments[1] . '/';
		}
		$number .= 'c' . $segments[2];
		if(isset($segments[3])) {
			$number .= '.' . $segments[3];
		}

		return [
			'url'    => $url,
			'number' => $number
		];
	}

	/** Delegates to parseFoolSlide with the series URL. */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1098
1099 View Code Duplication
class HelveticaScans extends Site_Model {
	public $site          = 'HelveticaScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://helveticascans.com/reader/series/' . $title_url;
	}

	/** Returns the reader URL and a "v#/c#.#" style number built from the chapter segments. */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Chapter strings are LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$segments = explode('/', $chapter);

		$url = 'http://helveticascans.com/reader/read/' . $title_url . '/' . $chapter . '/';

		//The volume is only shown when it is not "0"; the extra part only when present.
		$number = '';
		if($segments[1] !== '0') {
			$number .= 'v' . $segments[1] . '/';
		}
		$number .= 'c' . $segments[2];
		if(isset($segments[3])) {
			$number .= '.' . $segments[3];
		}

		return [
			'url'    => $url,
			'number' => $number
		];
	}

	/** Delegates to parseFoolSlide with the series URL. */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1122
1123 View Code Duplication
class SenseScans extends Site_Model {
	public $site          = 'SenseScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/** Returns the series page URL for the given title slug. */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://reader.sensescans.com/series/' . $title_url;
	}

	/** Returns the reader URL and a "v#/c#.#" style number built from the chapter segments. */
	public function getChapterData(string $title_url, string $chapter) : array {
		//Chapter strings are LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$segments = explode('/', $chapter);

		$url = 'http://reader.sensescans.com/read/' . $title_url . '/' . $chapter . '/';

		//The volume is only shown when it is not "0"; the extra part only when present.
		$number = '';
		if($segments[1] !== '0') {
			$number .= 'v' . $segments[1] . '/';
		}
		$number .= 'c' . $segments[2];
		if(isset($segments[3])) {
			$number .= '.' . $segments[3];
		}

		return [
			'url'    => $url,
			'number' => $number
		];
	}

	/** Delegates to parseFoolSlide with the series URL. */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1146
1147 View Code Duplication
/**
 * Site model for the Jaimini's Box reader (https://jaiminisbox.com/reader).
 *
 * NOTE(review): SenseScans, JaiminisBox and DokiFansubs in this file differ only in
 * $site and the host/path prefix of their URLs; a shared abstract base class would
 * remove the duplication.
 */
class JaiminisBox extends Site_Model {
	/** @var string Site name, used in the log messages of Site_Model's validators. */
	public $site          = 'JaiminisBox';
	/** @var string Regex a title slug must match (checked by Site_Model::isValidTitleURL()). */
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	/** @var string Regex a chapter path must match: "en/" followed by one to four numeric segments. */
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 *
	 * @param string $title_url Title slug; interpolated into the URL as-is.
	 *
	 * @return string Absolute series URL.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://jaiminisbox.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and the display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter path in the form LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/).
	 *
	 * @return array 'url'    => reader URL for the chapter,
	 *               'number' => "v{VOLUME}/" (omitted when VOLUME is '0') . "c{CHAPTER}"
	 *                           . ".{CHAPTER_EXTRA}" (only when that segment exists).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$chapter_parts = explode('/', $chapter);

		// $chapterFormat accepts as little as "en/<n>", so VOLUME/CHAPTER may be absent.
		// Defaulting to '' yields the same strings as before, minus the undefined-offset notices.
		$volume  = $chapter_parts[1] ?? '';
		$number  = $chapter_parts[2] ?? '';

		$volume_prefix = ($volume !== '0') ? "v{$volume}/" : '';
		$extra_suffix  = isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '';

		return [
			'url'    => "https://jaiminisbox.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $volume_prefix . "c{$number}" . $extra_suffix
		];
	}

	/**
	 * Resolve the series page for a title and hand it to the FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Part of the Site_Model signature; not read by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns (helper not visible here; the TODO on
	 *               Site_Model::getTitleData() indicates the intended type is ?array).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1170
1171 View Code Duplication
/**
 * Site model for the Doki Fansubs reader (https://kobato.hologfx.com/reader).
 *
 * NOTE(review): SenseScans, JaiminisBox and DokiFansubs in this file differ only in
 * $site and the host/path prefix of their URLs; a shared abstract base class would
 * remove the duplication.
 */
class DokiFansubs extends Site_Model {
	/** @var string Site name, used in the log messages of Site_Model's validators. */
	public $site          = 'DokiFansubs';
	/** @var string Regex a title slug must match (checked by Site_Model::isValidTitleURL()). */
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	/** @var string Regex a chapter path must match: "en/" followed by one to four numeric segments. */
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 *
	 * @param string $title_url Title slug; interpolated into the URL as-is.
	 *
	 * @return string Absolute series URL.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://kobato.hologfx.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and the display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter path in the form LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/).
	 *
	 * @return array 'url'    => reader URL for the chapter,
	 *               'number' => "v{VOLUME}/" (omitted when VOLUME is '0') . "c{CHAPTER}"
	 *                           . ".{CHAPTER_EXTRA}" (only when that segment exists).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
		$chapter_parts = explode('/', $chapter);

		// $chapterFormat accepts as little as "en/<n>", so VOLUME/CHAPTER may be absent.
		// Defaulting to '' yields the same strings as before, minus the undefined-offset notices.
		$volume  = $chapter_parts[1] ?? '';
		$number  = $chapter_parts[2] ?? '';

		$volume_prefix = ($volume !== '0') ? "v{$volume}/" : '';
		$extra_suffix  = isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '';

		return [
			'url'    => "https://kobato.hologfx.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $volume_prefix . "c{$number}" . $extra_suffix
		];
	}

	/**
	 * Resolve the series page for a title and hand it to the FoolSlide parser.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Part of the Site_Model signature; not read by this implementation.
	 *
	 * @return mixed Whatever parseFoolSlide() returns (helper not visible here; the TODO on
	 *               Site_Model::getTitleData() indicates the intended type is ?array).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1194