Completed
Push — master ( 1dfd21...36ade4 )
by Angus
03:51
created

Site_Model::isValidChapter()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 1
dl 0
loc 5
ccs 0
cts 0
cp 0
crap 6
rs 9.4285
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Base class shared by every tracked site.
 *
 * Provides URL/chapter validation against per-site regular expressions, a cURL
 * based page fetcher, and DOM helpers that extract the title / latest chapter /
 * last-updated nodes from a fetched title page.
 */
abstract class Site_Model extends CI_Model {
	/** @var string Site name; used as a prefix in log messages. */
	public $site          = '';
	/** @var string PCRE pattern a title URL must match (see isValidTitleURL()). */
	public $titleFormat   = '';
	/** @var string PCRE pattern a chapter string must match (see isValidChapter()). */
	public $chapterFormat = '';

	public function __construct() {
		parent::__construct();

		$this->load->database();
	}

	/**
	 * Builds the full URL of a title page from the stored title URL/string.
	 */
	abstract public function getFullTitleURL(string $title_url) : string;

	/**
	 * Builds chapter information for a title.
	 *
	 * @return array Implementations in this file return the keys 'url' and 'number'.
	 */
	abstract public function getChapterData(string $title_url, string $chapter) : array;

	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
	/**
	 * Fetches and parses the title page.
	 *
	 * @return array|null Implementations in this file return the keys 'title',
	 *                    'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);

	/**
	 * Checks a title URL against $titleFormat, logging an error when it does not match.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		$success = (bool) preg_match($this->titleFormat, $title_url);
		if(!$success) log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return $success;
	}

	/**
	 * Checks a chapter string against $chapterFormat, logging an error when it does not match.
	 */
	public function isValidChapter(string $chapter) : bool {
		$success = (bool) preg_match($this->chapterFormat, $chapter);
		if(!$success) log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return $success;
	}

	/**
	 * Fetches a URL with cURL.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Optional "name=value; name=value" cookie header.
	 * @param string $cookiejar_path  Optional cookie file to read cookies from.
	 * @param bool   $follow_redirect Whether to follow Location redirects.
	 * @param bool   $isPost          Send a POST request instead of a GET.
	 * @param array  $postFields      Fields sent (URL-encoded) when $isPost is TRUE.
	 *
	 * @return array|false ['headers' => ..., 'status_code' => int, 'body' => string],
	 *                     or FALSE when the cURL request itself failed.
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned in front of the body, and split off again below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is a boolean switch; passing the field count would turn it off for an empty field list.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//Release the handle on the failure path too (it was previously leaked here).
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		$headers     = http_parse_headers(substr($response, 0, $header_size));
		$body        = substr($response, $header_size);
		curl_close($ch);

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}

	/**
	 * Validates a get_content() result and extracts the title, latest-update and
	 * latest-chapter nodes from its body. Every failure is logged and yields FALSE.
	 *
	 * @param array|false $content             Result of get_content().
	 * @param string      $title_url           Title identifier, only used in log messages.
	 * @param string      $node_title_string   XPath matching exactly one title node.
	 * @param string      $node_row_string     XPath matching exactly one "latest chapter" row.
	 * @param string      $node_latest_string  XPath (relative to the row) matching exactly one date node.
	 * @param string      $node_chapter_string XPath (relative to the row) matching exactly one chapter node;
	 *                                         when empty, the row itself is used as the chapter node.
	 * @param string      $failure_string      If non-empty and found in the body, the page is treated as a failure.
	 *
	 * @return DOMNode[]|false Keys 'nodes_title', 'nodes_latest', 'nodes_chapter', or FALSE on failure.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, drop the buffered errors afterwards and put the previous setting back.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		$xpath = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if(!($nodes_title->length === 1 && $nodes_row->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		if($node_chapter_string !== '') {
			$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
		} else {
			$nodes_chapter = $nodes_row;
		}

		if(!($nodes_latest->length === 1 && $nodes_chapter->length === 1)) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}

	/**
	 * Hook for trimming the raw HTML before it is parsed by parseTitleDataDOM().
	 * The base implementation returns the data unchanged.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}

	//This has its own function due to FoOlSlide being used a lot by fan translation sites, and the code being pretty much the same across all of them.
	/**
	 * Fetches and parses a FoOlSlide title page.
	 *
	 * @param string $fullURL   URL of the title page.
	 * @param string $title_url Title identifier, only used in log messages.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$titleData = [];

		if($content = $this->get_content($fullURL)) {
			//Keep only the <article> part of the page before parsing it.
			$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

			$data = $this->parseTitleDataDOM(
				$content,
				$title_url,
				"//div[@class='large comic']/h1[@class='title']",
				"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
				"div[@class='meta_r']",
				"div[@class='title']/a"
			);
			if($data) {
				$titleData['title'] = trim($data['nodes_title']->textContent);

				$link                        = (string) $data['nodes_chapter']->getAttribute('href');
				$titleData['latest_chapter'] = preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link);

				//The date is taken from the text after the first comma of the meta node, with any dots removed.
				$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) str_replace('.', '', explode(',', $data['nodes_latest']->nodeValue)[1])));
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	//Optional per-site hooks. The base implementations do nothing; sites override the ones they need.
	public function doCustomFollow(string $data = "", array $extra = []) {}
	public function doCustomUpdate() {}
	public function doCustomCheck(string $oldChapter, string $newChapter) {}
}
183
/**
 * Container model exposing one instance of every supported site class
 * as a public property named after that class.
 */
class Sites_Model extends CI_Model {
	//FIXME: Is it possible to automatically generate this in some way or another?
	public $MangaFox;
	public $MangaHere;
	public $Batoto;
	public $DynastyScans;
	public $MangaPanda;
	public $MangaStream;
	public $WebToons;
	public $KissManga;
	public $KireiCake;
	public $GameOfScanlation;
	public $MangaCow;
	public $SeaOtterScans;
	public $HelveticaScans;
	public $SenseScans;
	public $JaiminisBox;

	public function __construct() {
		parent::__construct();

		//Each property holds an instance of the class with the same name; instantiation order is kept as declared above.
		$siteClasses = [
			'MangaFox',
			'MangaHere',
			'Batoto',
			'DynastyScans',
			'MangaPanda',
			'MangaStream',
			'WebToons',
			'KissManga',
			'KireiCake',
			'GameOfScanlation',
			'MangaCow',
			'SeaOtterScans',
			'HelveticaScans',
			'SenseScans',
			'JaiminisBox'
		];
		foreach($siteClasses as $siteClass) {
			$this->{$siteClass} = new $siteClass();
		}
	}
}
221
222
/**
 * MangaFox site implementation.
 *
 * Chapters are stored as "cNUMBER" or "vVOLUME/cNUMBER" (see $chapterFormat).
 */
class MangaFox extends Site_Model {
	public $site          = 'MangaFox';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';

	/**
	 * Builds the title page URL for the given title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://mangafox.me/manga/{$title_url}/";
	}

	/**
	 * Builds the chapter URL for a title.
	 *
	 * @return array Keys 'url' (chapter page) and 'number' (the chapter string as given).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			'url'    => "http://mangafox.me/manga/{$title_url}/{$chapter}/",
			'number' => $chapter
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  When TRUE and parsing succeeded, doCustomFollow() is also
	 *                          called with the fetched page body.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		//parseTitleDataDOM() copes with $content being FALSE (it logs and returns FALSE).
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
			"div/span[@class='date']",
			"div/h3/a"
		);
		if($data) {
			//The last 6 characters of og:title are dropped (presumably a " Manga" suffix - confirm against the live page).
			$titleData['title'] = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));

			//Strip a trailing "N.html" page reference; segments 5 and 6 of the URL hold "[vVOLUME/]cCHAPTER".
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
			$chapterURLSegments = explode('/', $link);
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));

			if($firstGet) {
				$this->doCustomFollow($content['body']);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	/**
	 * Bookmarks the title on the configured MangaFox account.
	 *
	 * @param string $data  HTML of the title page; the series ID ("var sid=...;") is read from it.
	 * @param array  $extra Unused by this site.
	 *
	 * @return bool TRUE when the bookmark request answered with HTTP 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!preg_match('/var sid=(?<id>[0-9]+);/', $data, $matches)) {
			//Without a series ID there is nothing to bookmark, so skip the request entirely.
			log_message('error', "{$this->site} | Unable to find series ID, cannot bookmark title");
			return FALSE;
		}

		$formData = [
			'action' => 'add',
			'sid'    => $matches['id']
		];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter"
		];
		$content = $this->get_content('http://mangafox.me/ajax/bookmark.php', implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searched the array VALUES for 'status_code', so the previous check could never succeed.
		return is_array($content) && array_key_exists('status_code', $content) && $content['status_code'] === 200;
	}

	/**
	 * Reads the account's bookmark list and returns the latest data of every listed title.
	 *
	 * @return array Map of title slug => ['title', 'latest_chapter', 'last_updated'];
	 *               empty when the page could not be fetched or parsed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter",
			"bmorder=za"
		];
		$content = $this->get_content('http://mangafox.me/bookmark/?status=currentreading&sort=last_chapter&order=za', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /bookmark | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /bookmark | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /bookmark | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to the bookmark list before parsing it.
		$data = preg_replace('/^[\s\S]+<ul id="bmlist">/', '<ul id="bmlist">', $data);
		$data = preg_replace('/<!-- end of bookmark -->[\s\S]+$/', '<!-- end of bookmark -->', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//ul[@id='bmlist']/li/div[@class='series_grp' and h2[@class='title']/span[@class='updatedch'] and dl]");
		if(!($nodes_rows->length > 0)) {
			//Double quotes are required here, otherwise the site name is logged as the literal "{$this->site}".
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$titleData = [];

			$nodes_title   = $xpath->query("h2[@class='title']/a[contains(@class, 'title')]", $row);
			$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
			$nodes_latest  = $xpath->query("dl/dt[1]/em/span[@class='timing']", $row);

			if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
				$title = $nodes_title->item(0);

				$titleData['title'] = trim($title->textContent);

				//Same chapter extraction as getTitleData(): drop "N.html", then read URL segments 5 and 6.
				$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
				$chapterURLSegments = explode('/', $link);
				$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");

				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));

				//Segment 4 of the title link is used as the title slug.
				$title_url = explode('/', $title->getAttribute('href'))[4];
				$titleDataList[$title_url] = $titleData;
			} else {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
			}
		}

		return $titleDataList;
	}

	/**
	 * Decides whether $newChapterString is newer than $oldChapterString.
	 *
	 * Both strings are "cNUMBER" or "vVOLUME/cNUMBER". Strings with a different number
	 * of segments are never considered newer.
	 *
	 * @return bool TRUE when the new volume is higher, or otherwise when the new chapter number is higher.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		$status = FALSE;

		$oldChapterSegments = explode('/', $oldChapterString);
		$newChapterSegments = explode('/', $newChapterString);

		//Although it's rare, it's possible for new chapters to have a different amount of segments to the oldChapter (or vice versa).
		//Since this can cause errors, we just throw a fail.
		$count = count($newChapterSegments);
		if($count === count($oldChapterSegments)) {
			if($count === 2) {
				//FIXME: This feels like a mess.
				//Drop the leading "v" to get the volume.
				$oldVolume = substr(array_shift($oldChapterSegments), 1);
				$newVolume = substr(array_shift($newChapterSegments), 1);

				//Placeholder volumes are ranked above every numbered volume.
				if(in_array($oldVolume, ['TBD', 'TBA', 'NA'], TRUE)) $oldVolume = 999;
				if(in_array($newVolume, ['TBD', 'TBA', 'NA'], TRUE)) $newVolume = 999;

				$oldVolume = floatval($oldVolume);
				$newVolume = floatval($newVolume);
			} else {
				$oldVolume = 0;
				$newVolume = 0;
			}
			//Drop the leading "c" to get the chapter number.
			$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
			$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

			if($newVolume > $oldVolume) {
				//$newVolume is higher, no need to check chapter.
				$status = TRUE;
			} elseif($newChapter > $oldChapter) {
				//$newVolume isn't higher, but chapter is.
				$status = TRUE;
			}
		}

		return $status;
	}
}
392
393
/**
 * MangaHere site implementation.
 */
class MangaHere extends Site_Model {
	public $site          = 'MangaHere';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';

	/**
	 * Builds the title page URL for the given title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://www.mangahere.co/manga/{$title_url}/";
	}

	/**
	 * Builds the chapter URL for a title.
	 *
	 * @return array Keys 'url' (chapter page) and 'number' (the chapter string as given).
	 */
	public function getChapterData(string $title, string $chapter) : array {
		$chapterURL = "http://www.mangahere.co/manga/{$title}/{$chapter}/";

		return [
			'url'    => $chapterURL,
			'number' => $chapter
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this site.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']/@content",
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
			"span[@class='right']",
			"span[@class='left']/a",
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
		);
		if(!$nodes) {
			//Fetching or parsing failed; the reason has already been logged by parseTitleDataDOM().
			return NULL;
		}

		//Strip a trailing "N.html" page reference; segments 5 and 6 of the URL hold "[vVOLUME/]cCHAPTER".
		$href     = (string) $nodes['nodes_chapter']->getAttribute('href');
		$segments = explode('/', preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', $href));

		$latestChapter = $segments[5];
		if(!empty($segments[6])) {
			$latestChapter .= "/{$segments[6]}";
		}

		$updatedAt = strtotime((string) $nodes['nodes_latest']->nodeValue);

		return [
			'title'          => $nodes['nodes_title']->textContent,
			'latest_chapter' => $latestChapter,
			'last_updated'   => date("Y-m-d H:i:s", $updatedAt)
		];
	}
}
436
437
/**
 * Batoto site implementation.
 */
class Batoto extends Site_Model {
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
	//title_url is stored like: "ID:--:LANGUAGE"
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"

	public $site          = 'Batoto';
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';

	/**
	 * Builds the title page URL from an "ID:--:LANGUAGE" title string (only the ID is used).
	 */
	public function getFullTitleURL(string $title_string) : string {
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
		$title_parts = explode(':--:', $title_string);
		return "http://bato.to/comic/_/comics/-r".$title_parts[0];
	}

	/**
	 * Builds the reader URL from a "CHAPTER_ID:--:CHAPTER_NUMBER" chapter string.
	 *
	 * @return array Keys 'url' (reader URL for the chapter ID) and 'number' (the chapter number part).
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		//$title_string isn't used here.

		$chapter_parts = explode(':--:', $chapter);
		return [
			'url'    => "http://bato.to/reader#" . $chapter_parts[0],
			'number' => $chapter_parts[1]
		];
	}

	/**
	 * Fetches the title page (authenticated via cookies) and extracts title,
	 * latest chapter and last update time.
	 *
	 * @param string $title_url "ID:--:LANGUAGE" title string.
	 * @param bool   $firstGet  When TRUE, parsing succeeded and the language is English,
	 *                          doCustomFollow() is also called.
	 *
	 * @return array|null Keys 'title', 'latest_chapter' and 'last_updated', or NULL on failure.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$title_parts = explode(':--:', $title_url);
		$fullURL     = $this->getFullTitleURL($title_url);
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?

		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
		$cookies = [
			"lang_option={$lang}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content($fullURL, implode("; ", $cookies), "", TRUE);

		//parseTitleDataDOM() copes with $content being FALSE (it logs and returns FALSE).
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//h1[@class='ipsType_pagetitle']",
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
			"td[last()]",
			"td/a[contains(@href,'reader')]",
			">Register now<"
		);
		if($data) {
			$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));

			//Split the link text into optional volume, chapter and optional "-N" extra.
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
			//The first 22 characters of the href are dropped (presumably the "http://bato.to/reader#" prefix), leaving the chapter ID.
			$titleData['latest_chapter'] = substr($data['nodes_chapter']->getAttribute('href'), 22) . ':--:' . ((!empty($text['volume']) ? 'v'.$text['volume'].'/' : '') . 'c'.$text['chapter'] . (!empty($text['extra']) ? '-'.$text['extra'] : ''));

			$dateString = $data['nodes_latest']->nodeValue;
			if($dateString == 'An hour ago') {
				//Rewritten into a form with an explicit number before being handed to strtotime().
				$dateString = '1 hour ago';
			}
			$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

			if($firstGet && $lang == 'English') {
				//FIXME: English is forced due for now. See #78.
				$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Trims the page down to its main content area before it is parsed.
	 *
	 * If a replacement fails (preg_replace() returns NULL) the data is left as it was for
	 * that step, so the declared string return type is always honoured.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data) ?? $data;
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data) ?? $data;
		$data = preg_replace('/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/', '</div></div><!-- end mainContent -->', $data) ?? $data;

		return $data;
	}

	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
	/**
	 * Follows the title on the configured Batoto account.
	 *
	 * @param string $data  HTML of the title page; the secure hash and session ID are read from it.
	 * @param array  $extra Must contain 'id' (title ID) and 'lang' (language cookie value).
	 *
	 * @return bool TRUE when the follow request answered with HTTP 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text)) {
			//Without the hash and session ID the request cannot be authenticated, so skip it entirely.
			log_message('error', "{$this->site} | Unable to find secure_hash/session_id, cannot follow title");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searched the array VALUES for 'status_code', so the previous check could never succeed.
		return is_array($content) && array_key_exists('status_code', $content) && $content['status_code'] === 200;
	}

	/**
	 * Reads the account's follow list and returns the latest data of every listed English title.
	 *
	 * Currently disabled: the method returns FALSE immediately and the implementation
	 * below the early return is kept for when it is re-enabled.
	 *
	 * @return array|false FALSE while disabled; otherwise a map of
	 *                     "ID:--:English" => ['title', 'latest_chapter', 'last_updated'].
	 */
	public function doCustomUpdate() {
		return FALSE; /* FIXME: Bato.to is disabled for custom updates until we can fix https://github.com/DakuTree/manga-tracker/issues/78#issuecomment-269833624 */

		$titleDataList = [];

		$cookies = [
			"lang_option=English", //FIXME: English is forced due for now. See #78.
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
		if(!is_array($content)) {
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
		} else {
			$status_code = $content['status_code'];
			$data        = $content['body'];

			if(!($status_code >= 200 && $status_code < 300)) {
				log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
			} else if(empty($data)) {
				log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
			} else {
				//Cut the page down to its main content area before parsing it.
				$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
				$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);

				$dom = new DOMDocument();
				libxml_use_internal_errors(TRUE);
				$dom->loadHTML($data);
				libxml_use_internal_errors(FALSE);

				$xpath      = new DOMXPath($dom);
				$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
				if($nodes_rows->length > 0) {
					foreach($nodes_rows as $row) {
						$titleData = [];

						$nodes_title   = $xpath->query("td[2]/a[1]", $row);
						$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
						$nodes_lang    = $xpath->query("td[3]/div", $row);
						$nodes_latest  = $xpath->query("td[5]", $row);

						if($nodes_lang->length === 1 && $nodes_lang->item(0)->getAttribute('title') == 'English') {
							if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
								$title = $nodes_title->item(0);

								//The title ID is the trailing number of the title link.
								preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr);
								$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78

								//Only the first row seen for a title is kept.
								if(!array_key_exists($title_url, $titleDataList)) {
									$titleData['title'] = trim($title->textContent);

									$chapter = $nodes_chapter->item(0);
									preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text);
									//The first 8 characters of the href are dropped (presumably a "/reader#" prefix), leaving the chapter ID.
									$titleData['latest_chapter'] = substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : ''));

									$dateString = $nodes_latest->item(0)->nodeValue;
									if($dateString == 'An hour ago') {
										$dateString = '1 hour ago';
									}
									$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

									$titleDataList[$title_url] = $titleData;
								}
							} else {
								log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
							}
						}
					}
				} else {
					//Double quotes are required here, otherwise the site name is logged as the literal "{$this->site}".
					log_message('error', "{$this->site} | Following list is empty?");
				}
			}
		}
		return $titleDataList;
	}
}
629
630
class DynastyScans extends Site_Model {
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58

	public $site          = 'DynastyScans';
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
	public $chapterFormat = '/^[0-9a-z_]+$/';

	/**
	 * Builds the site URL for a stored title string.
	 *
	 * @param string $title_string "<slug>:--:<type>"; type "0" selects /series/, any other value selects /chapters/.
	 * @return string Absolute URL on dynasty-scans.com.
	 */
	public function getFullTitleURL(string $title_string) : string {
		$title_parts = explode(':--:', $title_string);
		$url_type = ($title_parts[1] == '0' ? 'series' : 'chapters');

		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
	}

	/**
	 * Builds the chapter URL and a display number for a stored chapter string.
	 *
	 * @param string $title_string "<slug>:--:<type>"; only the slug is used here.
	 * @param string $chapter      Stored chapter string, or "oneshot".
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$title_parts = explode(':--:', $title_string);
		/* Known chapter url formats (# is numbers):
		       chapters_#A_#B - Ch#A-#B
		       ch_#A          - Ch#A
		       ch_#A_#B       - Ch#A.#B
		       <NOTHING>      - Oneshot (This is passed as "oneshot")
		*/

		$chapterData = [
			'url'    => 'http://dynasty-scans.com/chapters/' . $title_parts[0].'_'.$chapter,
			'number' => ''
		];

		if($chapter == 'oneshot') {
			$chapterData['number'] = 'oneshot';
		} else {
			//Insert an underscore after the leading letters so the prefix becomes its own segment.
			$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
			$chapterSegments = explode('_', $chapter);
			switch($chapterSegments[0]) {
				case 'ch':
					$chapterData['number'] = 'c'.$chapterSegments[1].(isset($chapterSegments[2]) && !empty($chapterSegments[2]) ? '.'.$chapterSegments[2] : '');
					break;

				case 'chapters':
					//This is barely ever used, but I have seen it.
					//The second number is only appended when it is actually present.
					$chapterData['number'] = 'c'.$chapterSegments[1].(isset($chapterSegments[2]) ? '-'.$chapterSegments[2] : '');
					break;

				default:
					//TODO: FALLBACK, ALERT ADMIN?
					$chapterData['number'] = $chapter;
					break;
			}
		}
		return $chapterData;
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last-updated time.
	 *
	 * Type "0" titles are parsed through parseTitleDataDOM(); type "1" (oneshot)
	 * titles are parsed with regexes directly on the response body.
	 *
	 * @param string $title_url "<slug>:--:<type>"
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated' (plus 'status' for oneshots), or NULL when nothing could be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$title_parts = explode(':--:', $title_url);
		switch($title_parts[1]) {
			case '0':
				//Normal series.
				$data = $this->parseTitleDataDOM(
					$content,
					$title_url,
					"//h2[@class='tag-title']/b[1]",
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
					"small",
					"a[@class='name']"
				);
				if($data) {
					$titleData['title'] = $data['nodes_title']->textContent;

					//In cases where the series is a doujin, try and prepend the copyright.
					//The capture group is optional, so index 1 may be absent even on a match.
					preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $content['body'], $matchesD);
					if(isset($matchesD[1]) && $matchesD[1] !== '' && substr($matchesD[1], 0, -7) !== 'Original') {
						$titleData['title'] = substr($matchesD[1], 0, -7).' - '.$titleData['title'];
					}

					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
					//Only strip the slug when it really is the prefix of the segment.
					if (strpos($chapterURLSegments[2], $title_parts[0]) === 0) {
						$titleData['latest_chapter'] = substr($chapterURLSegments[2], strlen($title_parts[0]) + 1);
					} else {
						$titleData['latest_chapter'] = $chapterURLSegments[2];
					}

					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
				}
				break;

			case '1':
				//Oneshot.
				//get_content() can return false on failure, so bail out before indexing it.
				if(!is_array($content) || !isset($content['body'])) {
					log_message('error', "{$this->site} : {$title_url} | Failed to grab URL");
					break;
				}
				$data = (string) $content['body'];

				$hasTitle = (bool) preg_match('/<b>.*<\/b>/', $data, $matchesT);
				$hasDate  = (bool) preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $data, $matches);
				if(!$hasTitle || !$hasDate) {
					log_message('error', "{$this->site} : {$title_url} | Unable to parse oneshot page");
					break;
				}

				preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $data, $matchesD);
				$copyright = (isset($matchesD[1]) && $matchesD[1] !== 'Original' ? $matchesD[1].' - ' : '');
				//substr(…, 3, -4) strips the surrounding <b> and </b> tags.
				$titleData['title'] = $copyright . substr($matchesT[0], 3, -4);

				$titleData['latest_chapter'] = 'oneshot'; //This will never change

				$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime($matches[1]));

				//Oneshots are special, and really shouldn't need to be re-tracked
				$titleData['status'] = '2';
				break;

			default:
				//something went wrong
				break;
		}
		return (!empty($titleData) ? $titleData : NULL);
	}
}
743
744
class MangaPanda extends Site_Model {
	public $site          = 'MangaPanda';
	//NOTE: MangaPanda has manga pages under the root URL, so we need to filter out pages we know that aren't manga.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string Title page URL (no trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://www.mangapanda.com/' . $title_url;
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number as stored.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterURL    = 'http://www.mangapanda.com/' . $title_url . '/' . $chapter . '/';
		$chapterNumber = "c{$chapter}";

		return [
			'url'    => $chapterURL,
			'number' => $chapterNumber
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null NULL when parseTitleDataDOM() yields nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//NOTE(review): get_content() may return false on failure; confirm parseTitleDataDOM() copes with that.
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$updatedText = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $nodes['nodes_title']->textContent,
			//Keep only the trailing number of the chapter link.
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
786
787
class MangaStream extends Site_Model {
	public $site          = 'MangaStream';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string Title page URL (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://mangastream.com/manga/' . $title_url . '/';
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Stored chapter string; the part before the first "/" is used as the number.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		return [
			'url'    => 'https://mangastream.com/r/' . $title_url . '/' . $chapter,
			'number' => 'c' . $segments[0]
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null NULL when parseTitleDataDOM() yields nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//NOTE(review): get_content() may return false on failure; confirm parseTitleDataDOM() copes with that.
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$nodes) {
			return NULL;
		}

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		$updatedText = (string) $nodes['nodes_latest']->nodeValue;

		return [
			'title'          => $nodes['nodes_title']->textContent,
			//Drop the trailing page number, keeping "<chapter>/<id>".
			'latest_chapter' => preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
829
830
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $site          = 'WebToons';
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @return string Listing page URL for the title.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/**
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param string $chapter   "ID:--:CHAPTER_N"
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Fetches the title's RSS feed and extracts title, latest chapter and last-updated time
	 * from the first feed item.
	 *
	 * Failures (fetch failed, series missing, unparsable feed, unexpected link shape) are
	 * logged and reported as NULL instead of terminating the process.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		//get_content() can return false on failure, so bail out before indexing it.
		if(!is_array($content) || !isset($content['body'])) {
			log_message('error', "Failed to grab URL (WebToons): {$title_url}");
			return NULL;
		}

		$data = (string) $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect libxml errors internally so a malformed feed does not emit warnings,
		//then restore whatever setting was active before.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		if($xml === FALSE) {
			log_message('error', "Unable to parse RSS (WebToons): {$title_url}");
			return NULL;
		}

		if(isset($xml->{'channel'}->item[0])) {
			$item = $xml->{'channel'}->item[0];

			//Segment 6 is the chapter name, segment 7 the "viewer?...episode_no=N" part.
			$chapterURLSegments = explode('/', ((string) $item->link));
			if(isset($chapterURLSegments[6], $chapterURLSegments[7])) {
				$titleData['title'] = trim((string) $xml->{'channel'}->title);

				$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $item->pubDate));
			} else {
				log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
893
894
class KissManga extends Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $site          = 'KissManga';
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string Title page URL.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   "<chapter-slug>:--:<id>"
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse.
			'number' => /*preg_replace('/--.*?$/', '', */$chapter_parts[0]/*)*/
		];
	}

	/**
	 * Fetches the title page (using the externally maintained cookiejar) and extracts
	 * title, latest chapter and last-updated time.
	 *
	 * Nothing is fetched when the cookiejar is missing or older than a day. Every
	 * failure path is logged and reported as NULL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
		//is_file() first, so a missing cookiejar does not make filemtime() emit a warning.
		$cookie_last_updated = (is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE);
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			log_message('error', "{$this->site} : {$title_url} | Cookiejar is missing or more than a day old");
			return NULL;
		}

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL, '', $cookiejar_path);
		//get_content() can return false on failure, so bail out before indexing it.
		if(!is_array($content) || !isset($content['body'])) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL");
			return NULL;
		}

		$data = (string) $content['body'];
		if(strpos($data, 'containerRoot') === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unexpected page content (containerRoot missing)");
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);

		$dom = new DOMDocument();
		//Silence libxml warnings for this parse only, then restore the previous setting.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length === 1 && $nodes_row->length === 1) {
			$firstRow      = $nodes_row->item(0);
			$nodes_latest  = $xpath->query("td[2]",   $firstRow);
			$nodes_chapter = $xpath->query("td[1]/a", $firstRow);

			//Both cells must exist before item(0) is dereferenced.
			if($nodes_latest->length >= 1 && $nodes_chapter->length >= 1) {
				$link = (string) $nodes_chapter->item(0)->getAttribute('href');
				$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));

				if(isset($chapterURLSegments[3])) {
					$titleData['title'] = $nodes_title->item(0)->textContent;

					$titleData['latest_chapter'] = $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link);
					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent));
				} else {
					log_message('error', "{$this->site} : {$title_url} | Unexpected chapter link format");
				}
			} else {
				log_message('error', "{$this->site} : {$title_url} | Latest chapter row is missing expected cells");
			}
		} else {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
973
974
class GameOfScanlation extends Site_Model {
	public $site          = 'GameOfScanlation';
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/**
	 * @param string $title_url Title slug; slugs containing a "." are served under /forums/, all others under /projects/.
	 * @return string Title page URL (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		/* NOTE: GoS is a bit weird in that it has two separate title URL formats. One uses /projects/ and the other uses /forums/.
		         The bad thing is these are interchangeable, despite them showing the exact same listing page.
		         Thankfully the title_url of manga which use /forums/ seem to be appended with ".%ID%" which means we can easily check them. */

		$section = (strpos($title_url, '.') !== FALSE ? 'forums' : 'projects');

		return "https://gameofscanlation.moe/{$section}/{$title_url}/";
	}

	/**
	 * @param string $title_url Title slug; a trailing ".<digits>" is removed for the chapter URL.
	 * @param string $chapter   Chapter slug; a trailing ".<digits>" is removed for the display number only.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$idSuffix    = "/\\.[0-9]+$/";
		$projectSlug = preg_replace($idSuffix, "", $title_url);
		$chapterSlug = preg_replace($idSuffix, "", $chapter);

		return [
			'url'    => "https://gameofscanlation.moe/projects/" . $projectSlug . '/' . $chapter . '/',
			'number' => preg_replace("/chapter-/", "c", $chapterSlug)
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null NULL when parseTitleDataDOM() yields nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//NOTE(review): get_content() may return false on failure; confirm parseTitleDataDOM() copes with that.
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$rawTitle    = $nodes['nodes_title']->getAttribute('content');
		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		//The title attribute is cast to int and passed to date() as a timestamp.
		$updatedAt   = (int) $nodes['nodes_latest']->getAttribute('title');

		return [
			'title'          => trim(html_entity_decode($rawTitle)),
			'latest_chapter' => preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", $updatedAt)
		];
	}
}
1025
1026
class MangaCow extends Site_Model {
	public $site          = 'MangaCow';
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * @param string $title_url Title slug.
	 * @return string Title page URL (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://mngcow.co/' . $title_url . '/';
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number as stored.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$titleURL = $this->getFullTitleURL($title_url);

		return [
			'url'    => "{$titleURL}{$chapter}/",
			'number' => 'c' . $chapter
		];
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last-updated time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null NULL when parseTitleDataDOM() yields nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//NOTE(review): get_content() may return false on failure; confirm parseTitleDataDOM() copes with that.
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$nodes) {
			return NULL;
		}

		$chapterHref = (string) $nodes['nodes_chapter']->getAttribute('href');
		//The first 13 characters of the title attribute are skipped before date parsing.
		$updatedText = (string) substr($nodes['nodes_latest']->getAttribute('title'), 13);

		return [
			'title'          => trim($nodes['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)\/$/', '$1', $chapterHref),
			'last_updated'   => date("Y-m-d H:i:s", strtotime($updatedText))
		];
	}
}
1069
1070
/*** FoolSlide sites ***/
1071
1072 View Code Duplication
class KireiCake extends Site_Model {
	public $site          = 'KireiCake';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Series slug.
	 * @return string Series page URL on the reader.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://reader.kireicake.com/series/' . $title_url;
	}

	/**
	 * @param string $title_url Series slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//A volume of "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => 'https://reader.kireicake.com/read/' . $title_url . '/' . $chapter . '/',
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series page URL.
	 *
	 * @param string $title_url Series slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1095
1096 View Code Duplication
class SeaOtterScans extends Site_Model {
	public $site          = 'SeaOtterScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Series slug.
	 * @return string Series page URL on the reader.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://reader.seaotterscans.com/series/' . $title_url;
	}

	/**
	 * @param string $title_url Series slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//A volume of "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => 'https://reader.seaotterscans.com/read/' . $title_url . '/' . $chapter . '/',
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series page URL.
	 *
	 * @param string $title_url Series slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1119
1120 View Code Duplication
class HelveticaScans extends Site_Model {
	public $site          = 'HelveticaScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Series slug.
	 * @return string Series page URL on the reader.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://helveticascans.com/reader/series/' . $title_url;
	}

	/**
	 * @param string $title_url Series slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//A volume of "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => 'http://helveticascans.com/reader/read/' . $title_url . '/' . $chapter . '/',
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series page URL.
	 *
	 * @param string $title_url Series slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1143
1144 View Code Duplication
class SenseScans extends Site_Model {
	public $site          = 'SenseScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Series slug.
	 * @return string Series page URL on the reader.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'http://reader.sensescans.com/series/' . $title_url;
	}

	/**
	 * @param string $title_url Series slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//A volume of "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => 'http://reader.sensescans.com/read/' . $title_url . '/' . $chapter . '/',
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series page URL.
	 *
	 * @param string $title_url Series slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1167
1168 View Code Duplication
class JaiminisBox extends Site_Model {
	public $site          = 'JaiminisBox';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * @param string $title_url Series slug.
	 * @return string Series page URL on the reader.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return 'https://jaiminisbox.com/reader/series/' . $title_url;
	}

	/**
	 * @param string $title_url Series slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		//A volume of "0" is left out of the display number.
		$number = '';
		if($segments[1] !== '0') {
			$number .= "v{$segments[1]}/";
		}
		$number .= "c{$segments[2]}";
		if(isset($segments[3])) {
			$number .= ".{$segments[3]}";
		}

		return [
			'url'    => 'https://jaiminisbox.com/reader/read/' . $title_url . '/' . $chapter . '/',
			'number' => $number
		];
	}

	/**
	 * Delegates to parseFoolSlide() with the series page URL.
	 *
	 * @param string $title_url Series slug.
	 * @param bool   $firstGet  Unused by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		return $this->parseFoolSlide($this->getFullTitleURL($title_url), $title_url);
	}
}
1191