Completed
Push — master ( d8ce3e...a6a102 )
by Angus
04:15
created

MangaFox::getChapterData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 4
nc 1
nop 2
dl 0
loc 6
ccs 0
cts 0
cp 0
crap 2
rs 9.4285
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
abstract class Site_Model extends CI_Model {
4
	public $site          = '';
5
	public $titleFormat   = '';
6
	public $chapterFormat = '';
7
8 121
	public function __construct() {
9 121
		parent::__construct();
10
11 121
		$this->load->database();
12 121
	}
13
14
	abstract public function getFullTitleURL(string $title_url) : string;
15
16
	abstract public function getChapterData(string $title_url, string $chapter) : array;
17
18
	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
19
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);
20
21
	/**
	 * Tests a title URL against this site's $titleFormat pattern.
	 *
	 * A non-matching value is written to the error log together with the site name.
	 *
	 * @param string $title_url Value to validate.
	 *
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
26
	/**
	 * Tests a chapter identifier against this site's $chapterFormat pattern.
	 *
	 * A non-matching value is written to the error log together with the site name.
	 *
	 * @param string $chapter Value to validate.
	 *
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
31
32 19
	/**
	 * Requests a URL with cURL and splits the raw response into headers, status code and body.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Raw cookie header value ("a=1; b=2"); not sent when empty.
	 * @param string $cookiejar_path  File handed to CURLOPT_COOKIEFILE (cookies are read from it); ignored when empty.
	 * @param bool   $follow_redirect Follow "Location" redirects when TRUE.
	 * @param bool   $isPost          Send a POST request instead of a GET.
	 * @param array  $postFields      POST fields, encoded with http_build_query().
	 *
	 * @return array|false ['headers' => parsed headers, 'status_code' => HTTP status, 'body' => response body],
	 *                     or FALSE when curl_exec() fails (the cURL error is logged).
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned in front of the body and split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is an on/off flag, not a field count.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//Release the handle on the failure path too (the error details were read above).
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
76
77
	/**
	 * Validates a get_content() response and pulls the title, latest-update and chapter nodes out of its HTML body.
	 *
	 * Every failure (failed request, non-2xx status, empty body, failure string present, unexpected node counts)
	 * is written to the error log and reported as FALSE.
	 *
	 * @param array|false $content             Result of get_content(); anything that is not an array counts as a failed request.
	 * @param string      $title_url           Title identifier, used for log messages only.
	 * @param string      $node_title_string   XPath that must match exactly one title node.
	 * @param string      $node_row_string     XPath that must match exactly one "latest chapter" row.
	 * @param string      $node_latest_string  XPath, evaluated relative to the row, that must match exactly one node.
	 * @param string      $node_chapter_string XPath, evaluated relative to the row, that must match exactly one node.
	 *                                         When empty, the row node itself is used as the chapter node.
	 * @param string      $failure_string      When non-empty and found in the body, the page is treated as a failure.
	 *
	 * @return DOMNode[]|false ['nodes_title' => ..., 'nodes_latest' => ..., 'nodes_chapter' => ...] or FALSE on failure.
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {
		//list('headers' => $headers, 'status_code' => $status_code, 'body' => $data) = $content; //TODO: PHP 7.1

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then drop the buffered errors and restore the caller's error mode.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		//An empty chapter XPath means the row itself is the chapter node.
		$nodes_chapter = ($node_chapter_string !== '' ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row);

		if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
146
147 9
	public function cleanTitleDataDOM(string $data) : string {
148 9
		return $data;
149
	}
150
151
	/**
	 * Shared title parser for FoOlSlide-based sites.
	 *
	 * This has its own function because FoOlSlide is used by a lot of fan translation sites,
	 * and the code is pretty much the same across all of them.
	 *
	 * @param string $fullURL   URL of the series page to fetch.
	 * @param string $title_url Title identifier, used for log messages.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the page could not be
	 *                    fetched or parsed.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$content = $this->get_content($fullURL);
		if(!$content) {
			return NULL;
		}

		//Cut the body down to the <article> element before it is parsed.
		$content['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $content['body']);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$data) {
			return NULL;
		}

		//The date is taken from the text after the first comma of the meta node.
		//A missing segment or an unparsable date must not reach date(): under strict_types a FALSE timestamp is a TypeError.
		$metaParts = explode(',', (string) $data['nodes_latest']->nodeValue);
		$timestamp = (isset($metaParts[1]) ? strtotime(str_replace('.', '', $metaParts[1])) : FALSE);
		if($timestamp === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unable to parse last updated date");
			return NULL;
		}

		$link = (string) $data['nodes_chapter']->getAttribute('href');

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
178
179
	public function doCustomFollow(string $data = "", array $extra = []) {}
180
	public function doCustomUpdate() {}
181
}
182
/**
 * Holds one instance of every supported site model, each exposed as a public property named after its class.
 */
class Sites_Model extends CI_Model {
	//FIXME: Is it possible to automatically generate this in some way or another?
	public $MangaFox;
	public $MangaHere;
	public $Batoto;
	public $DynastyScans;
	public $MangaPanda;
	public $MangaStream;
	public $WebToons;
	public $KissManga;
	public $KireiCake;
	public $GameOfScanlation;
	public $MangaCow;
	public $SeaOtterScans;
	public $HelveticaScans;
	public $SenseScans;
	public $JaiminisBox;

	public function __construct() {
		parent::__construct();

		//Instantiated in the same order as the properties are declared above.
		$siteClasses = [
			'MangaFox',
			'MangaHere',
			'Batoto',
			'DynastyScans',
			'MangaPanda',
			'MangaStream',
			'WebToons',
			'KissManga',
			'KireiCake',
			'GameOfScanlation',
			'MangaCow',
			'SeaOtterScans',
			'HelveticaScans',
			'SenseScans',
			'JaiminisBox'
		];
		foreach($siteClasses as $siteClass) {
			$this->{$siteClass} = new $siteClass();
		}
	}
}
220
221
class MangaFox extends Site_Model {
222
	public $site          = 'MangaFox';
223
	public $titleFormat   = '/^[a-z0-9_]+$/';
224
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';
225
226 2
	public function getFullTitleURL(string $title_url) : string {
227 2
		return "http://mangafox.me/manga/{$title_url}/";
228
	}
229
230
	public function getChapterData(string $title_url, string $chapter) : array {
231
		return [
232
			'url'    => "http://mangafox.me/manga/{$title_url}/{$chapter}/",
233
			'number' => $chapter
234
		];
235
	}
236
237 2
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
238 2
		$titleData = [];
239
240 2
		$fullURL = $this->getFullTitleURL($title_url);
241 2
		$content = $this->get_content($fullURL);
242
243 2
		$data = $this->parseTitleDataDOM(
244
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 241 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
245
			$title_url,
246 2
			"//meta[@property='og:title']/@content",
247 2
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
248 2
			"div/span[@class='date']",
249 2
			"div/h3/a"
250
		);
251 2
		if($data) {
252 1
			$titleData['title'] = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));
253
254 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
255 1
			$chapterURLSegments = explode('/', $link);
256 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
257 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
258
259 1
			if($firstGet) {
260
				$this->doCustomFollow($content['body']);
261
			}
262
		}
263
264 2
		return (!empty($titleData) ? $titleData : NULL);
265
	}
266
267
268
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
269
	/**
	 * Bookmarks a series on the configured MangaFox account.
	 *
	 * The series ID is read from the "var sid=...;" script variable in the title page HTML and
	 * POSTed to the bookmark AJAX endpoint together with the configured account cookies.
	 *
	 * @param string $data  HTML of the title page.
	 * @param array  $extra Unused by this site.
	 *
	 * @return bool TRUE when the bookmark request answered with HTTP 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		//Without a series ID there is nothing to bookmark.
		if(!preg_match('/var sid=(?<id>[0-9]+);/', $data, $matches)) {
			log_message('error', "{$this->site} | Unable to find series ID, cannot bookmark");
			return FALSE;
		}

		$formData = [
			'action' => 'add',
			'sid'    => $matches['id']
		];

		$cookies = [
			"mfvb_userid={$this->config->item('mangafox_userid')}",
			"mfvb_password={$this->config->item('mangafox_password')}",
			"bmsort=last_chapter"
		];
		$content = $this->get_content('http://mangafox.me/ajax/bookmark.php', implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searches values, not keys, so the previous check could never succeed.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
286
	public function doCustomUpdate() {
287
		//http://mangafox.me/bookmark/?sort=last_chapter
288
	}
289
}
290
291
class MangaHere extends Site_Model {
292
	public $site          = 'MangaHere';
293
	public $titleFormat   = '/^[a-z0-9_]+$/';
294
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';
295
296 2
	public function getFullTitleURL(string $title_url) : string {
297 2
		return "http://www.mangahere.co/manga/{$title_url}/";
298
	}
299
300
	public function getChapterData(string $title, string $chapter) : array {
301
		return [
302
			'url'    => "http://www.mangahere.co/manga/{$title}/{$chapter}/",
303
			'number' => $chapter
304
		];
305
	}
306
307 2
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
308 2
		$titleData = [];
309
310 2
		$fullURL = $this->getFullTitleURL($title_url);
311 2
		$content = $this->get_content($fullURL);
312
313 2
		$data = $this->parseTitleDataDOM(
314
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 311 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
315
			$title_url,
316 2
			"//meta[@property='og:title']/@content",
317 2
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
318 2
			"span[@class='right']",
319 2
			"span[@class='left']/a",
320 2
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
321
		);
322 2
		if($data) {
323 1
			$titleData['title'] = $data['nodes_title']->textContent;
324
325 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
326 1
			$chapterURLSegments = explode('/', $link);
327 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
328 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
329
		}
330
331 2
		return (!empty($titleData) ? $titleData : NULL);
332
	}
333
}
334
335
class Batoto extends Site_Model {
336
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
337
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
338
	//title_url is stored like: "ID:--:LANGUAGE"
339
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"
340
341
	public $site          = 'Batoto';
342
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
343
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
344
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
345
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';
346
347 1
	public function getFullTitleURL(string $title_string) : string {
348
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
349
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
350 1
		$title_parts = explode(':--:', $title_string);
351 1
		return "http://bato.to/comic/_/comics/-r".$title_parts[0];
352
	}
353
354 View Code Duplication
	public function getChapterData(string $title_string, string $chapter) : array {
355
		//$title_string isn't used here.
356
357
		$chapter_parts = explode(':--:', $chapter);
358
		return [
359
			'url'    => "http://bato.to/reader#" . $chapter_parts[0],
360
			'number' => $chapter_parts[1]
361
		];
362
	}
363
364 1
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
365 1
		$titleData = [];
366
367 1
		$title_parts = explode(':--:', $title_url);
368 1
		$fullURL     = $this->getFullTitleURL($title_url);
369 1
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?
370
371
372
		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
373
		$cookies = [
374 1
			"lang_option={$lang}",
375 1
			"member_id={$this->config->item('batoto_cookie_member_id')}",
376 1
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
377
		];
378 1
		$content = $this->get_content($fullURL, implode("; ", $cookies), "", TRUE);
379
380 1
		$data = $this->parseTitleDataDOM(
381
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($full...', $cookies), '', TRUE) on line 378 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
382
			$title_url,
383 1
			"//h1[@class='ipsType_pagetitle']",
384 1
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
385 1
			"td[last()]",
386 1
			"td/a[contains(@href,'reader')]",
387 1
			">Register now<"
388
		);
389 1
		if($data) {
390
			$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));
391
392
			///^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/
393
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
394
			$titleData['latest_chapter'] = substr($data['nodes_chapter']->getAttribute('href'), 22) . ':--:' . ((!empty($text['volume']) ? 'v'.$text['volume'].'/' : '') . 'c'.$text['chapter'] . (!empty($text['extra']) ? '-'.$text['extra'] : ''));
395
396
			$dateString = $data['nodes_latest']->nodeValue;
397
			if($dateString == 'An hour ago') {
398
				$dateString = '1 hour ago';
399
			}
400
			$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
401
402
			if($firstGet && $lang == 'English') {
403
				//FIXME: English is forced due for now. See #78.
404
				$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
405
			}
406
		}
407
408 1
		return (!empty($titleData) ? $titleData : NULL);
409
	}
410
411
	public function cleanTitleDataDOM(string $data) : string {
412
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
413
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);
414
		$data = preg_replace('/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/', '</div></div><!-- end mainContent -->', $data);
415
416
		return $data;
417
	}
418
419
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
420
	/**
	 * Follows a series on the configured Batoto account.
	 *
	 * The secure hash and session ID are read from the "ipb.vars" script variables in the title page HTML,
	 * then a POST is sent to bato.to/forums/index.php (module=ajax, section=like, do=save).
	 *
	 * @param string $data  HTML of the title page.
	 * @param array  $extra Must contain 'id' (series ID) and 'lang' (value for the lang_option cookie).
	 *
	 * @return bool TRUE when the request answered with HTTP 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!isset($extra['id'], $extra['lang'])) {
			log_message('error', "{$this->site} | Missing id/lang, cannot follow");
			return FALSE;
		}

		//Both tokens are required by the request; bail out if the page does not expose them.
		if(!preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text)) {
			log_message('error', "{$this->site} | Unable to find secure_hash/session_id, cannot follow");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//in_array() searches values, not keys, so the previous check could never succeed.
		return is_array($content) && isset($content['status_code']) && $content['status_code'] === 200;
	}
449
	/**
	 * Scrapes the configured account's "/myfollows" page and collects the newest listed chapter of each followed title.
	 *
	 * Only rows marked as English are used (see #78). When a title appears in several rows, the first row wins.
	 * Rows that cannot be parsed are logged and skipped.
	 *
	 * @return array Map of title_url ("ID:--:English") => ['title', 'latest_chapter', 'last_updated'];
	 *               empty when the page could not be fetched or parsed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"lang_option=English", //FIXME: English is forced due for now. See #78.
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
		if(!is_array($content)) {
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Strip everything outside of the main content area before parsing.
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then drop the buffered errors and restore the caller's error mode.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
		if($nodes_rows->length === 0) {
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_lang = $xpath->query("td[3]/div", $row);
			if($nodes_lang->length !== 1 || $nodes_lang->item(0)->getAttribute('title') !== 'English') {
				continue;
			}

			$nodes_title   = $xpath->query("td[2]/a[1]", $row);
			$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
			$nodes_latest  = $xpath->query("td[5]", $row);
			if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length} | LATEST: {$nodes_latest->length})");
				continue;
			}

			//The series ID is the trailing number of the title link.
			$title = $nodes_title->item(0);
			if(!preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr)) {
				log_message('error', "{$this->site}/Custom | Unable to find title ID in URL");
				continue;
			}
			$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78

			if(array_key_exists($title_url, $titleDataList)) {
				continue;
			}

			$chapter = $nodes_chapter->item(0);
			if(!preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text)) {
				log_message('error', "{$this->site}/Custom | Unable to parse chapter text ({$title_url})");
				continue;
			}

			$dateString = $nodes_latest->item(0)->nodeValue;
			if($dateString == 'An hour ago') {
				$dateString = '1 hour ago';
			}
			//A FALSE timestamp must not reach date(): under strict_types that is a TypeError.
			$timestamp = strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString));
			if($timestamp === FALSE) {
				log_message('error', "{$this->site}/Custom | Unable to parse last updated date ({$title_url})");
				continue;
			}

			$titleDataList[$title_url] = [
				'title'          => trim($title->textContent),
				'latest_chapter' => substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : '')),
				'last_updated'   => date("Y-m-d H:i:s", $timestamp)
			];
		}

		return $titleDataList;
	}
524
}
525
526
class DynastyScans extends Site_Model {
527
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58
528
529
	public $site          = 'DynastyScans';
530
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
531
	public $chapterFormat = '/^[0-9a-z_]+$/';
532
533 1
	public function getFullTitleURL(string $title_string) : string {
534 1
		$title_parts = explode(':--:', $title_string);
535 1
		$url_type = ($title_parts[1] == '0' ? 'series' : 'chapters');
536
537 1
		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
538
	}
539
540
	public function getChapterData(string $title_string, string $chapter) : array {
541
		$title_parts = explode(':--:', $title_string);
542
		/* Known chapter url formats (# is numbers):
543
		       chapters_#A_#B - Ch#A-#B
544
		       ch_#A          - Ch#A
545
		       ch_#A_#B       - Ch#A.#B
546
		       <NOTHING>      - Oneshot (This is passed as "oneshot")
547
		*/
548
549
		$chapterData = [
550
			'url'    => 'http://dynasty-scans.com/chapters/' . $title_parts[0].'_'.$chapter,
551
			'number' => ''
552
		];
553
554
		if($chapter == 'oneshot') {
555
			$chapterData['number'] = 'oneshot';
556
		} else {
557
			$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
558
			$chapterSegments = explode('_', $chapter);
559
			switch($chapterSegments[0]) {
560
				case 'ch':
561
					$chapterData['number'] = 'c'.$chapterSegments[1].(isset($chapterSegments[2]) && !empty($chapterSegments[2]) ? '.'.$chapterSegments[2] : '');
562
					break;
563
564
				case 'chapters':
565
					//This is barely ever used, but I have seen it.
566
					$chapterData['number'] = 'c'.$chapterSegments[1].'-'.$chapterSegments[2];
567
					break;
568
569
				default:
570
					//TODO: FALLBACK, ALERT ADMIN?
571
					$chapterData['number'] = $chapter;
572
					break;
573
			}
574
		}
575
		return $chapterData;
576
	}
577
578 1
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
579 1
		$titleData = [];
580
581 1
		$fullURL = $this->getFullTitleURL($title_url);
582 1
		$content = $this->get_content($fullURL);
583
584 1
		$title_parts = explode(':--:', $title_url);
585 1
		switch($title_parts[1]) {
586 1
			case '0':
587
				//Normal series.
588 1
				$data = $this->parseTitleDataDOM(
589
					$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 582 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
590
					$title_url,
591 1
					"//h2[@class='tag-title']/b[1]",
592 1
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
593 1
					"small",
594 1
					"a[@class='name']"
595
				);
596 1
				if($data) {
597 1
					$titleData['title'] = $data['nodes_title']->textContent;
598
					//In cases where the series is a doujin, try and prepend the copyright.
599 1
					preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $content['body'], $matchesD);
600 1
					if(!empty($matchedD) && substr($matchesD[1], 0, -7) !== 'Original') {
0 ignored issues
show
Bug introduced by
The variable $matchedD seems to never exist, and therefore empty should always return true. Did you maybe rename this variable?

This check looks for calls to isset(...) or empty() on variables that are yet undefined. These calls will always produce the same result and can be removed.

This is most likely caused by the renaming of a variable or the removal of a function/method parameter.

Loading history...
601
						$titleData['title'] = substr($matchesD[1], 0, -7).' - '.$titleData['title'];
602
					}
603
604 1
					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
605 1
					if (strpos($chapterURLSegments[2], $title_parts[0]) !== false) {
606 1
						$titleData['latest_chapter'] = substr($chapterURLSegments[2], strlen($title_parts[0]) + 1);
607
					} else {
608
						$titleData['latest_chapter'] = $chapterURLSegments[2];
609
					}
610
611 1
					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
612
				}
613 1
				break;
614
615
			case '1':
616
				//Oneshot.
617
				$data = $content['body'];
618
619
				preg_match('/<b>.*<\/b>/', $data, $matchesT);
620
				preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $data, $matchesD);
621
				$titleData['title'] = (!empty($matchesD) ? ($matchesD[1] !== 'Original' ? $matchesD[1].' - ' : '') : '') . substr($matchesT[0], 3, -4);
622
623
				$titleData['latest_chapter'] = 'oneshot'; //This will never change
624
625
				preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $data, $matches);
626
				$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime($matches[1]));
627
628
				//Oneshots are special, and really shouldn't need to be re-tracked
629
				$titleData['status'] = '2';
630
				break;
631
632
			default:
633
				//something went wrong
634
				break;
635
		}
636 1
		return (!empty($titleData) ? $titleData : NULL);
637
	}
638
}
639
640
/**
 * Site model for MangaPanda (http://www.mangapanda.com).
 */
class MangaPanda extends Site_Model {
	public $site          = 'MangaPanda';
	//NOTE: MangaPanda serves manga pages directly under the root URL, so known non-manga pages are filtered out here.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "http://www.mangapanda.com/{$title_url}";
		return $seriesURL;
	}

	/**
	 * Build the reader URL and the display number ("c<chapter>") for a chapter.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterData = [];
		$chapterData['url']    = "http://www.mangapanda.com/{$title_url}/{$chapter}/";
		$chapterData['number'] = 'c'.$chapter;
		return $chapterData;
	}

	/**
	 * Scrape the series page for its title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$pageURL = $this->getFullTitleURL($title_url);
		$page    = $this->get_content($pageURL);

		//The last row of the listing table holds the newest chapter.
		$parsed = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$parsed) {
			return NULL;
		}

		return [
			'title'          => $parsed['nodes_title']->textContent,
			//Keep only the trailing chapter number of the link.
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)$/', '$1', (string) $parsed['nodes_chapter']->getAttribute('href')),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $parsed['nodes_latest']->nodeValue))
		];
	}
}
682
683
/**
 * Site model for MangaStream (https://mangastream.com).
 */
class MangaStream extends Site_Model {
	public $site          = 'MangaStream';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "https://mangastream.com/manga/{$title_url}/";
		return $seriesURL;
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * The display number is "c" followed by everything before the first "/" of $chapter.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		$chapterData = [];
		$chapterData['url']    = "https://mangastream.com/r/{$title_url}/{$chapter}";
		$chapterData['number'] = 'c'.$segments[0];
		return $chapterData;
	}

	/**
	 * Scrape the series page for its title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$pageURL = $this->getFullTitleURL($title_url);
		$page    = $this->get_content($pageURL);

		//The final argument is the marker text passed along for the site's "not found" page.
		$parsed = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$parsed) {
			return NULL;
		}

		return [
			'title'          => $parsed['nodes_title']->textContent,
			//Reduce the link to "<chapter>/<id>", dropping the trailing page number.
			'latest_chapter' => preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', (string) $parsed['nodes_chapter']->getAttribute('href')),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $parsed['nodes_latest']->nodeValue))
		];
	}
}
725
726
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $site          = 'WebToons';
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * Build the series listing URL from an "ID:--:LANG:--:TITLE:--:GENRE" title string.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/**
	 * Build the viewer URL and display number for a chapter.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param string $chapter   "ID:--:CHAPTER_N"
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Read the series RSS feed and extract the title, latest chapter and last update time.
	 *
	 * Failures (fetch failed, series missing, unparsable feed, unexpected link format) are
	 * logged and reported as NULL; a bad feed no longer terminates the script via die().
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE"
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when nothing could be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "Failed to retrieve RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect libxml errors internally so a malformed feed does not emit PHP warnings.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);
		if($xml === FALSE) {
			log_message('error', "Unable to parse RSS feed (WebToons): {$title_url}");
			return NULL;
		}

		if(isset($xml->{'channel'}->item[0])) {
			//The first <item> of the feed is treated as the latest chapter.
			$latestItem = $xml->{'channel'}->item[0];

			//Segment 6 of the link is the chapter name; segment 7 ends with the episode number.
			$chapterURLSegments = explode('/', ((string) $latestItem->link));
			if(isset($chapterURLSegments[6], $chapterURLSegments[7])) {
				$titleData['title'] = trim((string) $xml->{'channel'}->title);

				$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
				$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $latestItem->pubDate));
			} else {
				log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
}
789
790
class KissManga extends Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6 hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $site          = 'KissManga';
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   "<chapter name>:--:<id>"
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse.
			'number' => $chapter_parts[0]
		];
	}

	/**
	 * Scrape the series page (using the externally generated cookiejar) for its title,
	 * latest chapter and last update time.
	 *
	 * Every failure path is logged and reported as NULL instead of being silently ignored.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when the cookiejar is missing/stale or the page could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
		//is_file() first: filemtime() raises a warning when the file does not exist.
		$cookie_last_updated = is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE;
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Nothing we can do here, wait until next update.
			log_message('error', "Cookiejar missing or older than a day (KissManga): {$title_url}");
			return NULL;
		}

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL, '', $cookiejar_path);

		$data = (is_array($content) && isset($content['body'])) ? $content['body'] : NULL;
		if(!is_string($data) || strpos($data, 'containerRoot') === FALSE) {
			log_message('error', "Unexpected or missing page content (KissManga): {$title_url}");
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		if(!is_string($data) || $data === '') {
			log_message('error', "Unable to isolate listing markup (KissManga): {$title_url}");
			return NULL;
		}

		$dom = new DOMDocument();
		//Silence markup warnings while loading, then restore whatever error mode was active before.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			log_message('error', "Title or chapter row not found (KissManga): {$title_url}");
			return NULL;
		}

		$firstRow     = $nodes_row->item(0);
		$node_latest  = $xpath->query("td[2]",   $firstRow)->item(0);
		$node_chapter = $xpath->query("td[1]/a", $firstRow)->item(0);
		if($node_latest === NULL || $node_chapter === NULL) {
			log_message('error', "Chapter row is missing expected cells (KissManga): {$title_url}");
			return NULL;
		}

		//Segment 3 of the link path (query string removed) is the chapter name; the trailing digits of the link are its id.
		$link = (string) $node_chapter->getAttribute('href');
		$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
		if(!isset($chapterURLSegments[3])) {
			log_message('error', "Unexpected chapter link format (KissManga): {$title_url}");
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $node_latest->textContent))
		];
	}
}
869
870
/**
 * Site model for Game of Scanlation (https://gameofscanlation.moe).
 */
class GameOfScanlation extends Site_Model {
	public $site          = 'GameOfScanlation';
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/**
	 * Build the series page URL for a title slug.
	 *
	 * NOTE: GoS has two separate title URL formats, /projects/ and /forums/.
	 *       Titles that live under /forums/ appear to carry a ".%ID%" suffix, so a "."
	 *       in the slug is used to pick the format.
	 */
	public function getFullTitleURL(string $title_url) : string {
		if (strpos($title_url, '.') === FALSE) {
			return "https://gameofscanlation.moe/projects/{$title_url}/";
		}

		return "https://gameofscanlation.moe/forums/{$title_url}/";
	}

	/**
	 * Build the chapter URL and display number.
	 *
	 * A trailing ".<digits>" is stripped from the title slug for the URL and from the
	 * chapter slug for the number; "chapter-" in the number is replaced with "c".
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$titleSlug   = preg_replace("/\\.[0-9]+$/", "", $title_url);
		$chapterSlug = preg_replace("/\\.[0-9]+$/", "", $chapter);

		$chapterData = [];
		$chapterData['url']    = "https://gameofscanlation.moe/projects/".$titleSlug.'/'.$chapter.'/';
		$chapterData['number'] = preg_replace("/chapter-/", "c", $chapterSlug);
		return $chapterData;
	}

	/**
	 * Scrape the series page for its title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$pageURL = $this->getFullTitleURL($title_url);
		$page    = $this->get_content($pageURL);

		$parsed = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$parsed) {
			return NULL;
		}

		return [
			//The title comes from the og:title meta tag's content attribute.
			'title'          => trim(html_entity_decode($parsed['nodes_title']->getAttribute('content'))),
			'latest_chapter' => preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', (string) $parsed['nodes_chapter']->getAttribute('href')),
			//The node's title attribute is cast to int and used directly as the timestamp.
			'last_updated'   => date("Y-m-d H:i:s",(int) $parsed['nodes_latest']->getAttribute('title'))
		];
	}
}
921
922
/**
 * Site model for MangaCow (http://mngcow.co).
 */
class MangaCow extends Site_Model {
	public $site          = 'MangaCow';
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$seriesURL = "http://mngcow.co/{$title_url}/";
		return $seriesURL;
	}

	/**
	 * Build the chapter URL (series URL + chapter + "/") and display number ("c<chapter>").
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$seriesURL = $this->getFullTitleURL($title_url);

		$chapterData = [];
		$chapterData['url']    = $seriesURL.$chapter.'/';
		$chapterData['number'] = "c{$chapter}";
		return $chapterData;
	}

	/**
	 * Scrape the series page for its title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Title data, or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$pageURL = $this->getFullTitleURL($title_url);
		$page    = $this->get_content($pageURL);

		//The chapter query is passed as an empty string; the final argument is the "not found" marker text.
		$parsed = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$parsed) {
			return NULL;
		}

		return [
			'title'          => trim($parsed['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)\/$/', '$1', (string) $parsed['nodes_chapter']->getAttribute('href')),
			//The date is read from the title attribute after skipping its first 13 characters.
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) substr($parsed['nodes_latest']->getAttribute('title'), 13)))
		];
	}
}
965
966
/*** FoolSlide sites ***/
967
968 View Code Duplication
/**
 * Site model for the Kirei Cake FoolSlide reader (https://reader.kireicake.com).
 */
class KireiCake extends Site_Model {
	public $site          = 'KireiCake';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://reader.kireicake.com/series/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * $chapter is LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/). The number is rendered as
	 * "v<VOLUME>/c<CHAPTER>.<CHAPTER_EXTRA>", omitting the volume when it is '0' and the
	 * extra when absent.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//chapterFormat allows the chapter segment to be missing, so never assume these offsets exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => "https://reader.kireicake.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Fetch title data by delegating to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
991
992 View Code Duplication
/**
 * Site model for the Sea Otter Scans FoolSlide reader (https://reader.seaotterscans.com).
 */
class SeaOtterScans extends Site_Model {
	public $site          = 'SeaOtterScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://reader.seaotterscans.com/series/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * $chapter is LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/). The number is rendered as
	 * "v<VOLUME>/c<CHAPTER>.<CHAPTER_EXTRA>", omitting the volume when it is '0' and the
	 * extra when absent.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//chapterFormat allows the chapter segment to be missing, so never assume these offsets exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => "https://reader.seaotterscans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Fetch title data by delegating to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1015
1016 View Code Duplication
/**
 * Site model for the Helvetica Scans FoolSlide reader (http://helveticascans.com/reader).
 */
class HelveticaScans extends Site_Model {
	public $site          = 'HelveticaScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://helveticascans.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * $chapter is LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/). The number is rendered as
	 * "v<VOLUME>/c<CHAPTER>.<CHAPTER_EXTRA>", omitting the volume when it is '0' and the
	 * extra when absent.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//chapterFormat allows the chapter segment to be missing, so never assume these offsets exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => "http://helveticascans.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Fetch title data by delegating to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1039
1040 View Code Duplication
/**
 * Site model for the Sense Scans FoolSlide reader (http://reader.sensescans.com).
 */
class SenseScans extends Site_Model {
	public $site          = 'SenseScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://reader.sensescans.com/series/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * $chapter is LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/). The number is rendered as
	 * "v<VOLUME>/c<CHAPTER>.<CHAPTER_EXTRA>", omitting the volume when it is '0' and the
	 * extra when absent.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//chapterFormat allows the chapter segment to be missing, so never assume these offsets exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => "http://reader.sensescans.com/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Fetch title data by delegating to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1063
1064 View Code Duplication
/**
 * Site model for the Jaimini's Box FoolSlide reader (https://jaiminisbox.com/reader).
 */
class JaiminisBox extends Site_Model {
	public $site          = 'JaiminisBox';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Build the series page URL for a title slug.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://jaiminisbox.com/reader/series/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a chapter.
	 *
	 * $chapter is LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/). The number is rendered as
	 * "v<VOLUME>/c<CHAPTER>.<CHAPTER_EXTRA>", omitting the volume when it is '0' and the
	 * extra when absent.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//chapterFormat allows the chapter segment to be missing, so never assume these offsets exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => "https://jaiminisbox.com/reader/read/{$title_url}/{$chapter}/",
			'number' => $number
		];
	}

	/**
	 * Fetch title data by delegating to parseFoolSlide() with the series URL.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1087