Completed
Push — master ( da8f3c...d8ce3e )
by Angus
03:49
created

Batoto::getTitleData()   C

Complexity

Conditions 8
Paths 34

Size

Total Lines 46
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 8

Importance

Changes 0
Metric Value
cc 8
eloc 29
nc 34
nop 2
dl 0
loc 46
ccs 17
cts 17
cp 1
crap 8
rs 5.5555
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
abstract class Site_Model extends CI_Model {
4
	public $site          = '';
5
	public $titleFormat   = '';
6
	public $chapterFormat = '';
7
8 121
	public function __construct() {
9 121
		parent::__construct();
10
11 121
		$this->load->database();
12 121
	}
13
14
	abstract public function getFullTitleURL(string $title_url) : string;
15
16
	abstract public function getChapterData(string $title_url, string $chapter) : array;
17
18
	//TODO: When ci-phpunit-test supports PHP Parser 3.x, add " : ?array"
19
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE);
20
21
	/**
	 * Checks a title URL segment against this site's $titleFormat pattern.
	 * An error is logged whenever the pattern does not match.
	 *
	 * @param string $title_url
	 *
	 * @return bool
	 */
	public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
26
	/**
	 * Checks a chapter string against this site's $chapterFormat pattern.
	 * An error is logged whenever the pattern does not match.
	 *
	 * @param string $chapter
	 *
	 * @return bool
	 */
	public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
31
32 19
	/**
	 * Performs an HTTP request with cURL and splits the raw response into its parts.
	 *
	 * @param string $url             URL to request.
	 * @param string $cookie_string   Cookie header value ("a=1; b=2"); not sent when empty.
	 * @param string $cookiejar_path  File to read cookies from (CURLOPT_COOKIEFILE); not set when empty.
	 * @param bool   $follow_redirect Follow "Location:" redirects.
	 * @param bool   $isPost          Send a POST request using $postFields as the body.
	 * @param array  $postFields      POST fields, encoded with http_build_query().
	 *
	 * @return array|false ['headers' => ..., 'status_code' => ..., 'body' => ...], or FALSE if curl_exec() failed.
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_ENCODING , "gzip");
		//curl_setopt($ch, CURLOPT_VERBOSE, 1);
		curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are returned in front of the body and split off below.

		if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

		if(!empty($cookie_string))  curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
		if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

		//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36');

		//TODO: Check in a while if this being enabled still causes issues
		//curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

		curl_setopt($ch, CURLOPT_URL, $url);

		if($isPost) {
			//CURLOPT_POST is an on/off switch, not a field count.
			curl_setopt($ch, CURLOPT_POST, TRUE);
			curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
		}

		$response = curl_exec($ch);
		if($response === FALSE) {
			log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
			//The handle has to be released on the failure path as well.
			curl_close($ch);
			//FIXME: We don't always account for FALSE return
			return FALSE;
		}

		$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		return [
			'headers'     => http_parse_headers(substr($response, 0, $header_size)),
			'status_code' => $status_code,
			'body'        => substr($response, $header_size)
		];
	}
76
77
	/**
78
	 * @param array  $content
79
	 * @param string $title_url
80
	 * @param string $node_title_string
81
	 * @param string $node_row_string
82
	 * @param string $node_latest_string
83
	 * @param string $node_chapter_string
84
	 * @param string $failure_string
85
	 *
86
	 * @return DOMElement[]|false
87
	 */
88 18
	final protected function parseTitleDataDOM(
89
		$content, string $title_url,
90
		string $node_title_string, string $node_row_string,
91
		string $node_latest_string, string $node_chapter_string,
92
		string $failure_string = "") {
93
		//list('headers' => $headers, 'status_code' => $status_code, 'body' => $data) = $content; //TODO: PHP 7.1
94
95 18
		if(!is_array($content)) {
96
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
97
		} else {
98 18
			$headers     = $content['headers'];
0 ignored issues
show
Unused Code introduced by
$headers is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
99 18
			$status_code = $content['status_code'];
100 18
			$data        = $content['body'];
101
102 18
			if(!($status_code >= 200 && $status_code < 300)) {
103 8
				log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
104 10
			} else if(empty($data)) {
105
				log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
106 10
			} else if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
107 1
				log_message('error', "{$this->site} : {$title_url} | Failure string matched");
108
			} else {
109 9
				$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.
110
111 9
				$dom = new DOMDocument();
112 9
				libxml_use_internal_errors(TRUE);
113 9
				$dom->loadHTML($data);
114 9
				libxml_use_internal_errors(FALSE);
115
116 9
				$xpath = new DOMXPath($dom);
117 9
				$nodes_title = $xpath->query($node_title_string);
118 9
				$nodes_row   = $xpath->query($node_row_string);
119 9
				if($nodes_title->length === 1 && $nodes_row->length === 1) {
120 9
					$firstRow      = $nodes_row->item(0);
121 9
					$nodes_latest  = $xpath->query($node_latest_string,  $firstRow);
122
123 9
					if($node_chapter_string !== '') {
124 8
						$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
125
					} else {
126 1
						$nodes_chapter = $nodes_row;
127
					}
128
129 9
					if($nodes_latest->length === 1 && $nodes_chapter->length === 1) {
130
						return [
131 9
							'nodes_title'   => $nodes_title->item(0),
132 9
							'nodes_latest'  => $nodes_latest->item(0),
133 9
							'nodes_chapter' => $nodes_chapter->item(0)
134
						];
135
					} else {
136
						log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
137
					}
138
				} else {
139
					log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
140
				}
141
			}
142
		}
143
144 9
		return FALSE;
145
	}
146
147 9
	public function cleanTitleDataDOM(string $data) : string {
148 9
		return $data;
149
	}
150
151
	//This has its own function due to FoOlSlide being used a lot by fan translation sites, and the code being pretty much the same across all of them.
152 8
	/**
	 * Shared title scraper for FoOlSlide-based sites.
	 *
	 * @param string $fullURL   Full URL of the series page.
	 * @param string $title_url Title identifier; only passed through to parseTitleDataDOM().
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the page could not be fetched or parsed.
	 */
	final public function parseFoolSlide(string $fullURL, string $title_url) {
		$page = $this->get_content($fullURL);
		if(!$page) {
			return NULL;
		}

		//Only the <article> element is handed to the DOM parser.
		$page['body'] = preg_replace('/^[\S\s]*(<article[\S\s]*)<\/article>[\S\s]*$/', '$1', $page['body']);

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[@class='large comic']/h1[@class='title']",
			"(//div[@class='list']/div[@class='group']/div[@class='title' and text() = 'Chapters']/following-sibling::div[@class='element'][1] | //div[@class='list']/div[@class='element'][1] | //div[@class='list']/div[@class='group'][1]/div[@class='element'][1])[1]",
			"div[@class='meta_r']",
			"div[@class='title']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		$title       = trim($nodes['nodes_title']->textContent);
		$chapterLink = (string) $nodes['nodes_chapter']->getAttribute('href');
		$chapter     = preg_replace('/.*\/read\/.*?\/(.*?)\/$/', '$1', $chapterLink);

		//The date is whatever follows the first comma of the meta column, with dots stripped.
		$dateText = (string) str_replace('.', '', explode(',', $nodes['nodes_latest']->nodeValue)[1]);
		$updated  = date("Y-m-d H:i:s", strtotime($dateText));

		return [
			'title'          => $title,
			'latest_chapter' => $chapter,
			'last_updated'   => $updated
		];
	}
178
179
	public function doCustomFollow(string $data = "", array $extra = []) {}
180
	public function doCustomUpdate() {}
181
}
182
/**
 * Holds one instance of every supported site class, exposed as a public
 * property named after the site (e.g. $this->Batoto).
 */
class Sites_Model extends CI_Model {
	//FIXME: Is it possible to automatically generate this in some way or another?
	public $MangaFox;
	public $MangaHere;
	public $Batoto;
	public $DynastyScans;
	public $MangaPanda;
	public $MangaStream;
	public $WebToons;
	public $KissManga;
	public $KireiCake;
	public $GameOfScanlation;
	public $MangaCow;
	public $SeaOtterScans;
	public $HelveticaScans;
	public $SenseScans;
	public $JaiminisBox;

	public function __construct() {
		parent::__construct();

		//Each property shares its name with the class it holds; instantiate them in declaration order.
		$siteClasses = [
			'MangaFox',
			'MangaHere',
			'Batoto',
			'DynastyScans',
			'MangaPanda',
			'MangaStream',
			'WebToons',
			'KissManga',
			'KireiCake',
			'GameOfScanlation',
			'MangaCow',
			'SeaOtterScans',
			'HelveticaScans',
			'SenseScans',
			'JaiminisBox'
		];
		foreach($siteClasses as $siteClass) {
			$this->{$siteClass} = new $siteClass();
		}
	}
}
220
221
class MangaFox extends Site_Model {
222
	public $site          = 'MangaFox';
223
	public $titleFormat   = '/^[a-z0-9_]+$/';
224
	public $chapterFormat = '/^(?:v[0-9a-zA-Z]+\/)?c[0-9\.]+$/';
225
226 2
	public function getFullTitleURL(string $title_url) : string {
227 2
		return "http://mangafox.me/manga/{$title_url}/";
228
	}
229
230
	public function getChapterData(string $title_url, string $chapter) : array {
231
		return [
232
			'url'    => "http://mangafox.me/manga/{$title_url}/{$chapter}/",
233
			'number' => $chapter
234
		];
235
	}
236
237 2
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
238 2
		$titleData = [];
239
240 2
		$fullURL = $this->getFullTitleURL($title_url);
241 2
		$content = $this->get_content($fullURL);
242
243 2
		$data = $this->parseTitleDataDOM(
244
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 241 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
245
			$title_url,
246 2
			"//meta[@property='og:title']/@content",
247 2
			"//body/div[@id='page']/div[@class='left']/div[@id='chapters']/ul[1]/li[1]",
248 2
			"div/span[@class='date']",
249 2
			"div/h3/a"
250
		);
251 2
		if($data) {
252 1
			$titleData['title'] = html_entity_decode(substr($data['nodes_title']->textContent, 0, -6));
253
254 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
255 1
			$chapterURLSegments = explode('/', $link);
256 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
257 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
258
		}
259
260 2
		return (!empty($titleData) ? $titleData : NULL);
261
	}
262
}
263
264
class MangaHere extends Site_Model {
265
	public $site          = 'MangaHere';
266
	public $titleFormat   = '/^[a-z0-9_]+$/';
267
	public $chapterFormat = '/^(?:v[0-9]+\/)?c[0-9]+(?:\.[0-9]+)?$/';
268
269 2
	public function getFullTitleURL(string $title_url) : string {
270 2
		return "http://www.mangahere.co/manga/{$title_url}/";
271
	}
272
273
	public function getChapterData(string $title, string $chapter) : array {
274
		return [
275
			'url'    => "http://www.mangahere.co/manga/{$title}/{$chapter}/",
276
			'number' => $chapter
277
		];
278
	}
279
280 2
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
281 2
		$titleData = [];
282
283 2
		$fullURL = $this->getFullTitleURL($title_url);
284 2
		$content = $this->get_content($fullURL);
285
286 2
		$data = $this->parseTitleDataDOM(
287
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($fullURL) on line 284 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
288
			$title_url,
289 2
			"//meta[@property='og:title']/@content",
290 2
			"//body/section/article/div/div[@class='manga_detail']/div[@class='detail_list']/ul[1]/li[1]",
291 2
			"span[@class='right']",
292 2
			"span[@class='left']/a",
293 2
			"<div class=\"error_text\">Sorry, the page you have requested can’t be found."
294
		);
295 2
		if($data) {
296 1
			$titleData['title'] = $data['nodes_title']->textContent;
297
298 1
			$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));
299 1
			$chapterURLSegments = explode('/', $link);
300 1
			$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
301 1
			$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $data['nodes_latest']->nodeValue));
302
		}
303
304 2
		return (!empty($titleData) ? $titleData : NULL);
305
	}
306
}
307
308
class Batoto extends Site_Model {
309
	//Batoto is a bit tricky to track. Unlike MangaFox and MangaHere, it doesn't store anything in the title_url, which means we have to get the data via other methods.
310
	//One problem we have though, is the tracker must support multiple sites, so this means we need to do some weird things to track Batoto.
311
	//title_url is stored like: "ID:--:LANGUAGE"
312
	//chapter_urls are stored like "CHAPTER_ID:--:CHAPTER_NUMBER"
313
314
	public $site          = 'Batoto';
315
	public $titleFormat   = '/^[0-9]+:--:(?:English|Spanish|French|German|Portuguese|Turkish|Indonesian|Greek|Filipino|Italian|Polish|Thai|Malay|Hungarian|Romanian|Arabic|Hebrew|Russian|Vietnamese|Dutch)$/';
316
	//FIXME: We're not validating the chapter name since we don't know what all the possible valid characters can be
317
	//       Preferably we'd just use /^[0-9a-z]+:--:(v[0-9]+\/)?c[0-9]+(\.[0-9]+)?$/
318
	public $chapterFormat = '/^[0-9a-z]+:--:.+$/';
319
320 1
	public function getFullTitleURL(string $title_string) : string {
321
		//FIXME: This does not point to the language specific title page. Should ask if it is possible to set LANG as arg?
322
		//FIXME: This points to a generic URL which will redirect according to the ID. Preferably we'd try and get the exact URL from the title, but we can't pass it here.
323 1
		$title_parts = explode(':--:', $title_string);
324 1
		return "http://bato.to/comic/_/comics/-r".$title_parts[0];
325
	}
326
327 View Code Duplication
	public function getChapterData(string $title_string, string $chapter) : array {
328
		//$title_string isn't used here.
329
330
		$chapter_parts = explode(':--:', $chapter);
331
		return [
332
			'url'    => "http://bato.to/reader#" . $chapter_parts[0],
333
			'number' => $chapter_parts[1]
334
		];
335
	}
336
337 1
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
338 1
		$titleData = [];
339
340 1
		$title_parts = explode(':--:', $title_url);
341 1
		$fullURL     = $this->getFullTitleURL($title_url);
342 1
		$lang        = $title_parts[1]; //TODO: Validate title_lang from array?
343
344
345
		//Bato.to is annoying and locks stuff behind auth. See: https://github.com/DakuTree/manga-tracker/issues/14#issuecomment-233830855
346
		$cookies = [
347 1
			"lang_option={$lang}",
348 1
			"member_id={$this->config->item('batoto_cookie_member_id')}",
349 1
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
350
		];
351 1
		$content = $this->get_content($fullURL, implode("; ", $cookies), "", TRUE);
352
353 1
		$data = $this->parseTitleDataDOM(
354
			$content,
0 ignored issues
show
Security Bug introduced by
It seems like $content defined by $this->get_content($full...', $cookies), '', TRUE) on line 351 can also be of type false; however, Site_Model::parseTitleDataDOM() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
355
			$title_url,
356 1
			"//h1[@class='ipsType_pagetitle']",
357 1
			"//table[contains(@class, 'chapters_list')]/tbody/tr[2]",
358 1
			"td[last()]",
359 1
			"td/a[contains(@href,'reader')]",
360 1
			">Register now<"
361
		);
362 1
		if($data) {
363
			$titleData['title'] = html_entity_decode(trim($data['nodes_title']->textContent));
364
365
			///^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/
366
			preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($data['nodes_chapter']->nodeValue), $text);
367
			$titleData['latest_chapter'] = substr($data['nodes_chapter']->getAttribute('href'), 22) . ':--:' . ((!empty($text['volume']) ? 'v'.$text['volume'].'/' : '') . 'c'.$text['chapter'] . (!empty($text['extra']) ? '-'.$text['extra'] : ''));
368
369
			$dateString = $data['nodes_latest']->nodeValue;
370
			if($dateString == 'An hour ago') {
371
				$dateString = '1 hour ago';
372
			}
373
			$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
374
375
			if($firstGet && $lang == 'English') {
376
				//FIXME: English is forced due for now. See #78.
377
				$this->doCustomFollow($content['body'], ['id' => $title_parts[0], 'lang' => $lang]);
378
			}
379
		}
380
381 1
		return (!empty($titleData) ? $titleData : NULL);
382
	}
383
384
	public function cleanTitleDataDOM(string $data) : string {
385
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
386
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);
387
		$data = preg_replace('/<div id=\'commentsStart\' class=\'ipsBox\'>[\s\S]+$/', '</div></div><!-- end mainContent -->', $data);
388
389
		return $data;
390
	}
391
392
	//FIXME: This entire thing feels like an awful implementation....BUT IT WORKS FOR NOW.
393
	/**
	 * Sends Batoto's "follow" (like) request for a title, using the session tokens embedded in the title page.
	 *
	 * @param string $data  HTML of the title page; must contain ipb.vars['secure_hash'] and ipb.vars['session_id'].
	 * @param array  $extra Requires 'id' (sent as f_relid) and 'lang' (sent as the lang_option cookie).
	 *
	 * @return bool TRUE when the request came back with status code 200, FALSE otherwise.
	 */
	public function doCustomFollow(string $data = "", array $extra = []) {
		if(!isset($extra['id'], $extra['lang'])) {
			log_message('error', "{$this->site} | doCustomFollow called without id/lang");
			return FALSE;
		}

		//Without both tokens the request cannot succeed, so bail out instead of sending empty values.
		if(!preg_match('/ipb\.vars\[\'secure_hash\'\]\s+=\s+\'(?<secure_hash>[0-9a-z]+)\';[\s\S]+ipb\.vars\[\'session_id\'\]\s+=\s+\'(?<session_id>[0-9a-z]+)\';/', $data, $text)) {
			log_message('error', "{$this->site} : {$extra['id']} | Unable to find secure_hash/session_id");
			return FALSE;
		}

		$params = [
			's'          => $text['session_id'],
			'app'        => 'core',
			'module'     => 'ajax',
			'section'    => 'like',
			'do'         => 'save',
			'secure_key' => $text['secure_hash'],
			'f_app'      => 'ccs',
			'f_area'     => 'ccs_custom_database_3_records',
			'f_relid'    => $extra['id']
		];
		$formData = [
			'like_notify' => '0',
			'like_freq'   => 'immediate',
			'like_anon'   => '0'
		];

		$cookies = [
			"lang_option={$extra['lang']}",
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content('http://bato.to/forums/index.php?'.http_build_query($params), implode("; ", $cookies), "", TRUE, TRUE, $formData);

		//'status_code' is a key of the result, so it must be looked up with array_key_exists(); in_array() searches values.
		return is_array($content) && array_key_exists('status_code', $content) && $content['status_code'] === 200;
	}
422
	/**
	 * Reads the tracker account's Batoto follow list and collects the latest chapter of every English title on it.
	 *
	 * @return array[] Map of title_url ("ID:--:English") => ['title', 'latest_chapter', 'last_updated'].
	 *                 Empty when the page could not be fetched or contained no usable rows.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$cookies = [
			"lang_option=English", //FIXME: English is forced due for now. See #78.
			"member_id={$this->config->item('batoto_cookie_member_id')}",
			"pass_hash={$this->config->item('batoto_cookie_pass_hash')}"
		];
		$content = $this->get_content("http://bato.to/myfollows", implode("; ", $cookies), "", TRUE);
		if(!is_array($content)) {
			log_message('error', "{$this->site} /myfollows | Failed to grab URL (See above curl error)");
			return $titleDataList;
		}

		$status_code = $content['status_code'];
		$data        = $content['body'];
		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /myfollows | Bad Status Code ({$status_code})");
			return $titleDataList;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} /myfollows | Data is empty? (Status code: {$status_code})");
			return $titleDataList;
		}

		//Cut the page down to the main content block before handing it to the DOM parser.
		$data = preg_replace('/^[\s\S]+<!-- ::: CONTENT ::: -->/', '<!-- ::: CONTENT ::: -->', $data);
		$data = preg_replace('/<!-- end mainContent -->[\s\S]+$/', '<!-- end mainContent -->', $data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_use_internal_errors(FALSE);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//table[contains(@class, 'chapters_list')]/tbody/tr[position()>1]");
		if($nodes_rows->length === 0) {
			//Double quotes are required here, otherwise {$this->site} is logged literally.
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			$nodes_lang = $xpath->query("td[3]/div", $row);
			if($nodes_lang->length !== 1 || $nodes_lang->item(0)->getAttribute('title') !== 'English') {
				continue;
			}

			$nodes_title   = $xpath->query("td[2]/a[1]", $row);
			$nodes_chapter = $xpath->query("td[2]/a[2]", $row);
			$nodes_latest  = $xpath->query("td[5]", $row);
			if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length} | LATEST: {$nodes_latest->length})");
				continue;
			}

			//The title ID is the run of digits at the end of the title link.
			$title = $nodes_title->item(0);
			if(!preg_match('/(?<id>[0-9]+)$/', $title->getAttribute('href'), $title_url_arr)) {
				log_message('error', "{$this->site}/Custom | Unable to find title ID in URL: {$title->getAttribute('href')}");
				continue;
			}
			$title_url = "{$title_url_arr['id']}:--:English"; //FIXME: English is currently forced, see #78

			//Only the first row seen for a title is kept.
			if(array_key_exists($title_url, $titleDataList)) {
				continue;
			}

			$chapter = $nodes_chapter->item(0);
			if(!preg_match('/^(?:Vol\.(?<volume>\S+) )?(?:Ch.(?<chapter>[^\s:]+)(?:\s?-\s?(?<extra>[0-9]+))?):?.*/', trim($chapter->nodeValue), $text)) {
				log_message('error', "{$this->site}/Custom : {$title_url} | Unable to parse chapter name");
				continue;
			}

			$titleData = [];
			$titleData['title'] = trim($title->textContent);

			//Stored as "CHAPTER_ID:--:CHAPTER_NUMBER"; the ID is the href minus its first 8 characters.
			$titleData['latest_chapter'] = substr($chapter->getAttribute('href'), 8) . ':--:' . ((!empty($text['volume']) ? 'v' . $text['volume'] . '/' : '') . 'c' . $text['chapter'] . (!empty($text['extra']) ? '-' . $text['extra'] : ''));

			$dateString = $nodes_latest->item(0)->nodeValue;
			if($dateString === 'An hour ago') {
				$dateString = '1 hour ago';
			}
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));

			$titleDataList[$title_url] = $titleData;
		}

		return $titleDataList;
	}
497
}
498
499
class DynastyScans extends Site_Model {
500
	//FIXME: This has some major issues. SEE: https://github.com/DakuTree/manga-tracker/issues/58
501
502
	public $site          = 'DynastyScans';
503
	public $titleFormat   = '/^[a-z0-9_]+:--:(?:0|1)$/';
504
	public $chapterFormat = '/^[0-9a-z_]+$/';
505
506 1
	public function getFullTitleURL(string $title_string) : string {
507 1
		$title_parts = explode(':--:', $title_string);
508 1
		$url_type = ($title_parts[1] == '0' ? 'series' : 'chapters');
509
510 1
		return 'http://dynasty-scans.com/'.$url_type.'/'.$title_parts[0];
511
	}
512
513
	/**
	 * Builds the chapter URL and a display number for a DynastyScans chapter string.
	 *
	 * Known chapter formats (# is numbers):
	 *     chapters_#A_#B - Ch#A-#B
	 *     ch_#A          - Ch#A
	 *     ch_#A_#B       - Ch#A.#B
	 *     <NOTHING>      - Oneshot (This is passed as "oneshot")
	 *
	 * @param string $title_string Stored title_url; only the part before ":--:" is used.
	 * @param string $chapter
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_string, string $chapter) : array {
		$titleSlug = explode(':--:', $title_string)[0];
		$url       = 'http://dynasty-scans.com/chapters/' . $titleSlug.'_'.$chapter;

		if($chapter === 'oneshot') {
			return ['url' => $url, 'number' => 'oneshot'];
		}

		//Put an underscore after the leading letters so the prefix becomes its own segment.
		$chapter = preg_replace("/^([a-zA-Z]+)/", '$1_', $chapter);
		$parts   = explode('_', $chapter);

		if($parts[0] === 'ch') {
			$number = 'c'.$parts[1].(!empty($parts[2]) ? '.'.$parts[2] : '');
		} elseif($parts[0] === 'chapters') {
			//This is barely ever used, but I have seen it.
			$number = 'c'.$parts[1].'-'.$parts[2];
		} else {
			//TODO: FALLBACK, ALERT ADMIN?
			$number = $chapter;
		}

		return ['url' => $url, 'number' => $number];
	}
550
551 1
	/**
	 * Scrapes title, latest chapter and last-updated date for a DynastyScans title.
	 *
	 * @param string $title_url Stored as "SLUG:--:TYPE", where TYPE is '0' (series) or '1' (oneshot).
	 * @param bool   $firstGet  Not used by this site.
	 *
	 * @return array|null Title data, or NULL when the page could not be fetched or parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$title_parts = explode(':--:', $title_url);
		switch($title_parts[1]) {
			case '0':
				//Normal series. parseTitleDataDOM() copes with (and logs) a failed fetch itself.
				$data = $this->parseTitleDataDOM(
					$content,
					$title_url,
					"//h2[@class='tag-title']/b[1]",
					"(//dl[@class='chapter-list']/dd[a[contains(@href,'/chapters/')]])[last()]",
					"small",
					"a[@class='name']"
				);
				if($data) {
					$titleData['title'] = $data['nodes_title']->textContent;

					//In cases where the series is a doujin, try and prepend the copyright.
					preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $content['body'], $matchesD);
					if(isset($matchesD[1])) {
						//The link text carries a 7 character suffix which is dropped.
						$copyright = (string) substr($matchesD[1], 0, -7);
						if($copyright !== '' && $copyright !== 'Original') {
							$titleData['title'] = $copyright.' - '.$titleData['title'];
						}
					}

					$chapterURLSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
					if (strpos($chapterURLSegments[2], $title_parts[0]) !== false) {
						//Strip the "SLUG_" prefix so only the chapter part is stored.
						$titleData['latest_chapter'] = substr($chapterURLSegments[2], strlen($title_parts[0]) + 1);
					} else {
						$titleData['latest_chapter'] = $chapterURLSegments[2];
					}

					$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime(str_replace("'", '', substr((string) $data['nodes_latest']->textContent, 9))));
				}
				break;

			case '1':
				//Oneshot. This branch reads the body directly, so a failed fetch has to be caught here.
				if(!is_array($content)) {
					log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
					break;
				}
				$data = (string) $content['body'];

				if(!preg_match('/<b>.*<\/b>/', $data, $matchesT) || !preg_match('/<i class="icon-calendar"><\/i> (.*)<\/span>/', $data, $matches)) {
					log_message('error', "{$this->site} : {$title_url} | Unable to find oneshot title/date");
					break;
				}

				preg_match('/\/doujins\/[^"]+">(.+)?(?=<\/a>)<\/a>/', $data, $matchesD);
				$copyright = (isset($matchesD[1]) && $matchesD[1] !== 'Original') ? $matchesD[1].' - ' : '';

				//substr(.., 3, -4) drops the surrounding <b> and </b> tags.
				$titleData['title'] = $copyright . substr($matchesT[0], 3, -4);

				$titleData['latest_chapter'] = 'oneshot'; //This will never change

				$titleData['last_updated']   = date("Y-m-d H:i:s", strtotime($matches[1]));

				//Oneshots are special, and really shouldn't need to be re-tracked
				$titleData['status'] = '2';
				break;

			default:
				//something went wrong
				break;
		}
		return (!empty($titleData) ? $titleData : NULL);
	}
611
}
612
613
class MangaPanda extends Site_Model {
	public $site          = 'MangaPanda';
	//NOTE: Manga pages sit directly under the site root on MangaPanda, so the
	//      negative lookahead rejects the root-level pages known not to be manga.
	public $titleFormat   = '/^(?!(?:latest|search|popular|random|alphabetical|privacy)$)([a-z0-9-]+)$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * Builds the address of a title's page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://www.mangapanda.com/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter identifier.
	 * @return array ['url' => chapter URL, 'number' => chapter prefixed with "c"].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterURL    = "http://www.mangapanda.com/{$title_url}/{$chapter}/";
		$chapterNumber = 'c'.$chapter;

		return [
			'url'    => $chapterURL,
			'number' => $chapterNumber
		];
	}

	/**
	 * Fetches the title page and extracts the title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated'; NULL when
	 *                    parseTitleDataDOM() yields a falsy result.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		//The four strings are XPath queries; presumably title / row / date / chapter
		//link in that order (see Site_Model::parseTitleDataDOM to confirm).
		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h2[@class='aname']",
			"(//table[@id='listing']/tr)[last()]",
			"td[2]",
			"td[1]/a"
		);
		if(!$nodes) {
			return NULL;
		}

		return [
			'title'          => $nodes['nodes_title']->textContent,
			//Keep only the trailing numeric segment of the chapter link.
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)$/', '$1', (string) $nodes['nodes_chapter']->getAttribute('href')),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $nodes['nodes_latest']->nodeValue)),
		];
	}
}
655
656
class MangaStream extends Site_Model {
	public $site          = 'MangaStream';
	public $titleFormat   = '/^[a-z0-9_]+$/';
	public $chapterFormat = '/^(.*?)\/[0-9]+$/';

	/**
	 * Builds the address of a title's page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://mangastream.com/manga/{$title_url}/";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter identifier; the part before the first "/" is
	 *                          used as the chapter number.
	 * @return array ['url' => chapter URL, 'number' => "c" + first segment].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$segments = explode('/', $chapter);

		return [
			'url'    => "https://mangastream.com/r/{$title_url}/{$chapter}",
			'number' => 'c'.$segments[0]
		];
	}

	/**
	 * Fetches the title page and extracts the title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated'; NULL when
	 *                    parseTitleDataDOM() yields a falsy result.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		//The last argument is presumably the marker parseTitleDataDOM() uses to
		//recognise a missing title (TODO confirm against Site_Model).
		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//div[contains(@class, 'content')]/div[1]/h1",
			"//div[contains(@class, 'content')]/div[1]/table/tr[2]",
			"td[2]",
			"td[1]/a",
			"<h1>Page Not Found</h1>"
		);
		if(!$nodes) {
			return NULL;
		}

		return [
			'title'          => $nodes['nodes_title']->textContent,
			//Drop the trailing page number, keeping "<chapter>/<id>" from the link.
			'latest_chapter' => preg_replace('/^.*\/(.*?\/[0-9]+)\/[0-9]+$/', '$1', (string) $nodes['nodes_chapter']->getAttribute('href')),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $nodes['nodes_latest']->nodeValue)),
		];
	}
}
698
699
class WebToons extends Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (well, alphanumeric at least).
	   The RSS URL, however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> skip the RSS URL and just parse the title URL, but RSS is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	public $site          = 'WebToons';
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi)$/';
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * Builds the address of a title's listing page.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE".
	 * @return string Absolute URL of the listing page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		//NOTE(review): the trailing "/" ends up inside the title_no query value.
		//              Kept as-is because callers may depend on the exact string; confirm it is intended.
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}/";
	}

	/**
	 * Builds the viewer URL and the display number for one chapter.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE".
	 * @param string $chapter   "ID:--:CHAPTER_N".
	 * @return array ['url' => viewer URL, 'number' => CHAPTER_N part].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Reads the title's RSS feed and extracts the title, latest chapter and update time.
	 *
	 * Every failure (download failed, series missing, unparsable feed, unexpected link
	 * layout) is logged and reported as NULL; the process is never terminated.
	 *
	 * @param string $title_url "ID:--:LANG:--:TITLE:--:GENRE".
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated', or NULL.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		if(!is_array($content) || !isset($content['body'])) {
			//get_content() can return FALSE; indexing that would hand NULL to the XML parser.
			log_message('error', "Failed to grab RSS (WebToons): {$title_url}");
			return NULL;
		}

		$data = (string) $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		//Collect parser errors internally instead of emitting warnings, then restore the caller's setting.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);
		if($xml === FALSE) {
			log_message('error', "Unable to parse RSS (WebToons): {$title_url}");
			return NULL;
		}

		if(!isset($xml->{'channel'}->item[0])) {
			//Feed has no items yet, nothing to report.
			return NULL;
		}
		$latestItem = $xml->{'channel'}->item[0];

		//Link layout: http://host/LANG/GENRE/TITLE/CHAPTER/viewer?...episode_no=ID
		//so segment 6 is the chapter name and segment 7 ends with the episode id.
		$chapterURLSegments = explode('/', ((string) $latestItem->link));
		if(!isset($chapterURLSegments[6], $chapterURLSegments[7])) {
			log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			return NULL;
		}

		return [
			'title'          => trim((string) $xml->{'channel'}->title),
			'latest_chapter' => preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6],
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $latestItem->pubDate)),
		];
	}
}
762
763
class KissManga extends Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6 hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	public $site          = 'KissManga';
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * Builds the address of a title's page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   "CHAPTER_NAME:--:CHAPTER_ID".
	 * @return array ['url' => chapter URL, 'number' => CHAPTER_NAME part, unmodified].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse,
			//       so the raw chapter name is used as the number for now.
			'number' => $chapter_parts[0]
		];
	}

	/**
	 * Fetches the title page (using the externally generated cookiejar) and extracts
	 * the title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated'; NULL when the
	 *                    cookiejar is missing or stale, the download fails, or the page
	 *                    does not have the expected structure.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		//Only proceed if the cookiejar was refreshed within the last day (so we can know if something went wrong).
		$cookiejar_path      = str_replace("public/", "_scripts/cookiejar", FCPATH);
		$cookie_last_updated = (file_exists($cookiejar_path) ? filemtime($cookiejar_path) : FALSE);
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			return NULL;
		}

		$content = $this->get_content($this->getFullTitleURL($title_url), '', $cookiejar_path);
		if(!is_array($content) || !isset($content['body'])) {
			//get_content() can return FALSE on failure.
			log_message('error', "Failed to grab URL (KissManga): {$title_url}");
			return NULL;
		}

		$data = (string) $content['body'];
		if(strpos($data, 'containerRoot') === FALSE) {
			log_message('error', "Unexpected page content (KissManga): {$title_url}");
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		if(!is_string($data) || $data === '') {
			//preg_replace() returns NULL on a PCRE error (e.g. backtrack limit on a large page).
			log_message('error', "Unable to isolate title listing (KissManga): {$title_url}");
			return NULL;
		}

		$dom = new DOMDocument();
		//Collect parser errors internally instead of emitting warnings, then restore the caller's setting.
		$previousSetting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			return NULL;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query("td[2]",   $firstRow);
		$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
		if($nodes_latest->length === 0 || $nodes_chapter->length === 0) {
			//Row exists but lacks the date cell or chapter link; item(0) would be NULL.
			return NULL;
		}

		$link = (string) $nodes_chapter->item(0)->getAttribute('href');
		//Strip the query string before splitting; segment 3 is the chapter name.
		$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
		if(!isset($chapterURLSegments[3])) {
			log_message('error', "Unexpected chapter link format (KissManga): {$title_url}");
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			//Chapter name plus the trailing numeric id taken from the full link.
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link),
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent)),
		];
	}
}
842
843
class GameOfScanlation extends Site_Model {
	public $site          = 'GameOfScanlation';
	public $titleFormat   = '/^[a-z0-9\.-]+$/';
	public $chapterFormat = '/^[a-z0-9\.-]+$/';

	/**
	 * Builds the address of a title's page.
	 *
	 * NOTE: GoS has two separate title URL formats, one under /projects/ and one
	 *       under /forums/. Titles that use /forums/ seem to have ".%ID%" appended
	 *       to their title_url, so the presence of a dot selects the format.
	 *
	 * @param string $title_url Title slug, optionally suffixed with ".ID".
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return (strpos($title_url, '.') === FALSE
			? "https://gameofscanlation.moe/projects/{$title_url}/"
			: "https://gameofscanlation.moe/forums/{$title_url}/");
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * A trailing ".digits" suffix is removed from the title slug for the URL and
	 * from the chapter for the number; "chapter-" in the number becomes "c".
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter slug.
	 * @return array ['url' => chapter URL, 'number' => shortened chapter label].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$idSuffix     = "/\\.[0-9]+$/";
		$projectSlug  = preg_replace($idSuffix, "", $title_url);
		$chapterLabel = preg_replace($idSuffix, "", $chapter);

		return [
			'url'    => "https://gameofscanlation.moe/projects/".$projectSlug.'/'.$chapter.'/',
			'number' => preg_replace("/chapter-/", "c", $chapterLabel)
		];
	}

	/**
	 * Fetches the title page and extracts the title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated'; NULL when
	 *                    parseTitleDataDOM() yields a falsy result.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//meta[@property='og:title']",
			"//ol[@class='discussionListItems']/li[1]/div[@class='home_list']/ul/li/div[@class='list_press_text']",
			"p[@class='author']/span|p[@class='author']/abbr",
			"p[@class='text_work']/a"
		);
		if(!$nodes) {
			return NULL;
		}

		return [
			//The title comes from the og:title meta tag, entity-decoded and trimmed.
			'title'          => trim(html_entity_decode($nodes['nodes_title']->getAttribute('content'))),
			'latest_chapter' => preg_replace('/^projects\/.*?\/(.*?)\/$/', '$1', (string) $nodes['nodes_chapter']->getAttribute('href')),
			//NOTE(review): the title attribute is cast to int and used as a Unix timestamp - confirm the markup provides one.
			'last_updated'   => date("Y-m-d H:i:s",(int) $nodes['nodes_latest']->getAttribute('title')),
		];
	}
}
894
895
class MangaCow extends Site_Model {
	public $site          = 'MangaCow';
	public $titleFormat   = '/^[a-zA-Z0-9_]+$/';
	public $chapterFormat = '/^[0-9]+$/';

	/**
	 * Builds the address of a title's page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the title page (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://mngcow.co/{$title_url}/";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 * @return array ['url' => title URL + chapter + "/", 'number' => "c" + chapter].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$titlePage = $this->getFullTitleURL($title_url);

		return [
			'url'    => $titlePage.$chapter.'/',
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Fetches the title page and extracts the title, latest chapter and update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null Keys 'title', 'latest_chapter', 'last_updated'; NULL when
	 *                    parseTitleDataDOM() yields a falsy result.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$page = $this->get_content($this->getFullTitleURL($title_url));

		//The row query already targets the chapter link, and the chapter query is
		//left empty; presumably parseTitleDataDOM() then reuses the row node (TODO confirm).
		$nodes = $this->parseTitleDataDOM(
			$page,
			$title_url,
			"//h4",
			"//ul[contains(@class, 'mng_chp')]/li[1]/a[1]",
			"b[@class='dte']",
			"",
			"404 Page Not Found"
		);
		if(!$nodes) {
			return NULL;
		}

		return [
			'title'          => trim($nodes['nodes_title']->textContent),
			'latest_chapter' => preg_replace('/^.*\/([0-9]+)\/$/', '$1', (string) $nodes['nodes_chapter']->getAttribute('href')),
			//The first 13 characters of the title attribute are skipped before parsing the date.
			'last_updated'   => date("Y-m-d H:i:s", strtotime((string) substr($nodes['nodes_latest']->getAttribute('title'), 13))),
		];
	}
}
938
939
/*** FoolSlide sites ***/
940
941 View Code Duplication
class KireiCake extends Site_Model {
	public $site          = 'KireiCake';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Builds the address of a title's series page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://reader.kireicake.com/series/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * The number is "v<volume>/" (omitted when the volume is "0"), then
	 * "c<chapter>", then ".<extra>" when a CHAPTER_EXTRA segment is present.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => reader URL, 'number' => display number].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows the CHAPTER segment to be absent ("en/1"), so the
		//segments are read defensively instead of raising undefined-offset notices.
		$volume     = $chapter_parts[1] ?? '0';
		$chapterNum = $chapter_parts[2] ?? '';

		return [
			'url'    => "https://reader.kireicake.com/read/{$title_url}/{$chapter}/",
			'number' => ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNum}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * Fetches title data by passing the series URL to parseFoolSlide().
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return mixed Whatever parseFoolSlide() returns (defined outside this class).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
964
965 View Code Duplication
class SeaOtterScans extends Site_Model {
	public $site          = 'SeaOtterScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Builds the address of a title's series page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://reader.seaotterscans.com/series/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * The number is "v<volume>/" (omitted when the volume is "0"), then
	 * "c<chapter>", then ".<extra>" when a CHAPTER_EXTRA segment is present.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => reader URL, 'number' => display number].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows the CHAPTER segment to be absent ("en/1"), so the
		//segments are read defensively instead of raising undefined-offset notices.
		$volume     = $chapter_parts[1] ?? '0';
		$chapterNum = $chapter_parts[2] ?? '';

		return [
			'url'    => "https://reader.seaotterscans.com/read/{$title_url}/{$chapter}/",
			'number' => ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNum}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * Fetches title data by passing the series URL to parseFoolSlide().
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return mixed Whatever parseFoolSlide() returns (defined outside this class).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
988
989 View Code Duplication
class HelveticaScans extends Site_Model {
	public $site          = 'HelveticaScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Builds the address of a title's series page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://helveticascans.com/reader/series/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * The number is "v<volume>/" (omitted when the volume is "0"), then
	 * "c<chapter>", then ".<extra>" when a CHAPTER_EXTRA segment is present.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => reader URL, 'number' => display number].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows the CHAPTER segment to be absent ("en/1"), so the
		//segments are read defensively instead of raising undefined-offset notices.
		$volume     = $chapter_parts[1] ?? '0';
		$chapterNum = $chapter_parts[2] ?? '';

		return [
			'url'    => "http://helveticascans.com/reader/read/{$title_url}/{$chapter}/",
			'number' => ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNum}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * Fetches title data by passing the series URL to parseFoolSlide().
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return mixed Whatever parseFoolSlide() returns (defined outside this class).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1012
1013 View Code Duplication
class SenseScans extends Site_Model {
	public $site          = 'SenseScans';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Builds the address of a title's series page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://reader.sensescans.com/series/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * The number is "v<volume>/" (omitted when the volume is "0"), then
	 * "c<chapter>", then ".<extra>" when a CHAPTER_EXTRA segment is present.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => reader URL, 'number' => display number].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows the CHAPTER segment to be absent ("en/1"), so the
		//segments are read defensively instead of raising undefined-offset notices.
		$volume     = $chapter_parts[1] ?? '0';
		$chapterNum = $chapter_parts[2] ?? '';

		return [
			'url'    => "http://reader.sensescans.com/read/{$title_url}/{$chapter}/",
			'number' => ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNum}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * Fetches title data by passing the series URL to parseFoolSlide().
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return mixed Whatever parseFoolSlide() returns (defined outside this class).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1036
1037 View Code Duplication
class JaiminisBox extends Site_Model {
	public $site          = 'JaiminisBox';
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^en\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';

	/**
	 * Builds the address of a title's series page.
	 *
	 * @param string $title_url Title slug.
	 * @return string Absolute URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "https://jaiminisbox.com/reader/series/{$title_url}";
	}

	/**
	 * Builds the reader URL and the display number for one chapter.
	 *
	 * The number is "v<volume>/" (omitted when the volume is "0"), then
	 * "c<chapter>", then ".<extra>" when a CHAPTER_EXTRA segment is present.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   LANG/VOLUME/CHAPTER/CHAPTER_EXTRA(/page/)
	 * @return array ['url' => reader URL, 'number' => display number].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows the CHAPTER segment to be absent ("en/1"), so the
		//segments are read defensively instead of raising undefined-offset notices.
		$volume     = $chapter_parts[1] ?? '0';
		$chapterNum = $chapter_parts[2] ?? '';

		return [
			'url'    => "https://jaiminisbox.com/reader/read/{$title_url}/{$chapter}/",
			'number' => ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNum}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * Fetches title data by passing the series URL to parseFoolSlide().
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return mixed Whatever parseFoolSlide() returns (defined outside this class).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) {
		$fullURL = $this->getFullTitleURL($title_url);
		return $this->parseFoolSlide($fullURL, $title_url);
	}
}
1060