Completed
Push — master ( 222f63...75778e )
by Angus
02:21
created

KissManga   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 78
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 1

Test Coverage

Coverage 0%

Importance

Changes 0
Metric Value
dl 0
loc 78
ccs 0
cts 34
cp 0
rs 10
c 0
b 0
f 0
wmc 9
lcom 0
cbo 1
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
// Strict scalar type checking is enabled for this file, and execution stops immediately
// (with the message above) unless the BASEPATH constant has already been defined.
2
3
class KissManga extends Base_Site_Model {
	/*
	 * Site model for KissManga.
	 *
	 * This site is painful to support. The only reason it is supported is that it is one of the few
	 * aggregator sites which actually carries more risqué manga.
	 *
	 * The main problem is that the site has some form of bot protection: to view any part of the site
	 * normally, you need a cookie set by that protection.
	 *
	 * Generating the cookie needs three variables. Two are static, but the third is produced by randomly
	 * generated JS on the page, which we can't easily evaluate from PHP (neither V8JS nor SpiderMonkey
	 * would build properly, and parsing it with regex is a rabbit hole not worth going down).
	 *
	 * To make the site work, a python script grabs the cookie and saves a cookiejar which this class later
	 * reads. The cookie is grabbed & refreshed at the same time the manga are updated.
	 * The cookie nominally lasts 1 year, but it probably does not survive that long in practice, so it is
	 * refreshed every 6 hours instead.
	 * NOTE: The generated cookie is also tied to the user-agent, so if the user-agent changes the cookie breaks.
	 */

	/** @var string Regex a title slug must match (letters, digits and dashes only). */
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';

	/** @var string Regex a stored chapter identifier must match: "<chapter slug>:--:<numeric id>". */
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * Build the URL of a title's main page.
	 *
	 * @param string $title_url Title slug as used in the site's URLs.
	 *
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Build the reader URL and display number for a stored chapter identifier.
	 *
	 * @param string $title_url Title slug as used in the site's URLs.
	 * @param string $chapter   Chapter identifier in the "<chapter slug>:--:<numeric id>" form.
	 *
	 * @return array ['url' => reader URL, 'number' => chapter slug (returned unparsed)].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		//A malformed identifier (no ":--:" separator) has no id part; fall back to an empty id instead of raising an undefined offset notice.
		$chapter_slug = $chapter_parts[0];
		$chapter_id   = $chapter_parts[1] ?? '';

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_slug}?id={$chapter_id}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse, so the slug is returned as-is.
			'number' => $chapter_slug
		];
	}

	/**
	 * Fetch a title page and extract its name, latest chapter and last-updated time.
	 *
	 * Nothing is fetched unless the cookiejar written by the external script exists and was modified
	 * within the last day. Any unexpected page structure results in NULL rather than an error.
	 *
	 * @param string $title_url Title slug as used in the site's URLs.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated' ("Y-m-d H:i:s")], or NULL when the
	 *                    cookiejar is missing/stale or the page could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);

		//filemtime() emits a warning on a missing file, so make sure the cookiejar exists first.
		$cookie_last_updated = is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE;
		if(!$cookie_last_updated || ((time() - 86400) >= $cookie_last_updated)) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			return NULL;
		}

		$content = $this->get_content($this->getFullTitleURL($title_url), '', $cookiejar_path);

		//NOTE(review): get_content() is assumed to return an array-like value with a string 'body' - anything else is treated as a failed request.
		$data = $content['body'] ?? NULL;
		if(!is_string($data) || strpos($data, 'containerRoot') === FALSE) {
			//TODO: Throw ERRORS;
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		if(!is_string($data)) {
			//preg_replace() returns NULL on a PCRE error (e.g. backtrack limit hit on a large page).
			return NULL;
		}

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then drop the buffered errors and restore whatever error mode was active before.
		$previous_libxml_state = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previous_libxml_state);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			return NULL;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query("td[2]",   $firstRow);
		$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
		if($nodes_latest->length === 0 || $nodes_chapter->length === 0) {
			//The row exists but doesn't have the expected date cell / chapter link.
			return NULL;
		}

		$link = (string) $nodes_chapter->item(0)->getAttribute('href');
		$chapterURLSegments = explode('/', (string) preg_replace('/\?.*$/', '', $link));

		//The chapter slug is the 4th path segment of the link and the chapter id is the trailing run of digits.
		if(!isset($chapterURLSegments[3]) || preg_match('/([0-9]+)$/', $link, $id_match) !== 1) {
			return NULL;
		}

		//strtotime() returns FALSE on an unparsable date, which date() would reject under strict_types.
		$timestamp = strtotime(trim((string) $nodes_latest->item(0)->textContent));
		if($timestamp === FALSE) {
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . $id_match[1],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
}
81