|
1
|
|
|
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed'); |
|
2
|
|
|
|
|
3
|
|
|
class KissManga extends Base_Site_Model {
	/*
	 * This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the
	 * few aggregator sites which actually support more risqué manga.
	 *
	 * The main problem with this site is it has some form of bot protection. To view any part of the
	 * site normally, you need a cookie set by the bot protection.
	 *
	 * To generate this cookie, we need three variables. Two are static, but the other is generated by
	 * randomly generated JS on the page.
	 * The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both
	 * V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	 * The other option is using regex, but that is a rabbit hole I don't want to touch with a
	 * ten-foot pole.
	 *
	 * To make the entire site work, I've built a python script to handle grabbing this cookie. This is
	 * grabbed & updated at the same time the manga are updated. The script saves the cookiejar which
	 * the PHP later reads.
	 * The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update
	 * every 6 hours instead.
	 * I should probably also mention that the cookie generated also uses your user-agent, so if it
	 * changes the cookie will break.
	 */

	// Pattern a title URL slug must match.
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	// Pattern a stored chapter string must match: "<chapter URL segment>:--:<numeric id>".
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * Build the absolute URL of a title's page.
	 *
	 * @param string $title_url Title slug as it appears in the site URL.
	 *
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Build the chapter URL and chapter "number" from a stored chapter string.
	 *
	 * @param string $title_url Title slug as it appears in the site URL.
	 * @param string $chapter   Stored chapter string: "<chapter URL segment>:--:<id>".
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		$chapter_segment = $chapter_parts[0];
		// A string without the ':--:' separator has no ID part; fall back to an empty ID
		// rather than reading an undefined offset.
		$chapter_id      = $chapter_parts[1] ?? '';

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_segment}?id={$chapter_id}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse.
			//       For now the raw URL segment is returned as the number (a cleanup via
			//       preg_replace('/--.*?$/', '', ...) is left disabled).
			'number' => $chapter_segment
		];
	}

	/**
	 * Fetch a title page and extract its title, latest chapter and last-updated time.
	 *
	 * The page is requested with the cookiejar found next to the public directory; the request
	 * is skipped entirely when that jar is missing or has not been modified in the last 24 hours.
	 *
	 * @param string $title_url Title slug as it appears in the site URL.
	 * @param bool   $firstGet  Not used by this site model.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'] or NULL when the data
	 *                    could not be retrieved or parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);

		//The cookiejar should be refreshed far more often than daily, so a jar that is missing
		//or more than a day old means something went wrong with the updater.
		//(is_file() first: filemtime() raises a warning on a missing file.)
		$cookie_last_updated = is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE;
		if(!$cookie_last_updated || (time() - 86400) >= $cookie_last_updated) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			return NULL;
		}

		$content = $this->get_content($this->getFullTitleURL($title_url), '', $cookiejar_path);
		$data    = $content['body'] ?? NULL;
		//A missing/non-string body or a page without 'containerRoot' is treated as a failed request.
		if(!is_string($data) || strpos($data, 'containerRoot') === FALSE) {
			//TODO: Throw ERRORS;
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		//preg_replace() returns NULL on a PCRE error (e.g. backtrack limit on a huge page).
		if(!is_string($data) || $data === '') {
			return NULL;
		}

		$dom = new DOMDocument();
		//Silence markup warnings while parsing, then put libxml back the way we found it.
		$previous_libxml_setting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previous_libxml_setting);

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			return NULL;
		}

		$firstRow      = $nodes_row->item(0);
		$nodes_latest  = $xpath->query("td[2]", $firstRow);
		$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
		//item(0) is NULL on an empty list; bail out instead of dereferencing it.
		if($nodes_latest->length === 0 || $nodes_chapter->length === 0) {
			return NULL;
		}

		$link = (string) $nodes_chapter->item(0)->getAttribute('href');

		//Index 3 of the query-less path is used as the chapter segment.
		$chapterURLSegments = explode('/', (string) preg_replace('/\?.*$/', '', $link));
		if(!isset($chapterURLSegments[3]) || $chapterURLSegments[3] === '') {
			return NULL;
		}

		//The chapter ID is the run of digits at the very end of the link.
		if(preg_match('/([0-9]+)$/', $link, $id_match) !== 1) {
			return NULL;
		}

		//strtotime() returns FALSE on unparsable text; passing that to date() is a TypeError
		//under strict_types.
		$timestamp = strtotime(trim((string) $nodes_latest->item(0)->textContent));
		if($timestamp === FALSE) {
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . $id_match[1],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
}
|
81
|
|
|
|