|
1
|
|
|
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed'); |
|
2
|
|
|
|
|
3
|
|
|
class WebToons extends Base_Site_Model { |
|
4
|
|
|
/* Webtoons.com has a very weird and pointless URL format.
   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (well, alphanumeric at least).
   The RSS URL, however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
   We <could> skip the RSS URL and just parse via the title URL, but RSS is much better in the long run as it shouldn't change much.

   FORMATS:
   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
   CHAPTER:   ID:--:CHAPTER_N
*/
|
17
|
|
|
//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

/**
 * Pattern a stored title string must match: ID:--:LANG:--:TITLE:--:GENRE.
 * The language and genre alternatives are whitelisted directly in the expression.
 *
 * @var string
 */
public $titleFormat = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi|sf)$/';

/**
 * Pattern a stored chapter string must match: ID:--:CHAPTER_N.
 * Only the numeric ID is constrained; anything may follow the separator.
 *
 * @var string
 */
public $chapterFormat = '/^[0-9]+:--:.*$/';
|
21
|
|
|
|
|
22
|
|
|
/**
 * Build the public "list" page URL for a stored title string.
 *
 * @param string $title_url Stored title string: ID:--:LANG:--:TITLE:--:GENRE
 *
 * @return string Absolute URL of the title's episode list.
 */
public function getFullTitleURL(string $title_url) : string {
	// Stored order is ID, LANG, TITLE, GENRE; the site wants LANG/GENRE/TITLE in the path.
	[$titleID, $lang, $slug, $genre] = explode(':--:', $title_url);

	return "http://www.webtoons.com/{$lang}/{$genre}/{$slug}/list?title_no={$titleID}";
}
|
26
|
|
|
|
|
27
|
|
|
/**
 * Build the viewer URL and chapter number for a stored chapter string.
 *
 * @param string $title_url Stored title string: ID:--:LANG:--:TITLE:--:GENRE
 * @param string $chapter   Stored chapter string: ID:--:CHAPTER_N
 *
 * @return array ['url' => string, 'number' => string]
 */
public function getChapterData(string $title_url, string $chapter) : array {
	$title_parts = explode(':--:', $title_url);

	// $chapterFormat allows anything after the first separator, so split at most once;
	// an unlimited split would truncate a CHAPTER_N that itself contains ':--:'.
	$chapter_parts = explode(':--:', $chapter, 2);

	return [
		'url'    => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
		'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
	];
}
|
36
|
|
|
|
|
37
|
|
|
/**
 * Fetch the title's RSS feed and extract its name, latest chapter and last update time.
 *
 * @param string $title_url Stored title string: ID:--:LANG:--:TITLE:--:GENRE
 * @param bool   $firstGet  When TRUE, the result of doCustomFollow() is merged into the returned data.
 *
 * @return array|null Array with 'title', 'latest_chapter' (ID:--:CHAPTER_N) and 'last_updated'
 *                    (Y-m-d H:i:s), plus whatever doCustomFollow() adds on first get;
 *                    NULL when the feed is missing, unreadable or has no items.
 */
public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
	$titleData = [];

	//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

	//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
	$title_parts = explode(':--:', $title_url);
	if(count($title_parts) < 4) {
		log_message('error', "Malformed title_url (WebToons): {$title_url}");
		return NULL;
	}
	$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

	$content = $this->get_content($fullURL);
	// NOTE(review): get_content() is defined outside this view; presumably it returns a non-array on failure - confirm.
	// With strict_types enabled a non-string body would otherwise raise a TypeError in simplexml_load_string().
	if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
		log_message('error', "Failed to grab RSS (WebToons): {$title_url}");
		return NULL;
	}

	$data = $content['body'];
	if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
		log_message('error', "Series missing? (WebToons): {$title_url}");
		return NULL;
	}

	// Collect libxml parse errors internally instead of emitting PHP warnings, then restore the previous setting.
	$previousSetting = libxml_use_internal_errors(TRUE);
	$xml = simplexml_load_string($data);
	libxml_clear_errors();
	libxml_use_internal_errors($previousSetting);

	if(!$xml) {
		log_message('error', "URL isn't valid XML/RSS? (WebToons): {$title_url}");
		return NULL;
	}

	// A feed without any items carries nothing we can track.
	if(!isset($xml->{'channel'}->item[0])) {
		return NULL;
	}
	$latestItem = $xml->{'channel'}->item[0];

	$titleData['title'] = trim((string) $xml->{'channel'}->title);

	// Expected link: http://host/LANG/GENRE/TITLE/CHAPTER/viewer?title_no=X&episode_no=Y
	// Split on '/', segment 6 is the chapter slug and segment 7 ends with the episode id.
	$chapterURLSegments = explode('/', ((string) $latestItem->link));
	if(!isset($chapterURLSegments[6], $chapterURLSegments[7])) {
		log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
		return NULL;
	}
	$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];

	// strtotime() returns FALSE on an unparsable date, which date() rejects under strict_types.
	$timestamp = strtotime((string) $latestItem->pubDate);
	if($timestamp === FALSE) {
		log_message('error', "Unparsable pubDate (WebToons): {$title_url}");
		return NULL;
	}
	$titleData['last_updated'] = date("Y-m-d H:i:s", $timestamp);

	if($firstGet) {
		$titleData = array_merge($titleData, $this->doCustomFollow($content['body'], ['id' => $title_parts[0]]));
	}

	return (!empty($titleData) ? $titleData : NULL);
}
|
72
|
|
|
|
|
73
|
|
|
/**
 * Send the "set favorite" request for a title and hand the response to the callback.
 *
 * @param callable $callback Invoked with (response, title id, verification closure).
 * @param string   $data     Not used by this implementation.
 * @param array    $extra    Must contain 'id': the numeric title id.
 */
public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
	$query = http_build_query([
		'titleNo'       => $extra['id'],
		'currentStatus' => 'false',
		'promotionName' => ''
	]);

	// Single session cookie taken from the 'webtoons_cookie' config item.
	$cookieHeader = "NEO_SES={$this->config->item('webtoons_cookie')}";

	$response = $this->get_content("http://www.webtoons.com/setFavorite?{$query}", $cookieHeader, "", TRUE);

	// The follow counts as successful when the response body reports the favorite flag as true.
	$isFavorited = function($body) {
		return strpos($body, '"favorite":true') !== FALSE;
	};

	$callback($response, $extra['id'], $isFavorited);
}
|
89
|
|
|
/**
 * Placeholder for a bulk update driven by the account's /favorite page.
 *
 * The whole implementation is commented out, so this method currently does nothing
 * and yields NULL. The disabled draft is kept below for reference.
 */
public function doCustomUpdate() {
	// NOTE(review): the disabled draft still contains a debug `print`, and $nodes_title
	// uses the same XPath as $nodes_latest - both need attention before re-enabling.
	/*$titleDataList = [];

	$cookies = [
		"NEO_SES={$this->config->item('webtoons_cookie')}"
	];
	$content = $this->get_content('http://www.webtoons.com/favorite', implode("; ", $cookies), "", TRUE);

	if(!is_array($content)) {
		log_message('error', "{$this->site} /favorite | Failed to grab URL (See above curl error)");
	} else {
		$headers     = $content['headers'];
		$status_code = $content['status_code'];
		$data        = $content['body'];

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} /favorite | Bad Status Code ({$status_code})");
		} else if(empty($data)) {
			log_message('error', "{$this->site} /favorite | Data is empty? (Status code: {$status_code})");
		} else {
			$data = preg_replace('/^[\s\S]+<\!-- container -->/', '<!-- container -->', $data);
			$data = preg_replace('/<\!-- \/\/container -->[\s\S]+$/', '<!-- //container -->', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//ul[@id='_webtoonList']/li/a");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("span[@class='update']", $row);
					$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
					$nodes_latest  = $xpath->query("span[@class='update']", $row);

					print $nodes_latest->length;
					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						// $title = $nodes_title->item(0);
						//
						// $titleData['title'] = trim($title->textContent);
						//
						//
						// $link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
						// $chapterURLSegments = explode('/', $link);
						// $titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
						//
						// $titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));
						//
						// $title_url = explode('/', $title->getAttribute('href'))[4];
						// $titleDataList[$title_url] = $titleData;
					} else {
						// log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		}
	}
	return $titleDataList;*/
}
|
153
|
|
|
/**
 * Custom chapter comparison hook; this implementation has an empty body.
 *
 * @param string $oldChapterString Previously stored chapter string.
 * @param string $newChapterString Newly fetched chapter string.
 */
public function doCustomCheck(string $oldChapterString, string $newChapterString) {
	// No-op: nothing is compared and nothing is returned.
}
|
154
|
|
|
} |
|
155
|
|
|
|