Completed
Push — master ( 222f63...75778e )
by Angus
02:21
created

WebToons::getTitleData()   B

Complexity

Conditions 6
Paths 9

Size

Total Lines 35
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 42

Importance

Changes 0
Metric Value
cc 6
eloc 22
nc 9
nop 2
dl 0
loc 35
ccs 0
cts 20
cp 0
crap 42
rs 8.439
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
class WebToons extends Base_Site_Model {
4
	/* Webtoons.com has a very weird and pointless URL format.
5
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
6
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
7
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#
8
9
	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
10
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
11
	   We <could> skip the RSS URL and just parse the title URL instead, but RSS is much better in the long run as it shouldn't change much.
12
13
	   FORMATS:
14
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
15
	   CHAPTER:   ID:--:CHAPTER_N
16
	*/
17
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];
18
19
	/**
	 * Regex a stored title string must match: ID:--:LANG:--:TITLE:--:GENRE.
	 * Split into one piece per segment purely for readability; the pieces
	 * concatenate to a single pattern.
	 */
	public $titleFormat   = '/^[0-9]+'                                  // ID
	                      . ':--:(?:en|zh-hant|zh-hans|th|id)'          // LANG
	                      . ':--:[a-z0-9-]+'                            // TITLE
	                      . ':--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi|sf)' // GENRE
	                      . '$/';

	/** Regex a stored chapter string must match: ID:--:CHAPTER_N. */
	public $chapterFormat = '/^[0-9]+:--:.*$/';
21
22
	/**
	 * Build the public "list" page URL for a series.
	 *
	 * @param string $title_url Stored title string in the form ID:--:LANG:--:TITLE:--:GENRE.
	 *
	 * @return string URL of the form /LANG/GENRE/TITLE/list?title_no=ID on www.webtoons.com.
	 */
	public function getFullTitleURL(string $title_url) : string {
		// The stored order is ID, LANG, TITLE, GENRE; the URL wants LANG, GENRE, TITLE.
		[$titleID, $lang, $slug, $genre] = explode(':--:', $title_url);

		return "http://www.webtoons.com/{$lang}/{$genre}/{$slug}/list?title_no={$titleID}";
	}
26
27
	/**
	 * Build the viewer URL and display number for one chapter of a series.
	 *
	 * @param string $title_url Stored title string in the form ID:--:LANG:--:TITLE:--:GENRE.
	 * @param string $chapter   Stored chapter string in the form ID:--:CHAPTER_N.
	 *
	 * @return array ['url' => viewer URL, 'number' => the CHAPTER_N part of $chapter].
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		[$titleID, $lang, $slug, $genre] = explode(':--:', $title_url);
		[$episodeID, $chapterName]       = explode(':--:', $chapter);

		$viewerURL = "http://www.webtoons.com/{$lang}/{$genre}/{$slug}/{$chapterName}/viewer?title_no={$titleID}&episode_no={$episodeID}";

		return [
			'url'    => $viewerURL,
			'number' => $chapterName //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}
36
37
	/**
	 * Fetch the current title data for a series from its RSS feed.
	 *
	 * The feed URL is built from all four parts of $title_url, which is
	 * assumed to already match $titleFormat. Every failure path logs an
	 * error and returns NULL instead of raising.
	 *
	 * @param string $title_url Stored title string in the form ID:--:LANG:--:TITLE:--:GENRE.
	 * @param bool   $firstGet  When TRUE, the result of doCustomFollow() is merged into the returned data.
	 *
	 * @return array|null NULL on any failure or when the feed has no items, otherwise:
	 *                    'title'          => channel title, trimmed
	 *                    'latest_chapter' => "EPISODE_NO:--:CHAPTER_SLUG" taken from the newest item's link
	 *                    'last_updated'   => newest item's pubDate as "Y-m-d H:i:s"
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		// get_content may not hand back an array (e.g. on a failed request); bail out before touching 'body'.
		if(!is_array($content) || !isset($content['body']) || !is_string($content['body'])) {
			log_message('error', "Failed to grab URL (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		// Keep libxml parse warnings out of the PHP error stream; a bad feed is reported via log_message below.
		$previousLibxmlSetting = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousLibxmlSetting);

		if(!$xml) {
			log_message('error', "URL isn't valid XML/RSS? (WebToons): {$title_url}");
			return NULL;
		}
		if(!isset($xml->{'channel'}->item[0])) {
			// Valid feed with no chapters yet: nothing to report.
			return NULL;
		}
		$latestItem = $xml->{'channel'}->item[0];

		// Chapter link: http://host/LANG/GENRE/TITLE/CHAPTER/viewer?title_no=ID&episode_no=N
		// Split on '/', segment 6 is CHAPTER and segment 7 ends with the episode number.
		$chapterURLSegments = explode('/', (string) $latestItem->link);
		if(!isset($chapterURLSegments[6], $chapterURLSegments[7]) || !preg_match('/^.*?([0-9]+)$/', $chapterURLSegments[7], $episodeMatch)) {
			log_message('error', "Unexpected chapter URL format (WebToons): {$title_url}");
			return NULL;
		}

		// strtotime returns FALSE for an unparseable date; date() must never receive that.
		$timestamp = strtotime((string) $latestItem->pubDate);
		if($timestamp === FALSE) {
			log_message('error', "Unparseable pubDate (WebToons): {$title_url}");
			return NULL;
		}

		$titleData = [
			'title'          => trim((string) $xml->{'channel'}->title),
			'latest_chapter' => $episodeMatch[1] . ':--:' . $chapterURLSegments[6],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];

		if($firstGet) {
			$titleData = array_merge($titleData, $this->doCustomFollow($content['body'], ['id' => $title_parts[0]]));
		}

		return $titleData;
	}
72
73
	/**
	 * Send the "set favorite" request for a title and hand the response to $callback.
	 *
	 * The request goes to /setFavorite with the title ID in the query string and
	 * a NEO_SES cookie read from the 'webtoons_cookie' config item.
	 *
	 * @param callable $callback Called as $callback($content, $titleID, $check), where $check($body)
	 *                           returns TRUE when $body contains '"favorite":true'.
	 * @param string   $data     Unused by this implementation.
	 * @param array    $extra    Must contain 'id' (the title ID).
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		$titleID = $extra['id'];

		$query = http_build_query([
			'titleNo'       => $titleID,
			'currentStatus' => 'false',
			'promotionName' => ''
		]);
		$cookieHeader = implode("; ", [
			"NEO_SES={$this->config->item('webtoons_cookie')}"
		]);

		$content = $this->get_content('http://www.webtoons.com/setFavorite?'.$query, $cookieHeader, "", TRUE);

		// The callback decides success by running this check against the response body.
		$isFavorited = function($body) {
			return strpos($body, '"favorite":true') !== FALSE;
		};
		$callback($content, $titleID, $isFavorited);
	}
89
	/**
	 * Custom update hook. Currently a no-op: the whole implementation below is
	 * commented out, so the method returns nothing.
	 *
	 * The disabled draft fetches /favorite with the NEO_SES cookie and walks the
	 * '_webtoonList' entries; its per-row parsing is itself still commented out.
	 */
	public function doCustomUpdate() {
		/*$titleDataList = [];

		$cookies = [
			"NEO_SES={$this->config->item('webtoons_cookie')}"
		];
		$content = $this->get_content('http://www.webtoons.com/favorite', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /favorite | Failed to grab URL (See above curl error)");
		} else {
			$headers     = $content['headers'];
			$status_code = $content['status_code'];
			$data        = $content['body'];

			if(!($status_code >= 200 && $status_code < 300)) {
				log_message('error', "{$this->site} /favorite | Bad Status Code ({$status_code})");
			} else if(empty($data)) {
				log_message('error', "{$this->site} /favorite | Data is empty? (Status code: {$status_code})");
			} else {
				$data = preg_replace('/^[\s\S]+<\!-- container -->/', '<!-- container -->', $data);
				$data = preg_replace('/<\!-- \/\/container -->[\s\S]+$/', '<!-- //container -->', $data);

				$dom = new DOMDocument();
				libxml_use_internal_errors(TRUE);
				$dom->loadHTML($data);
				libxml_use_internal_errors(FALSE);

				$xpath      = new DOMXPath($dom);
				$nodes_rows = $xpath->query("//ul[@id='_webtoonList']/li/a");
				if($nodes_rows->length > 0) {
					foreach($nodes_rows as $row) {
						$titleData = [];

						$nodes_title   = $xpath->query("span[@class='update']", $row);
						$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
						$nodes_latest  = $xpath->query("span[@class='update']", $row);

						print $nodes_latest->length;
						if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
					//		$title = $nodes_title->item(0);
					//
					//		$titleData['title'] = trim($title->textContent);
					//
					//
					//		$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
					//		$chapterURLSegments = explode('/', $link);
					//		$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
					//
					//		$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));
					//
					//		$title_url = explode('/', $title->getAttribute('href'))[4];
					//		$titleDataList[$title_url] = $titleData;
						} else {
					//		log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						}
					}
				} else {
					log_message('error', "{$this->site} | Following list is empty?");
				}
			}
		}
		return $titleDataList;*/
	}
153
	/**
	 * Custom check hook comparing an old and a new chapter string.
	 * Intentionally empty for this site: it does nothing and returns nothing.
	 *
	 * @param string $oldChapterString Unused.
	 * @param string $newChapterString Unused.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {
		// No custom check for WebToons.
	}
154
}
155