WebToons::getTitleData() - Code Metrics - Inspection of "exclude site models from scrutinizer" - DakuTree/manga-tracker - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 222f63...75778e )

by Angus

created 2017-12-20 22:03 UTC

WebToons::getTitleData() B

↳ Parent: Project

Complexity

Conditions	6
Paths	9

Size

Total Lines	35
Code Lines	22

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	0
CRAP Score	42

Importance

Changes

Metric	Value
cc	6
eloc	22
nc	9
nop	2
dl	0
loc	35
ccs	0
cts	20
cp	0
crap	42
rs	8.439
c	0
b	0
f	0

<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Site model for webtoons.com.
 *
 * Translates the tracker's compact title/chapter identifiers into webtoons.com URLs and
 * reads a series' RSS feed to find its title, latest chapter and last update time.
 */
class WebToons extends Base_Site_Model {
	/* Webtoons.com has a very weird and pointless URL format.
	   TITLE URL:   /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
	   RSS URL:     /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
	   CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#

	   For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
	   The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
	   We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.

	   FORMATS:
	   TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
	   CHAPTER:   ID:--:CHAPTER_N
	*/
	//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];

	/** Regex describing a valid stored title identifier: ID:--:LANG:--:TITLE:--:GENRE. */
	public $titleFormat   = '/^[0-9]+:--:(?:en|zh-hant|zh-hans|th|id):--:[a-z0-9-]+:--:(?:drama|fantasy|comedy|action|slice-of-life|romance|superhero|thriller|sports|sci-fi|sf)$/';
	/** Regex describing a valid stored chapter identifier: ID:--:CHAPTER_N. */
	public $chapterFormat = '/^[0-9]+:--:.*$/';

	/**
	 * Builds the series listing URL for a stored title identifier.
	 *
	 * @param string $title_url Title identifier in ID:--:LANG:--:TITLE:--:GENRE form.
	 *                          No validation is done here; a malformed value yields undefined offsets.
	 *
	 * @return string The /list?title_no=ID URL of the series.
	 */
	public function getFullTitleURL(string $title_url) : string {
		$title_parts = explode(':--:', $title_url);
		return $this->buildSeriesBaseURL($title_parts) . "/list?title_no={$title_parts[0]}";
	}

	/**
	 * Builds the viewer URL and display number for a stored chapter identifier.
	 *
	 * @param string $title_url Title identifier in ID:--:LANG:--:TITLE:--:GENRE form.
	 * @param string $chapter   Chapter identifier in ID:--:CHAPTER_N form.
	 *
	 * @return array Array with keys 'url' (viewer URL) and 'number' (the CHAPTER_N part, unmodified).
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$title_parts   = explode(':--:', $title_url);
		$chapter_parts = explode(':--:', $chapter);

		return [
			'url'    => $this->buildSeriesBaseURL($title_parts) . "/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
		];
	}

	/**
	 * Fetches the series RSS feed and extracts the title, latest chapter and last update time.
	 *
	 * Every failure path (malformed identifier, failed request, missing series, invalid XML,
	 * empty feed, unexpected link or date format) is logged where useful and reported as NULL.
	 *
	 * @param string $title_url Title identifier in ID:--:LANG:--:TITLE:--:GENRE form.
	 * @param bool   $firstGet  When TRUE, the result of doCustomFollow() is merged into the returned data.
	 *
	 * @return array|null Array with 'title', 'latest_chapter' (EPISODEID:--:CHAPTER) and
	 *                    'last_updated' (Y-m-d H:i:s), or NULL when nothing usable was found.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.

		//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
		$title_parts = explode(':--:', $title_url);
		if(count($title_parts) !== 4) {
			// Without all four parts the RSS URL cannot be built, so don't bother sending a request.
			log_message('error', "Invalid title_url format (WebToons): {$title_url}");
			return NULL;
		}
		$fullURL = $this->buildSeriesBaseURL($title_parts) . "/rss?title_no={$title_parts[0]}";

		$content = $this->get_content($fullURL);
		if(!is_array($content) || !is_string($content['body'] ?? NULL)) {
			// With strict_types enabled, handing a non-string body to simplexml_load_string() would throw.
			log_message('error', "Failed to grab URL (WebToons): {$title_url}");
			return NULL;
		}

		$data = $content['body'];
		if($data === 'Can\'t find the manga series.') { //FIXME: We should check for the proper error here.
			log_message('error', "Series missing? (WebToons): {$title_url}");
			return NULL;
		}

		// Collect libxml parse errors internally; a parse failure is logged explicitly below instead.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$xml = simplexml_load_string($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		if(!$xml) {
			log_message('error', "URL isn't valid XML/RSS? (WebToons): {$title_url}");
			return NULL;
		}
		if(!isset($xml->{'channel'}->item[0])) {
			// Feed parsed fine but lists no chapters.
			return NULL;
		}
		$latestItem = $xml->{'channel'}->item[0];

		// Link is split on '/'; index 6 is used as the chapter name and index 7 as the viewer segment ending in the episode id.
		$chapterURLSegments = explode('/', (string) $latestItem->link);
		if(!isset($chapterURLSegments[6], $chapterURLSegments[7])) {
			log_message('error', "Unexpected chapter link format (WebToons): {$title_url}");
			return NULL;
		}

		// strtotime() returns FALSE on unparsable input, which date() rejects under strict_types.
		$timestamp = strtotime((string) $latestItem->pubDate);
		if($timestamp === FALSE) {
			log_message('error', "Unexpected pubDate format (WebToons): {$title_url}");
			return NULL;
		}

		$titleData = [
			'title'          => trim((string) $xml->{'channel'}->title),
			'latest_chapter' => preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];

		if($firstGet) {
			$titleData = array_merge($titleData, $this->doCustomFollow($content['body'], ['id' => $title_parts[0]]));
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Requests /setFavorite for a series using the configured NEO_SES cookie, then hands the response to $callback.
	 *
	 * @param callable $callback Invoked with the response, the series id and a closure that reports
	 *                           whether a response body contains '"favorite":true'.
	 * @param string   $data     Unused by this implementation.
	 * @param array    $extra    Must contain 'id', the webtoons title number.
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		$formData = [
			'titleNo'       => $extra['id'],
			'currentStatus' => 'false',
			'promotionName' => ''
		];

		$cookies = [
			"NEO_SES={$this->config->item('webtoons_cookie')}"
		];
		$content = $this->get_content('http://www.webtoons.com/setFavorite?'.http_build_query($formData), implode("; ", $cookies), "", TRUE);

		$callback($content, $extra['id'], function($body) {
			return strpos($body, '"favorite":true') !== FALSE;
		});
	}

	/**
	 * Currently a no-op: the entire body is commented out, so nothing is fetched or returned.
	 *
	 * The disabled draft below scrapes the /favorite page for followed series.
	 * NOTE(review): in the draft, $nodes_title and $nodes_latest run the same XPath query and a
	 * debug `print` is left in - both look unfinished; confirm before re-enabling.
	 */
	public function doCustomUpdate() {
		/*$titleDataList = [];

		$cookies = [
			"NEO_SES={$this->config->item('webtoons_cookie')}"
		];
		$content = $this->get_content('http://www.webtoons.com/favorite', implode("; ", $cookies), "", TRUE);

		if(!is_array($content)) {
			log_message('error', "{$this->site} /favorite | Failed to grab URL (See above curl error)");
		} else {
			$headers     = $content['headers'];
			$status_code = $content['status_code'];
			$data        = $content['body'];

			if(!($status_code >= 200 && $status_code < 300)) {
				log_message('error', "{$this->site} /favorite | Bad Status Code ({$status_code})");
			} else if(empty($data)) {
				log_message('error', "{$this->site} /favorite | Data is empty? (Status code: {$status_code})");
			} else {
				$data = preg_replace('/^[\s\S]+<\!-- container -->/', '<!-- container -->', $data);
				$data = preg_replace('/<\!-- \/\/container -->[\s\S]+$/', '<!-- //container -->', $data);

				$dom = new DOMDocument();
				libxml_use_internal_errors(TRUE);
				$dom->loadHTML($data);
				libxml_use_internal_errors(FALSE);

				$xpath      = new DOMXPath($dom);
				$nodes_rows = $xpath->query("//ul[@id='_webtoonList']/li/a");
				if($nodes_rows->length > 0) {
					foreach($nodes_rows as $row) {
						$titleData = [];

						$nodes_title   = $xpath->query("span[@class='update']", $row);
						$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
						$nodes_latest  = $xpath->query("span[@class='update']", $row);

						print $nodes_latest->length;
						if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
					//		$title = $nodes_title->item(0);
					//
					//		$titleData['title'] = trim($title->textContent);
					//
					//
					//		$link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
					//		$chapterURLSegments = explode('/', $link);
					//		$titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
					//
					//		$titleData['last_updated'] =  date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));
					//
					//		$title_url = explode('/', $title->getAttribute('href'))[4];
					//		$titleDataList[$title_url] = $titleData;
						} else {
					//		log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						}
					}
				} else {
					log_message('error', "{$this->site} | Following list is empty?");
				}
			}
		}
		return $titleDataList;*/
	}

	/**
	 * Intentionally empty; both arguments are ignored.
	 *
	 * @param string $oldChapterString Unused.
	 * @param string $newChapterString Unused.
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) {}

	/**
	 * Builds the shared "http://www.webtoons.com/LANG/GENRE/TITLE" prefix used by every series URL.
	 *
	 * @param array $title_parts Result of exploding a title identifier on ':--:' (ID, LANG, TITLE, GENRE).
	 *
	 * @return string URL prefix without a trailing slash.
	 */
	private function buildSeriesBaseURL(array $title_parts) : string {
		// Stored order is ID, LANG, TITLE, GENRE, but the URL wants LANG/GENRE/TITLE.
		return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}";
	}
}


1			<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3			class WebToons extends Base_Site_Model {
4			/* Webtoons.com has a very weird and pointless URL format.
5			TITLE URL: /#LANG#/#GENRE#/#TITLE#/list?title_no=#TITLEID#
6			RSS URL: /#LANG#/#GENRE#/#TITLE#/rss?title_no=#TITLEID#
7			CHAPTER URL: /#LANG#/#GENRE#/#TITLE#/#CHAPTER#/viewer?title_no=#TITLEID#&episode_no=#CHAPTERID#
8
9			For both the title and chapter URLs, only the TITLEID and CHAPTERID are needed. Everything else can be anything at all (Well, alphanumeric at least).
10			The RSS URL however, requires everything to be exactly correct. I have no idea why this is, but it does mean we need to store all that info too.
11			We <could> not use the RSS url, and just parse via the title url, but rss is much better in the long run as it shouldn't change much.
12
13			FORMATS:
14			TITLE_URL: ID:--:LANG:--:TITLE:--:GENRE
15			CHAPTER: ID:--:CHAPTER_N
16			*/
17			//private $validLang = ['en', 'zh-hant', 'zh-hans', 'th', 'id'];
18
19			public $titleFormat = '/^[0-9]+:--:(?:en\|zh-hant\|zh-hans\|th\|id):--:[a-z0-9-]+:--:(?:drama\|fantasy\|comedy\|action\|slice-of-life\|romance\|superhero\|thriller\|sports\|sci-fi\|sf)$/';
20			public $chapterFormat = '/^[0-9]+:--:.*$/';
21
22			public function getFullTitleURL(string $title_url) : string {
23			$title_parts = explode(':--:', $title_url);
24			return "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/list?title_no={$title_parts[0]}";
25			}
26
27			public function getChapterData(string $title_url, string $chapter) : array {
28			$title_parts = explode(':--:', $title_url);
29			$chapter_parts = explode(':--:', $chapter);
30
31			return [
32			'url' => "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/{$chapter_parts[1]}/viewer?title_no={$title_parts[0]}&episode_no={$chapter_parts[0]}",
33			'number' => $chapter_parts[1] //TODO: Possibly replace certain formats in here? Since webtoons doesn't have a standard chapter format
34			];
35			}
36
37			public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
38			$titleData = [];
39
40			//FIXME: We don't use parseTitleDOM here due to using rss. Should probably have an alternate method for XML parsing.
41
42			//NOTE: getTitleData uses a different FullTitleURL due to it grabbing the rss ver. instead.
43			$title_parts = explode(':--:', $title_url);
44			$fullURL = "http://www.webtoons.com/{$title_parts[1]}/{$title_parts[3]}/{$title_parts[2]}/rss?title_no={$title_parts[0]}";
45
46			$content = $this->get_content($fullURL);
47			$data = $content['body'];
48			if($data !== 'Can\'t find the manga series.') { //FIXME: We should check for he proper error here.
49			$xml = simplexml_load_string($data);
50			if($xml) {
51			if(isset($xml->{'channel'}->item[0])) {
52			$titleData['title'] = trim((string) $xml->{'channel'}->title);
53
54			$chapterURLSegments = explode('/', ((string) $xml->{'channel'}->item[0]->link));
55			$titleData['latest_chapter'] = preg_replace('/^.*?([0-9]+)$/', '$1', $chapterURLSegments[7]) . ':--:' . $chapterURLSegments[6];
56			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $xml->{'channel'}->item[0]->pubDate));
57
58			if($firstGet) {
59			$titleData = array_merge($titleData, $this->doCustomFollow($content['body'], ['id' => $title_parts[0]]));
60			}
61			}
62			} else {
63			log_message('error', "URL isn't valid XML/RSS? (WebToons): {$title_url}");
64			}
65			} else {
66			log_message('error', "Series missing? (WebToons): {$title_url}");
67			return NULL;
68			}
69
70			return (!empty($titleData) ? $titleData : NULL);
71			}
72
73			public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
74			$formData = [
75			'titleNo' => $extra['id'],
76			'currentStatus' => 'false',
77			'promotionName' => ''
78			];
79
80			$cookies = [
81			"NEO_SES={$this->config->item('webtoons_cookie')}"
82			];
83			$content = $this->get_content('http://www.webtoons.com/setFavorite?'.http_build_query($formData), implode("; ", $cookies), "", TRUE);
84
85			$callback($content, $extra['id'], function($body) {
86			return strpos($body, '"favorite":true') !== FALSE;
87			});
88			}
89			public function doCustomUpdate() {
90			/*$titleDataList = [];
91
92			$cookies = [
93			"NEO_SES={$this->config->item('webtoons_cookie')}"
94			];
95			$content = $this->get_content('http://www.webtoons.com/favorite', implode("; ", $cookies), "", TRUE);
96
97			if(!is_array($content)) {
98			log_message('error', "{$this->site} /favorite \| Failed to grab URL (See above curl error)");
99			} else {
100			$headers = $content['headers'];
101			$status_code = $content['status_code'];
102			$data = $content['body'];
103
104			if(!($status_code >= 200 && $status_code < 300)) {
105			log_message('error', "{$this->site} /favorite \| Bad Status Code ({$status_code})");
106			} else if(empty($data)) {
107			log_message('error', "{$this->site} /favorite \| Data is empty? (Status code: {$status_code})");
108			} else {
109			$data = preg_replace('/^[\s\S]+<\!-- container -->/', '<!-- container -->', $data);
110			$data = preg_replace('/<\!-- \/\/container -->[\s\S]+$/', '<!-- //container -->', $data);
111
112			$dom = new DOMDocument();
113			libxml_use_internal_errors(TRUE);
114			$dom->loadHTML($data);
115			libxml_use_internal_errors(FALSE);
116
117			$xpath = new DOMXPath($dom);
118			$nodes_rows = $xpath->query("//ul[@id='_webtoonList']/li/a");
119			if($nodes_rows->length > 0) {
120			foreach($nodes_rows as $row) {
121			$titleData = [];
122
123			$nodes_title = $xpath->query("span[@class='update']", $row);
124			$nodes_chapter = $xpath->query("dl/dt[1]/a[@class='chapter']", $row);
125			$nodes_latest = $xpath->query("span[@class='update']", $row);
126
127			print $nodes_latest->length;
128			if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
129			// $title = $nodes_title->item(0);
130			//
131			// $titleData['title'] = trim($title->textContent);
132			//
133			//
134			// $link = preg_replace('/^(.*\/)(?:[0-9]+\.html)?$/', '$1', (string) $nodes_chapter->item(0)->getAttribute('href'));
135			// $chapterURLSegments = explode('/', $link);
136			// $titleData['latest_chapter'] = $chapterURLSegments[5] . (isset($chapterURLSegments[6]) && !empty($chapterURLSegments[6]) ? "/{$chapterURLSegments[6]}" : "");
137			//
138			// $titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->nodeValue));
139			//
140			// $title_url = explode('/', $title->getAttribute('href'))[4];
141			// $titleDataList[$title_url] = $titleData;
142			} else {
143			// log_message('error', "{$this->site}/Custom \| Invalid amount of nodes (TITLE: {$nodes_title->length} \| CHAPTER: {$nodes_chapter->length}) \| LATEST: {$nodes_latest->length})");
144			}
145			}
146			} else {
147			log_message('error', "{$this->site} \| Following list is empty?");
148			}
149			}
150			}
151			return $titleDataList;*/
152			}
153			public function doCustomCheck(string $oldChapterString, string $newChapterString) {}
154			}
155

DakuTree / manga-tracker

Push — master ( 222f63...75778e )

WebToons::getTitleData() B

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like