KissManga - Code Metrics - Inspection of "exclude site models from scrutinizer" - DakuTree/manga-tracker - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 222f63...75778e )

by Angus

created 2017-12-20 22:03 UTC

KissManga A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	78
Duplicated Lines	0 %

Coupling/Cohesion

Components	0
Dependencies	1

Test Coverage

Coverage

Importance

Changes

Metric	Value
dl	0
loc	78
ccs	0
cts	34
cp	0
rs	10
c	0
b	0
f	0
wmc	9
lcom	0
cbo	1

<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Site model for KissManga (kissmanga.com).
 *
 * Builds title/chapter URLs and scrapes a title page for the title name, the
 * latest chapter identifier and the time it was last updated.
 */
class KissManga extends Base_Site_Model {
	/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
	   The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.

	   To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
	   The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
	   The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.

	   To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
	   The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6 hours instead.
	   I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
	*/

	/** @var string Regex for a title URL segment: letters, digits and dashes only. */
	public $titleFormat   = '/^[A-Za-z0-9-]+$/';
	/** @var string Regex for a chapter identifier: "<chapter segment>:--:<numeric id>". */
	public $chapterFormat = '/^.*?:--:[0-9]+$/';

	/**
	 * Build the URL of a title's page.
	 *
	 * @param string $title_url Title URL segment.
	 *
	 * @return string Absolute URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "http://kissmanga.com/Manga/{$title_url}";
	}

	/**
	 * Build the URL and display number for a chapter.
	 *
	 * @param string $title_url Title URL segment.
	 * @param string $chapter   Chapter identifier in "<chapter segment>:--:<id>" form.
	 *
	 * @return array Array with keys 'url' and 'number'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode(':--:', $chapter);

		$chapter_name = $chapter_parts[0];
		//A malformed identifier (no ":--:") has no id part; fall back to an empty id instead of reading an undefined offset.
		$chapter_id   = $chapter_parts[1] ?? '';

		return [
			'url'    => "http://kissmanga.com/Manga/{$title_url}/{$chapter_name}?id={$chapter_id}",
			//FIXME: KM has an extremely inconsistent chapter format which makes it difficult to parse, so the raw chapter segment is used as the number.
			'number' => $chapter_name
		];
	}

	/**
	 * Scrape the title page for the title name, latest chapter and last update time.
	 *
	 * @param string $title_url Title URL segment.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null Array with keys 'title', 'latest_chapter' and 'last_updated' ("Y-m-d H:i:s"),
	 *                    or NULL when the cookiejar is missing/stale or the page could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		//Check if cookiejar is a day old (so we can know if something went wrong)
		$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);

		//filemtime() raises a warning when the file does not exist, so test for it first.
		$cookie_last_updated = is_file($cookiejar_path) ? filemtime($cookiejar_path) : FALSE;
		if(!$cookie_last_updated || (time() - 86400) >= $cookie_last_updated) {
			//Do nothing, wait until next update.
			//TODO: NAG ADMIN??
			return NULL;
		}

		$content = $this->get_content($this->getFullTitleURL($title_url), '', $cookiejar_path);
		//NOTE(review): assumes get_content returns an array-like value with a 'body' entry on success - confirm against Base_Site_Model.
		$data = $content['body'] ?? NULL;
		if(!is_string($data) || strpos($data, 'containerRoot') === FALSE) {
			//TODO: Throw ERRORS;
			return NULL;
		}

		//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
		$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
		if(!is_string($data) || $data === '') {
			//preg_replace returns NULL on a PCRE error; loadHTML cannot take that (or an empty string).
			return NULL;
		}

		//Silence markup warnings while parsing, then put libxml back exactly how we found it.
		$previous_libxml_state = libxml_use_internal_errors(TRUE);
		$dom    = new DOMDocument();
		$loaded = $dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previous_libxml_state);
		if(!$loaded) {
			return NULL;
		}

		$xpath = new DOMXPath($dom);

		$nodes_title = $xpath->query("//a[@class='bigChar']");
		$nodes_row   = $xpath->query("//table[@class='listing']/tr[3]");
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			return NULL;
		}

		$firstRow     = $nodes_row->item(0);
		$node_latest  = $xpath->query("td[2]",   $firstRow)->item(0);
		$node_chapter = $xpath->query("td[1]/a", $firstRow)->item(0);
		if($node_latest === NULL || !($node_chapter instanceof DOMElement)) {
			//Row layout is not what we expect; bail out rather than dereference a missing node.
			return NULL;
		}

		$link = $node_chapter->getAttribute('href');
		//The chapter segment is read from index 3 of the query-less href split on "/".
		$chapterURLSegments = explode('/', (string) preg_replace('/\?.*$/', '', $link));
		//The chapter id is the run of digits at the very end of the href.
		if(!isset($chapterURLSegments[3]) || preg_match('/([0-9]+)$/', $link, $id_match) !== 1) {
			return NULL;
		}

		//strtotime returns FALSE for unparseable text, which date() rejects with a TypeError under strict_types.
		$timestamp = strtotime((string) $node_latest->textContent);
		if($timestamp === FALSE) {
			return NULL;
		}

		return [
			'title'          => $nodes_title->item(0)->textContent,
			'latest_chapter' => $chapterURLSegments[3] . ':--:' . $id_match[1],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
}


1			<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3			class KissManga extends Base_Site_Model {
4			/* This site is a massive pain in the ass. The only reason I'm supporting it is it's one of the few aggregator sites which actually support more risqué manga.
5			The main problem with this site is it has some form of bot protection. To view any part of the site normally, you need a cookie set by the bot protection.
6
7			To generate this cookie, we need three variables. Two are static, but the other is generated by randomly generated JS on the page.
8			The randomly generated JS is the troublesome part. We can't easily parse this with PHP. Both V8JS & SpiderMonkey refuse to build properly for me, so that rules that out.
9			The other option is using regex, but that is a rabbit hole I don't want to touch with a ten-foot pole.
10
11			To make the entire site work, I've built a python script to handle grabbing this cookie. This is grabbed & updated at the same time the manga are updated. The script saves the cookiejar which the PHP later reads.
12			The cookie has a length of 1 year, but I don't think it actually lasts that long, so we update every 6hours instead.
13			I should probably also mention that the cookie generated also uses your user-agent, so if it changes the cookie will break.
14			*/
15
16			public $titleFormat = '/^[A-Za-z0-9-]+$/';
17			public $chapterFormat = '/^.*?:--:[0-9]+$/';
18
19			public function getFullTitleURL(string $title_url) : string {
20			return "http://kissmanga.com/Manga/{$title_url}";
21			}
22
23			public function getChapterData(string $title_url, string $chapter) : array {
24			$chapter_parts = explode(':--:', $chapter);
25
26			return [
27			'url' => "http://kissmanga.com/Manga/{$title_url}/{$chapter_parts[0]}?id={$chapter_parts[1]}",
28			//FIXME: KM has an extremely inconsistant chapter format which makes it difficult to parse.
29			'number' => /*preg_replace('/--.*?$/', '', */$chapter_parts[0]/*)*/
30			];
31			}
32
33			public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
34			$titleData = [];
35
36			//Check if cookiejar is a day old (so we can know if something went wrong)
37			$cookiejar_path = str_replace("public/", "_scripts/cookiejar", FCPATH);
38			$cookie_last_updated = filemtime($cookiejar_path);
39			if($cookie_last_updated && ((time() - 86400) < $cookie_last_updated)) {
40
41			$fullURL = $this->getFullTitleURL($title_url);
42
43			$content = $this->get_content($fullURL, '', $cookiejar_path);
44			$data = $content['body'];
45			if(strpos($data, 'containerRoot') !== FALSE) {
46			//FIXME: For whatever reason, we can't grab the entire div without simplexml shouting at us
47			$data = preg_replace('/^[\S\s]*(<div id="leftside">[\S\s]*)<div id="rightside">[\S\s]*$/', '$1', $data);
48
49			$dom = new DOMDocument();
50			libxml_use_internal_errors(true);
51			$dom->loadHTML($data);
52			libxml_use_internal_errors(false);
53
54			$xpath = new DOMXPath($dom);
55
56			$nodes_title = $xpath->query("//a[@class='bigChar']");
57			$nodes_row = $xpath->query("//table[@class='listing']/tr[3]");
58			if($nodes_title->length === 1 && $nodes_row->length === 1) {
59			$titleData['title'] = $nodes_title->item(0)->textContent;
60
61			$firstRow = $nodes_row->item(0);
62			$nodes_latest = $xpath->query("td[2]", $firstRow);
63			$nodes_chapter = $xpath->query("td[1]/a", $firstRow);
64
65			$link = (string) $nodes_chapter->item(0)->getAttribute('href');
66			$chapterURLSegments = explode('/', preg_replace('/\?.*$/', '', $link));
67			$titleData['latest_chapter'] = $chapterURLSegments[3] . ':--:' . preg_replace('/.*?([0-9]+)$/', '$1', $link);
68			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime((string) $nodes_latest->item(0)->textContent));
69			}
70			} else {
71			//TODO: Throw ERRORS;
72			}
73			} else {
74			//Do nothing, wait until next update.
75			//TODO: NAG ADMIN??
76			}
77
78			return (!empty($titleData) ? $titleData : NULL);
79			}
80			}
81

DakuTree / manga-tracker

Push — master ( 222f63...75778e )

KissManga A

Complexity

Size/Duplication

Coupling/Cohesion

Test Coverage

Importance

Duplication Side-by-Side

Filter issues like