Completed
Push — master ( 10a673...d34665 )
by Angus
03:21
created

Base_GlossyBright_Site_Model::getChapterData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 4
nc 1
nop 2
dl 0
loc 6
ccs 0
cts 3
cp 0
crap 2
rs 9.4285
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Class Tracker_Sites_Model
 *
 * Lazy accessor for site models. Reading a property whose name is an existing class
 * with one of the known site base classes as its direct parent instantiates that class,
 * stores it on this object and returns it. Any other property name is forwarded to the
 * CodeIgniter super-object.
 */
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolves an undefined property to either a site model or a CodeIgniter property.
	 *
	 * @param string $name Property (or site model class) being accessed.
	 * @return mixed
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$siteBaseClasses = [
			'Base_Site_Model',
			'Base_FoolSlide_Site_Model',
			'Base_myMangaReaderCMS_Site_Model',
			'Base_GlossyBright_Site_Model'
		];

		//Only the direct parent class is checked, and only when the class actually exists.
		$isSiteModel = class_exists($name) && in_array(get_parent_class($name), $siteBaseClasses);
		if($isSiteModel) {
			$this->loadSite($name);
			return $this->{$name};
		}

		return get_instance()->{$name};
	}

	/**
	 * Instantiates the given site model and stores it as a property named after the class.
	 *
	 * @param string $siteName
	 */
	private function loadSite(string $siteName) : void {
		$this->{$siteName} = new $siteName();
	}
}
27
28
abstract class Base_Site_Model extends CI_Model {
29
	public $site          = '';
30
	public $titleFormat   = '//';
31
	public $chapterFormat = '//';
32
	public $hasCloudFlare = FALSE;
33
	public $userAgent     = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2824.0 Safari/537.36';
34
35
	/**
36
	 * 0: No custom updater.
37
	 * 1: Uses following page.
38
	 * 2: Uses latest releases page.
39
	 */
40
	public $customType = 0;
41
42 16
	/**
	 * Runs the parent constructor, loads the database and records the concrete class name in $site.
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//The site identifier is simply the name of the instantiated class.
		$this->site = static::class;
	}
49
50
	/**
51
	 * Generates URL to the title page of the requested series.
52
	 *
53
	 * NOTE: In some cases, we are required to store more data in the title_string than is needed to generate the URL. (Namely as the title_string is our unique identifier for that series)
54
	 *       When storing additional data, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
55
	 *
56
	 * Example:
57
	 *    return "http://mangafox.me/manga/{$title_url}/";
58
	 *
59
	 * Example (with extra data):
60
	 *    $title_parts = explode(':--:', $title_url);
61
	 *    return "https://bato.to/comic/_/comics/-r".$title_parts[0];
62
	 *
63
	 * @param string $title_url
64
	 * @return string
65
	 */
66
	abstract public function getFullTitleURL(string $title_url) : string;
67
68
	/**
69
	 * Generates chapter data from given $title_url and $chapter.
70
	 *
71
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
72
	 *
73
	 * NOTE: In some cases, we are required to store the chapter number, and the segment required to generate the chapter URL separately.
74
	 *       Much like when generating the title URL, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
75
	 *
76
	 * Example:
77
	 *     return [
78
	 *        'url'    => $this->getFullTitleURL($title_url).'/'.$chapter,
79
	 *        'number' => "c{$chapter}"
80
	 *    ];
81
	 *
82
	 * @param string $title_url
83
	 * @param string $chapter
84
	 * @return array [url, number]
85
	 */
86
	abstract public function getChapterData(string $title_url, string $chapter) : array;
87
88
	/**
89
	 * Used to get the latest chapter of given $title_url.
90
	 *
91
	 * This <should> utilize both get_content and parseTitleDataDOM functions when possible, as these can both reduce a lot of the code required to set this up.
92
	 *
93
	 * $titleData params must be set accordingly:
94
	 * * `title` should always be used with html_entity_decode.
95
	 * * `latest_chapter` must match $this->chapterFormat.
96
	 * * `last_updated` should always be in date("Y-m-d H:i:s") format.
97
	 * * `followed` should never be set directly within getTitleData, with the exception of an array_merge with doCustomFollow.
98
	 *
99
	 * $firstGet is set to true when the series is first added to the DB, and is used to follow the series on given site (if possible).
100
	 *
101
	 * @param string $title_url
102
	 * @param bool   $firstGet
103
	 * @return array|null [title,latest_chapter,last_updated,followed?]
104
	 */
105
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
106
107
	/**
	 * Checks the given $title_url against the titleFormat regex.
	 *
	 * A mismatch is logged as an error; callers use the result to decide whether the series may be added to the DB.
	 *
	 * @param string $title_url
	 * @return bool TRUE when $title_url matches titleFormat, FALSE otherwise.
	 */
	final public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
120
121
	/**
	 * Checks the given $chapter against the chapterFormat regex.
	 *
	 * A mismatch is logged as an error; callers use the result to decide whether the chapter may be updated.
	 *
	 * @param string $chapter
	 * @return bool TRUE when $chapter matches chapterFormat, FALSE otherwise.
	 */
	final public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
134
135
	/**
	 * Used by getTitleData (& similar functions) to get the requested page data.
	 *
	 * The request is attempted at most twice: if a response has status 503 and handleCloudFlare
	 * reports that it cached new cookies, the request is repeated once with those cookies.
	 *
	 * @param string $url
	 * @param string $cookie_string   Cookie header value supplied by the caller (may be empty).
	 * @param string $cookiejar_path  Path passed to CURLOPT_COOKIEFILE (may be empty).
	 * @param bool   $follow_redirect
	 * @param bool   $isPost
	 * @param array  $postFields      Sent url-encoded via http_build_query when $isPost is TRUE.
	 *
	 * @return array|bool [headers, status_code, body] of the last response, or FALSE if curl failed.
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		//Defaults so the returned array is well-defined on every execution path.
		$headers     = [];
		$status_code = 0;
		$body        = '';

		$refresh = TRUE; //For sites that have CloudFlare, we want to loop get_content again.
		$loops   = 0;
		while($refresh && $loops < 2) {
			$refresh = FALSE;
			$loops++;

			//Build the cookie header from the caller's string on every attempt, so cached CloudFlare cookies
			//are never appended twice and an empty caller string doesn't produce a leading "; ".
			$request_cookies = $cookie_string;
			if($cookies = $this->cache->get("cloudflare_{$this->site}")) {
				$request_cookies = ($request_cookies !== '' ? "{$request_cookies}; {$cookies}" : "{$cookies}");
			}

			$ch = curl_init();
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_ENCODING , "gzip");
			//curl_setopt($ch, CURLOPT_VERBOSE, 1);
			curl_setopt($ch, CURLOPT_HEADER, 1); //Headers are included in the response and split off below.

			if($follow_redirect)         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

			if(!empty($request_cookies)) curl_setopt($ch, CURLOPT_COOKIE, $request_cookies);
			if(!empty($cookiejar_path))  curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

			//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
			curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

			//NOTE: This is required for SSL URLs for now. Without it we tend to get error code 60.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //FIXME: This isn't safe, but it allows us to grab SSL URLs

			curl_setopt($ch, CURLOPT_URL, $url);

			if($isPost) {
				//CURLOPT_POST is a boolean switch, not a field count.
				curl_setopt($ch, CURLOPT_POST, TRUE);
				curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
			}

			$response = curl_exec($ch);

			$this->Tracker->admin->incrementRequests();

			if($response === FALSE) {
				log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
				curl_close($ch); //Release the handle on the failure path too.
				//FIXME: We don't always account for FALSE return
				return FALSE;
			}

			$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			$headers     = http_parse_headers(substr($response, 0, $header_size));
			$body        = substr($response, $header_size);
			curl_close($ch);

			if($status_code === 503) $refresh = $this->handleCloudFlare($url, $body);
		}

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}
207
208
	/**
	 * Checks whether a response body looks like a CloudFlare challenge page and, if so, tries to obtain bypass cookies.
	 *
	 * Cookies are fetched by running _scripts/get_cloudflare_cookie.py with a JSON argument containing the URL
	 * and user agent. The script's output is decoded as JSON and its 'cookies' value is cached under
	 * "cloudflare_{site}", which get_content reads on its next attempt.
	 *
	 * @param string $url  URL that returned the challenge.
	 * @param string $body Response body to inspect.
	 * @return bool TRUE when new cookies were cached and the request should be retried, FALSE otherwise.
	 */
	private function handleCloudFlare(string $url, string $body) : bool {
		$isChallengePage = (strpos($body, 'DDoS protection by Cloudflare') !== FALSE)
			|| (strpos($body, '<input type="hidden" id="jschl-answer" name="jschl_answer"/>') !== FALSE);
		if(!$isChallengePage) {
			//Either site doesn't have CloudFlare or we have bypassed it. Either is good!
			return FALSE;
		}

		//TODO: If $this->hasCloudFlare is FALSE the site appears to have newly enabled CloudFlare; flag it and contact admin.
		//      We'll continue to bypass CloudFlare as this may occur in a loop.

		$urlData = [
			'url'        => $url,
			'user_agent' => $this->userAgent
		];
		//TODO: shell_exec seems bad since the URLs "could" be user inputted? Better way of doing this?
		$result = shell_exec('python '.APPPATH.'../_scripts/get_cloudflare_cookie.py '.escapeshellarg(json_encode($urlData)));

		//shell_exec returns NULL on failure/no output and the script output may not be valid JSON,
		//so validate before caching anything or asking for a retry.
		$cookieData = (is_string($result) ? json_decode($result, TRUE) : NULL);
		if(!is_array($cookieData) || empty($cookieData['cookies'])) {
			log_message('error', "Failed to grab CloudFlare Cookies for {$this->site}");
			return FALSE;
		}

		$this->cache->save("cloudflare_{$this->site}", $cookieData['cookies'],  31536000 /* 1 year, or until we renew it */);
		log_message('debug', "Saving CloudFlare Cookies for {$this->site}");

		return TRUE;
	}
235
236
	/**
	 * Used by getTitleData to get the title, latest_chapter & last_updated nodes from the data returned by get_content.
	 *
	 * The content is rejected (logged, FALSE returned) when:
	 * * it is not an array (the request itself failed),
	 * * the status code is outside 200-299,
	 * * the body is empty,
	 * * the optional $failure_string is found in the body,
	 * * an XPath expression is invalid, or any of the queried node lists does not contain exactly 1 node.
	 *
	 * Data is cleaned by cleanTitleDataDOM prior to being passed to DOMDocument.
	 *
	 * $node_title_string and $node_row_string are evaluated against the whole document; $node_latest_string and
	 * $node_chapter_string are evaluated relative to the matched row. If $node_chapter_string is empty, the row
	 * node itself is used as the chapter node.
	 *
	 * @param array|bool $content Return value of get_content.
	 * @param string $title_url
	 * @param string $node_title_string
	 * @param string $node_row_string
	 * @param string $node_latest_string
	 * @param string $node_chapter_string
	 * @param string $failure_string
	 * @return DOMElement[]|false [nodes_title,nodes_latest,nodes_chapter]
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		string $failure_string = "") {

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}
		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}
		if($failure_string !== "" && strpos($data, $failure_string) !== FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Failure string matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Suppress HTML parse warnings while loading, then drop the buffered errors and restore whatever
		//error mode was active before (rather than forcing it off).
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);
		if($nodes_title === FALSE || $nodes_row === FALSE) {
			//DOMXPath::query returns FALSE for a malformed expression.
			log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (TITLE or ROW)");
			return FALSE;
		}
		if($nodes_title->length !== 1 || $nodes_row->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length} | ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		if($node_chapter_string !== '') {
			$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
		} else {
			$nodes_chapter = $nodes_row;
		}

		if($nodes_latest === FALSE || $nodes_chapter === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Invalid XPath expression (LATEST or CHAPTER)");
			return FALSE;
		}
		if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		return [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];
	}
310
311
	/**
	 * Hook called by parseTitleDataDOM on the response body before it is handed to DOMDocument & DOMXPath.
	 *
	 * The base implementation returns the data untouched. Site models can override it to trim the markup
	 * down to the part they need, as an (assumed) speed improvement or to make the XPath simpler.
	 *
	 * @param string $data Raw response body.
	 * @return string Data to be parsed.
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}
321
322
	/**
323
	 * Used to follow a series on given site if supported.
324
	 *
325
	 * This is called by getTitleData if $firstGet is true (which occurs when the series is first being added to the DB).
326
	 *
327
	 * Most of the actual following is done by handleCustomFollow.
328
	 *
329
	 * @param string $data
330
	 * @param array  $extra
331
	 * @return array
332
	 */
333
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
334
		$titleData = [];
335
		$this->handleCustomFollow(function($content, $id, closure $successCallback = NULL) use(&$titleData) {
336
			if(is_array($content)) {
337
				if(array_key_exists('status_code', $content)) {
338
					$statusCode = $content['status_code'];
339
					if($statusCode === 200) {
340
						$isCallable = is_callable($successCallback);
341
						if(($isCallable && $successCallback($content['body'])) || !$isCallable) {
342
							$titleData['followed'] = 'Y';
343
344
							log_message('info', "doCustomFollow succeeded for {$id}");
345
						} else {
346
							log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
347
						}
348
					} else {
349
						log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
350
					}
351
				} else {
352
					log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
353
				}
354
			} else {
355
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
356
			}
357
		}, $data, $extra);
358
		return $titleData;
359
	}
360
361
	/**
362
	 * Used by doCustomFollow to handle following series on sites.
363
	 *
364
	 * Uses get_content to get data.
365
	 *
366
	 * $callback must return ($content, $id, closure $successCallback = NULL).
367
	 * * $content is simply just the get_content data.
368
	 * * $id is the dbID. This should be passed by the $extra arr.
369
	 * * $successCallback is an optional success check to make sure the series was properly followed.
370
	 *
371
	 * @param callable $callback
372
	 * @param string   $data
373
	 * @param array    $extra
374
	 */
375
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {}
0 ignored issues
show
Unused Code introduced by
The parameter $data is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
376
377
	/**
378
	 * Used to check the sites following page for new updates (if supported).
379
	 * This should work much like getTitleData, but instead checks the following page.
380
	 *
381
	 * This must return an array containing arrays of each of the chapters data.
382
	 */
383
	public function doCustomUpdate() {}
384
385
	/**
	 * Used by the custom updater to check if a chapter looks newer than the current one.
	 *
	 * Both chapter strings are converted with getChapterData (called with an empty title), the resulting
	 * 'number' values are split on '/', and the segments are compared by doCustomCheckCompare.
	 * NOTE: Depending on the site, you may need to call getChapterData to get the chapter number to be used with this.
	 *
	 * @param string $oldChapterString
	 * @param string $newChapterString
	 * @return bool
	 */
	public function doCustomCheck(string $oldChapterString, string $newChapterString) : bool {
		$oldNumber = $this->getChapterData('', $oldChapterString)['number'];
		$newNumber = $this->getChapterData('', $newChapterString)['number'];

		return $this->doCustomCheckCompare(explode('/', $oldNumber), explode('/', $newNumber));
	}
403
404
	/**
	 * Used by doCustomCheck to check if a chapter looks newer than the current one.
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format, already split on '/'.
	 *
	 * To avoid issues with the occasional off case, this will only ever return true if we are 100% sure that the new chapter is newer than the old one.
	 *
	 * @param array $oldChapterSegments
	 * @param array $newChapterSegments
	 * @return bool
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		//Differently shaped chapter strings can't be compared reliably.
		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//Strip the leading 'v' from the volume segment.
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];
			if(in_array($oldVolume, $placeholderVolumes)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		//Strip the leading 'c' from the chapter segment.
		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		$chapterIsHigher = ($newChapter > $oldChapter);
		$volumeIsHigher  = ($newVolume > $oldVolume);

		//A higher chapter number is always treated as newer, regardless of volume.
		if($chapterIsHigher) {
			return TRUE;
		}

		//This is pretty much just to match a one-off case where the site doesn't properly increment chapter numbers across volumes, and instead does something like: v1/c1..v1/c5, v2/c1..v1/c5 (and so on).
		if($volumeIsHigher && $oldChapter < 10 && $newChapter < 10) {
			return TRUE;
		}

		//Otherwise only a higher volume with a chapter that didn't go backwards counts.
		return $volumeIsHigher && $newChapter >= $oldChapter;
	}
464
}
465
466
/**
 * Base model for sites exposing the FoolSlide reader JSON API under {baseURL}/api/reader/.
 *
 * Chapter strings have the shape LANG/VOLUME/CHAPTER(/SUBCHAPTER) (see $chapterFormat).
 * Concrete site models are expected to set $baseURL.
 */
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^(?:en(?:-us)?|pt|es)\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
	public $customType    = 2;

	public $baseURL = '';

	/**
	 * @param string $title_url Series stub.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * Builds the chapter URL and the normalized chapter number ((vN/)?cN(.N)?).
	 *
	 * A volume of '0' is omitted from the number; the optional fourth segment is appended after a dot.
	 *
	 * @param string $title_url
	 * @param string $chapter   Chapter string in #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA# form.
	 * @return array [url, number]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter); //returns #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)

		$volumePrefix = ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '');
		$extraSuffix  = (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '');

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => "{$volumePrefix}c{$chapter_parts[2]}{$extraSuffix}"
		];
	}

	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return string URL of the reader page for the chapter.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return "{$this->baseURL}/read/{$title_url}/{$chapter}/";
	}

	/**
	 * Fetches the series JSON and returns the title plus the highest chapter found.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null [title,latest_chapter,last_updated], or NULL when the request/JSON is unusable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$jsonURL = $this->getJSONTitleURL($title_url);
		if($content = $this->get_content($jsonURL)) {
			$json = json_decode($content['body'], TRUE);
			if($json && isset($json['chapters']) && count($json['chapters']) > 0) {
				$titleData['title'] = trim($json['comic']['name']);

				//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does), so we need to sort ourselves..
				//Chapter and subchapter are compared as separate integers (descending); gluing them into a float
				//would rank subchapter 10 below subchapter 2.
				usort($json['chapters'], function($a, $b) {
					return [(int) $b['chapter']['chapter'], (int) $b['chapter']['subchapter']]
					   <=> [(int) $a['chapter']['chapter'], (int) $a['chapter']['subchapter']];
				});
				$latestChapter = reset($json['chapters'])['chapter'];

				$titleData['latest_chapter'] = $this->buildChapterString($latestChapter);

				//No need to use date() here since this is already formatted as such.
				$titleData['last_updated'] = $this->getChapterTimestamp($latestChapter);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	//Since we're just checking the latest updates page and not a following page, we just need to simulate a follow.
	//TODO: It would probably be better to have some kind of var which says that the custom update uses a following page..
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		$content = ['status_code' => 200];
		$callback($content, $extra['id']);
	}

	/**
	 * Reads the site's latest-chapters JSON and returns the newest chapter per series.
	 *
	 * @return array Title data ([title,latest_chapter,last_updated]) keyed by series stub; empty on failure.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$jsonURL = $this->getJSONUpdateURL();
		if(($content = $this->get_content($jsonURL)) && $content['status_code'] == 200) {
			if(($json = json_decode($content['body'], TRUE)) && isset($json['chapters'])) {
				//This should fix edge cases where chapters are uploaded in bulk in the wrong order (HelveticaScans does this with Mousou Telepathy).
				usort($json['chapters'], function($a, $b) {
					$a_date = new DateTime($this->getChapterTimestamp($a['chapter']));
					$b_date = new DateTime($this->getChapterTimestamp($b['chapter']));
					return $b_date <=> $a_date;
				});

				foreach($json['chapters'] as $chapterData) {
					$stub = $chapterData['comic']['stub'];
					if(isset($titleDataList[$stub])) {
						//We already have title data for this title (the list is newest-first).
						continue;
					}

					$latestChapter = $chapterData['chapter'];

					$titleDataList[$stub] = [
						'title'          => trim($chapterData['comic']['name']),
						'latest_chapter' => $this->buildChapterString($latestChapter),
						//No need to use date() here since this is already formatted as such.
						'last_updated'   => $this->getChapterTimestamp($latestChapter)
					];
				}
			} else {
				log_message('error', "{$this->site} - Custom updating failed (no chapters arg?) for {$this->baseURL}.");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}

	/**
	 * @param string $title_url Series stub.
	 * @return string URL of the JSON API endpoint for a single series.
	 */
	public function getJSONTitleURL(string $title_url) : string {
		return "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";
	}

	/**
	 * @return string URL of the JSON API endpoint listing chapters, newest created first.
	 */
	public function getJSONUpdateURL() : string {
		return "{$this->baseURL}/api/reader/chapters/orderby/desc_created/format/json";
	}

	/**
	 * Builds the LANG/VOLUME/CHAPTER(/SUBCHAPTER) string for an API chapter entry.
	 *
	 * Falls back to 'en' when the entry carries no language. A subchapter of 0 is omitted.
	 *
	 * @param array $chapter 'chapter' element of an API chapter entry.
	 * @return string
	 */
	private function buildChapterString(array $chapter) : string {
		$language = (!empty($chapter['language']) ? $chapter['language'] : 'en');

		$chapterString = "{$language}/{$chapter['volume']}/{$chapter['chapter']}";
		if((string) $chapter['subchapter'] !== '0') {
			$chapterString .= "/{$chapter['subchapter']}";
		}
		return $chapterString;
	}

	/**
	 * Returns the chapter's 'updated' time, or 'created' when 'updated' is the zero date.
	 *
	 * @param array $chapter 'chapter' element of an API chapter entry.
	 * @return string
	 */
	private function getChapterTimestamp(array $chapter) : string {
		return ($chapter['updated'] !== '0000-00-00 00:00:00' ? $chapter['updated'] : $chapter['created']);
	}
}
578
579
abstract class Base_myMangaReaderCMS_Site_Model extends Base_Site_Model {
580
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
581
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[0-9\.]+)$/';
582
	public $customType    = 2;
583
584
	public $baseURL = '';
585
586
	/**
	 * @param string $title_url Series slug.
	 * @return string URL of the series page ({baseURL}/manga/{title_url}).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL.'/manga/'.$title_url;
	}
589
590
	/**
	 * Builds the chapter URL and chapter number for the given chapter.
	 *
	 * Purely numeric chapters are prefixed with 'c'; anything else is used as-is for the number.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array [url, number]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//"{$var}" rather than "${var}": the latter interpolation form is deprecated as of PHP 8.2.
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}
597
	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return string Series URL followed by '/' and the chapter.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		$titleURL = $this->getFullTitleURL($title_url);
		return "{$titleURL}/{$chapter}";
	}
600
601
	/**
	 * Fetches the series page and extracts the title, latest chapter and last-updated time.
	 *
	 * The latest chapter is the path segment two positions after the last 'manga' segment of the chapter
	 * link's href. A failed request is passed on to parseTitleDataDOM, which logs it and returns FALSE.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Unused by this implementation.
	 * @return array|null [title,latest_chapter,last_updated], or NULL when the page could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//h2[@class='widget-title'])[1]",
			"//ul[contains(@class, 'chapters')]/li[not(contains(@class, 'btn'))][1]",
			"div[contains(@class, 'action')]/div[@class='date-chapter-title-rtl']",
			"h5/a[1] | h3/a[1]",
			"Whoops, looks like something went wrong."
		);
		if(!$data) {
			return NULL;
		}

		//Locate the chapter segment explicitly instead of doing arithmetic on a possibly-FALSE search result.
		$segments  = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
		$mangaKeys = array_keys($segments, 'manga', TRUE);
		if(empty($mangaKeys)) {
			log_message('error', "{$this->site} : {$title_url} | Chapter URL is missing the manga segment");
			return NULL;
		}
		$chapterKey = end($mangaKeys) + 2;
		if(!isset($segments[$chapterKey])) {
			log_message('error', "{$this->site} : {$title_url} | Chapter URL is missing the chapter segment");
			return NULL;
		}

		//Strip the trailing " - ..." / " [A]..." part before parsing; strtotime returns FALSE on unparsable input.
		$dateString = $data['nodes_latest']->nodeValue;
		$timestamp  = strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString));
		if($timestamp === FALSE) {
			log_message('error', "{$this->site} : {$title_url} | Unable to parse last updated date");
			return NULL;
		}

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => $segments[$chapterKey],
			'last_updated'   => date("Y-m-d H:i:s", $timestamp)
		];
	}
630
631
632
	//Since we're just checking the latest updates page and not a following page, we just need to simulate a follow.
633
	//TODO: It would probably be better to have some kind of var which says that the custom update uses a following page..
634
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
635
		$content = ['status_code' => 200];
636
		$callback($content, $extra['id']);
637
	}
638
	public function doCustomUpdate() {
639
		$titleDataList = [];
640
641
		$updateURL = "{$this->baseURL}/latest-release";
642
		if(($content = $this->get_content($updateURL)) && $content['status_code'] == 200) {
643
			$data = $content['body'];
644
645
			$data = preg_replace('/^[\s\S]+<dl>/', '<dl>', $data);
646
			$data = preg_replace('/<\/dl>[\s\S]+$/', '</dl>', $data);
647
648
			$dom = new DOMDocument();
649
			libxml_use_internal_errors(TRUE);
650
			$dom->loadHTML($data);
651
			libxml_use_internal_errors(FALSE);
652
653
			$xpath      = new DOMXPath($dom);
654
			$nodes_rows = $xpath->query("//dl/dd | //div[@class='mangalist']/div[@class='manga-item']");
655
			if($nodes_rows->length > 0) {
656
				foreach($nodes_rows as $row) {
657
					$titleData = [];
658
659
					$nodes_title   = $xpath->query("div[@class='events ']/div[@class='events-body']/h3[@class='events-heading']/a | h3/a", $row);
660
					$nodes_chapter = $xpath->query("(div[@class='events '][1]/div[@class='events-body'][1] | div[@class='manga-chapter'][1])/h6[@class='events-subtitle'][1]/a[1]", $row);
661
					$nodes_latest  = $xpath->query("div[@class='time'] | small", $row);
662
663
					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
664
						$title = $nodes_title->item(0);
665
666
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
667
						$title_url = $title_url_arr['url'];
668
669
						if(!array_key_exists($title_url, $titleDataList)) {
670
							$titleData['title'] = trim($title->textContent);
671
672
							$chapter = $nodes_chapter->item(0);
673
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
674
							$titleData['latest_chapter'] = $chapter_arr['chapter'];
675
676
							$dateString = str_replace('/', '-', trim($nodes_latest->item(0)->nodeValue)); //NOTE: We replace slashes here as it stops strtotime interpreting the date as US date format.
677
							if($dateString == 'T') {
678
								$dateString = date("Y-m-d",now());
679
							}
680
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString . ' 00:00'));
681
682
							$titleDataList[$title_url] = $titleData;
683
						}
684
					} else {
685
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
686
					}
687
				}
688
			} else {
689
				log_message('error', "{$this->site} | Following list is empty?");
690
			}
691
		} else {
692
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
693
		}
694
695
		return $titleDataList;
696
	}
697
}
698
699
/**
 * Class Base_GlossyBright_Site_Model
 *
 * Base model for sites using the "{baseURL}/{title}/{chapter}/" URL layout.
 *
 * A single title is refreshed from its RSS feed at "{baseURL}/manga-rss/{title}" (getTitleData), while bulk
 * updates are read from the list on the site's front page (doCustomUpdate). $baseURL is empty here and is
 * expected to be set by the concrete site model.
 */
abstract class Base_GlossyBright_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';

	public $baseURL = '';

	public $customType    = 2; //2: Uses latest releases page.

	/**
	 * @param string $title_url Title identifier as used in the site's URLs.
	 *
	 * @return string URL of the title page (without a trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/{$title_url}";
	}

	/**
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param string $chapter   Chapter number as used in the site's URLs.
	 *
	 * @return array 'url' is "{title URL}/{chapter}/", 'number' is the chapter prefixed with "c".
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			//getFullTitleURL() has no trailing slash, so the separator must be added here.
			//Without it the title and chapter would be merged into a single path segment.
			'url'    => $this->getFullTitleURL($title_url).'/'.$chapter.'/',
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Fetches the title's RSS feed and extracts the title name, latest chapter and last update time.
	 *
	 * @param string $title_url Title identifier as used in the site's URLs.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated' (Y-m-d H:i:s)], or NULL if the feed could
	 *                    not be fetched or any of the values could not be parsed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$fullURL = "{$this->baseURL}/manga-rss/{$title_url}";
		$content = $this->get_content($fullURL);
		if(!is_array($content)) {
			//NOTE(review): get_content() appears to return FALSE on failure; don't pass that on to the DOM parser.
			log_message('error', "{$this->site} : {$title_url} | Failed to fetch title feed.");
			return NULL;
		}

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//rss/channel/image/title",
			"//rss/channel/item[1]",
			"pubdate",
			"title"
		);
		if(!$data) {
			return NULL;
		}

		$titleData = [];
		$titleData['title'] = preg_replace('/^Recent chapters of (.*?) manga$/', '$1', trim($data['nodes_title']->textContent));

		//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it elsewhere.
		//The item title is expected to look like "{something} - {chapter} - {something}". If it doesn't, fail
		//instead of storing the whole item title as the chapter.
		$itemTitle = trim($data['nodes_chapter']->textContent);
		if(!preg_match('/^.*? - ([0-9\.]+) - .*?$/', $itemTitle, $chapterMatch)) {
			log_message('error', "{$this->site} : {$title_url} | Unable to find chapter in feed item title.");
			return NULL;
		}
		$titleData['latest_chapter'] = $chapterMatch[1];

		$timestamp = strtotime((string) $data['nodes_latest']->textContent);
		if($timestamp === FALSE) {
			//With strict_types, passing FALSE to date() would throw a TypeError.
			log_message('error', "{$this->site} : {$title_url} | Unable to parse update date.");
			return NULL;
		}
		$titleData['last_updated'] = date("Y-m-d H:i:s", $timestamp);

		return $titleData;
	}

	/**
	 * Reads the site's front page and collects data for every title in its "wpm_mng_lst" list.
	 *
	 * Only the first row found for each title is kept. Rows which can't be parsed are logged and skipped.
	 *
	 * @return array Title data (['title', 'latest_chapter', 'last_updated']) keyed by title_url. Empty on failure.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$content = $this->get_content($this->baseURL);
		if(!$content || $content['status_code'] != 200) {
			log_message('error', "{$this->site} - Custom updating failed.");
			return $titleDataList;
		}

		$data = $content['body'];

		//Silence markup warnings while parsing, then restore whatever setting was active before.
		$dom = new DOMDocument();
		$previousLibxmlSetting = libxml_use_internal_errors(TRUE);
		$dom->loadHTML($data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousLibxmlSetting);

		$xpath      = new DOMXPath($dom);
		$nodes_rows = $xpath->query("//*[@id='wpm_mng_lst']/tbody/tr/td | //*[@id='wpm_mng_lst']/li/div");
		if($nodes_rows === FALSE || $nodes_rows->length === 0) {
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($nodes_rows as $row) {
			//The title and the chapter are both read from the same link (the second <a> of the row).
			$nodes_title   = $xpath->query("a[2]", $row);
			$nodes_chapter = $xpath->query("a[2]", $row);
			$nodes_latest  = $xpath->query("b", $row);

			if(!($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1)) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
				continue;
			}

			$title   = $nodes_title->item(0);
			$chapter = $nodes_chapter->item(0);

			//The title_url is the first path segment after the host.
			//NOTE(review): The host is hard-coded here, so this only works for mngcow.co links regardless of $baseURL.
			if(!preg_match('/mngcow\.co\/(?<url>.*?)\//', $title->getAttribute('href'), $title_url_arr)) {
				log_message('error', "{$this->site}/Custom | Unable to parse title URL.");
				continue;
			}
			$title_url = $title_url_arr['url'];

			if(array_key_exists($title_url, $titleDataList)) {
				//Only the first row for each title is used.
				continue;
			}

			//The chapter is the last path segment of the link.
			if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
				log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}.");
				continue;
			}

			//Relative dates need converting to an actual date before being parsed.
			$dateString = trim($nodes_latest->item(0)->textContent);
			switch($dateString) {
				case 'Today':
					$dateString = date("Y-m-d", now());
					break;

				case 'Yesterday':
					$dateString = date("Y-m-d", strtotime("-1 days"));
					break;

				default:
					//Do nothing
					break;
			}
			$timestamp = strtotime($dateString);
			if($timestamp === FALSE) {
				//With strict_types, passing FALSE to date() would throw a TypeError.
				log_message('error', "{$this->site}/Custom | Unable to parse update date for {$title_url}.");
				continue;
			}

			$titleDataList[$title_url] = [
				'title'          => trim($title->getAttribute('title')),
				'latest_chapter' => $chapter_arr['chapter'],
				'last_updated'   => date("Y-m-d H:i:s", $timestamp)
			];
		}

		return $titleDataList;
	}
}
809