Base_Site_Model::doCustomUpdate()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 1
rs 10
c 0
b 0
f 0
ccs 0
cts 0
cp 0
crap 2
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
4
 * Class Tracker_Sites_Model
5
 */
6
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolves undefined properties.
	 *
	 * When $name is an existing class whose direct parent is one of the known site base classes,
	 * the site model is instantiated, stored on this object and returned.
	 * Anything else is forwarded to the CodeIgniter instance.
	 *
	 * @param string $name
	 * @return mixed
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$siteBaseClasses = [
			'Base_Site_Model',
			'Base_FoolSlide_Site_Model',
			'Base_myMangaReaderCMS_Site_Model',
			'Base_GlossyBright_Site_Model',
			'Base_Roku_Site_Model',
			'Base_WP_Manga_Site_Model'
		];

		// Only the direct parent is inspected; class_exists short-circuits the parent lookup for unknown names.
		$isSiteModel = class_exists($name) && in_array(get_parent_class($name), $siteBaseClasses);
		if($isSiteModel) {
			$this->loadSite($name);
			return $this->{$name};
		}

		return get_instance()->{$name};
	}

	/**
	 * Instantiates the given site class and stores it as a property named after the class.
	 *
	 * @param string $siteName
	 */
	private function loadSite(string $siteName) : void {
		$siteModel = new $siteName();
		$this->{$siteName} = $siteModel;
	}
}
35
36
abstract class Base_Site_Model extends CI_Model {
37
	public $site          = '';
38
	public $titleFormat   = '//';
39
	public $chapterFormat = '//';
40
	public $pageSeparator = ''; //NOTE: Each site must set this manually.
41
	public $hasCloudFlare = FALSE;
42
	public $userAgent     = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36';
43
44
	public $baseURL = '';
45
46
	/**
47
	 * 0: No custom updater.
48
	 * 1: Uses following page.
49
	 * 2: Uses latest releases page.
50
	 */
51
	public $customType = 0;
52
53
	public $canHaveNoChapters = FALSE;
54
55
	public $siteRateLimit = 600;
56
57
	public $bypassSSL = FALSE;
58
59 16
	/**
	 * Sets up the parent model, requests the database through the loader
	 * and records the concrete class name in $this->site.
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		// The concrete class name doubles as the site identifier used in log messages and cache keys.
		$this->site = static::class;
	}
66
67
	/**
68
	 * Generates URL to the title page of the requested series.
69
	 *
70
	 * NOTE: In some cases, we are required to store more data in the title_string than is needed to generate the URL. (Namely as the title_string is our unique identifier for that series)
71
	 *       When storing additional data, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
72
	 *
73
	 * Example:
74
	 *    return "http://mangafox.me/manga/{$title_url}/";
75
	 *
76
	 * Example (with extra data):
77
	 *    $title_parts = explode(':--:', $title_url);
78
	 *    return "https://bato.to/comic/_/comics/-r".$title_parts[0];
79
	 *
80
	 * @param string $title_url
81
	 * @return string
82
	 */
83
	abstract public function getFullTitleURL(string $title_url) : string;
84
85
	/**
86
	 * Generates chapter data from given $title_url and $chapter.
87
	 *
88
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
89
	 *
90
	 * NOTE: In some cases, we are required to store the chapter number, and the segment required to generate the chapter URL separately.
91
	 *       Much like when generating the title URL, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
92
	 *
93
	 * Example:
94
	 *     return [
95
	 *        'url'    => $this->getFullTitleURL($title_url).'/'.$chapter,
96
	 *        'number' => "c{$chapter}"
97
	 *    ];
98
	 *
99
	 * @param string $title_url
100
	 * @param string $chapter
101
	 * @return array [url, number]
102
	 */
103
	abstract public function getChapterData(string $title_url, string $chapter) : array;
104
105
	/**
106
	 * Generates chapter page URL from given chapterData.
107
	 *
108
	 * Will return NULL if pageSeparator is not set.
109
	 *
110
	 * @param array $chapterData
111
	 * @param int   $page
112
	 *
113
	 * @return null|string
114
	 */
115
	final public function getChapterPageURL(array $chapterData, int $page = 1) : ?string {
		$separator = $this->pageSeparator;
		if($separator === '') {
			// The site never declared a page separator, so no page URL can be built.
			return NULL;
		}

		$chapterURL = $chapterData['url'];

		// Don't double the separator when the chapter URL already ends with it.
		// The whole separator is compared (not just the last character) so this
		// also works for multi-character separators such as 'page/'.
		if(substr($chapterURL, -strlen($separator)) === $separator) {
			$separator = '';
		}

		return $chapterURL . $separator . $page;
	}
127
128
	/**
129
	 * Used to get the latest chapter of given $title_url.
130
	 *
131
	 * This <should> utilize both get_content and parseTitleDataDOM functions when possible, as these can both reduce a lot of the code required to set this up.
132
	 *
133
	 * $titleData params must be set accordingly:
134
	 * * `title` should always be used with html_entity_decode.
135
	 * * `latest_chapter` must match $this->chapterFormat.
136
	 * * `last_updated` should always be in date("Y-m-d H:i:s") format.
137
	 * * `followed` should never be set directly within getTitleData; the only exception is merging in the result of doCustomFollow via array_merge.
138
	 *
139
	 * $firstGet is set to true when the series is first added to the DB, and is used to follow the series on given site (if possible).
140
	 *
141
	 * @param string $title_url
142
	 * @param bool   $firstGet
143
	 * @return array|null [title,latest_chapter,last_updated,followed?]
144
	 */
145
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
146
147
	/**
	 * Fetches title data for a batch update unless the site's request counter is over its limit.
	 *
	 * @param string $title_url
	 * @return array [limited => bool, titleData => array|null]
	 */
	public function handleBatchUpdate(string $title_url) : array {
		$requestCount = $this->_getSiteRateLimit();

		if($requestCount > $this->siteRateLimit) {
			// Over the limit: report it and skip the request entirely.
			return [
				'limited'   => TRUE,
				'titleData' => NULL
			];
		}

		// Count this request before performing it.
		$this->_setSiteRateLimit($requestCount);

		return [
			'limited'   => FALSE,
			'titleData' => $this->getTitleData($title_url)
		];
	}
161
162
	/**
163
	 * Validates given $title_url against titleFormat.
164
	 *
165
	 * Failure to match against titleFormat will stop the series from being added to the DB.
166
	 *
167
	 * @param string $title_url
168
	 * @return bool
169
	 */
170 2
	final public function isValidTitleURL(string $title_url) : bool {
		// preg_match yields 1 only on a match; 0 (no match) and FALSE (regex error) both count as invalid.
		if(preg_match($this->titleFormat, $title_url) === 1) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
175
176
	/**
177
	 * Validates given $chapter against chapterFormat.
178
	 *
179
	 * Failure to match against chapterFormat will stop the chapter being updated.
180
	 *
181
	 * @param string $chapter
182
	 * @return bool
183
	 */
184 2
	final public function isValidChapter(string $chapter) : bool {
		// preg_match yields 1 only on a match; 0 (no match) and FALSE (regex error) both count as invalid.
		if(preg_match($this->chapterFormat, $chapter) === 1) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
189
190
191
192
	/**
	 * Returns the chapter string unchanged in this base implementation.
	 *
	 * The method is public and not final, so site models can override it.
	 * NOTE(review): presumably overrides normalise site-specific chapter strings - confirm against subclasses.
	 *
	 * @param string $chapter
	 * @return string
	 */
	public function stripChapter(string $chapter) : string {
		return $chapter;
	}
195
196
	/**
197
	 * Used by getTitleData (& similar functions) to get the requested page data.
198
	 *
199
	 * @param string $url
200
	 * @param string $cookie_string
201
	 * @param string $cookiejar_path
202
	 * @param bool   $follow_redirect
203
	 * @param bool   $isPost
204
	 * @param array  $postFields
205
	 *
206
	 * @return array|bool
207
	 */
208
	final protected function get_content(string $url, string $cookie_string = '', string $cookiejar_path = '', bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		// Defaults keep the result well-defined on every path (the loop below always runs at least once).
		$headers     = [];
		$status_code = 0;
		$body        = '';

		$refresh = TRUE; //For sites that have CloudFlare, we want to loop get_content again.
		$loops   = 0;
		while($refresh && $loops < 2) {
			$refresh = FALSE;
			$loops++;

			$ch = curl_init();
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_ENCODING , 'gzip');
			//curl_setopt($ch, CURLOPT_VERBOSE, 1);
			curl_setopt($ch, CURLOPT_HEADER, 1); // Headers are split off the response below.

			if($follow_redirect)        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

			// Build the cookie header fresh on every attempt from the caller's cookies plus the cached
			// CloudFlare cookies. Appending to $cookie_string itself would duplicate cookies on the retry
			// and produce a leading "; " when the caller passed none.
			$requestCookies = $cookie_string;
			if($cookies = $this->cache->get("cloudflare_{$this->site}")) {
				$requestCookies = ($requestCookies !== '' ? "{$requestCookies}; " : '') . $cookies;
			}

			if(!empty($requestCookies)) curl_setopt($ch, CURLOPT_COOKIE, $requestCookies);
			if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

			//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
			curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

			if(!$this->bypassSSL) {
				curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);
			} else {
				// Some sites just can't do SSL properly.
				curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
				curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
			}

			curl_setopt($ch, CURLOPT_URL, $url);

			if($isPost) {
				// CURLOPT_POST is a boolean switch; passing the field count turned POST off for an empty field list.
				curl_setopt($ch, CURLOPT_POST, TRUE);
				curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
			}

			$response = curl_exec($ch);

			$this->Tracker->admin->incrementRequests();

			if($response === FALSE) {
				// Read the error details before releasing the handle.
				log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch).' | '.$url);
				curl_close($ch);

				//FIXME: We don't always account for FALSE return
				return FALSE;
			}

			$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			$headers     = http_parse_headers(substr($response, 0, $header_size));
			$body        = substr($response, $header_size);
			curl_close($ch);

			// A 503 may be a CloudFlare challenge; handleCloudFlare decides whether one more attempt is worthwhile.
			if($status_code === 503) $refresh = $this->handleCloudFlare($url, $body);
		}

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}
273
274
	/**
	 * Checks whether $body is a CloudFlare challenge page and, if so, fetches and caches clearance cookies.
	 *
	 * NOTE: Declared without `final`, as PHP 8 warns that private methods cannot be final.
	 *
	 * @param string $url  URL that produced the challenge.
	 * @param string $body Response body to inspect.
	 * @return bool TRUE when fresh cookies were cached and the request should be retried.
	 */
	private function handleCloudFlare(string $url, string $body) : bool {
		$isChallengePage = (strpos($body, 'DDoS protection by Cloudflare') !== FALSE)
			|| (strpos($body, '<input type="hidden" id="jschl-answer" name="jschl_answer"/>') !== FALSE);

		if(!$isChallengePage) {
			//Either site doesn't have CloudFlare or we have bypassed it. Either is good!
			return FALSE;
		}

		//TODO: If !$this->hasCloudFlare the site appears to have newly enabled CloudFlare; disable it and contact admin.
		//      We'll continue to bypass CloudFlare as this may occur in a loop.

		$urlData = [
			'url'        => $url,
			'user_agent' => $this->userAgent
		];
		//TODO: shell_exec seems bad since the URLs "could" be user inputted? Better way of doing this?
		$result = shell_exec('python3 '.APPPATH.'../_scripts/get_cloudflare_cookie.py '.escapeshellarg(json_encode($urlData)));

		// shell_exec returns NULL on failure or empty output; json_decode must only ever see a string.
		$cookieData = is_string($result) ? json_decode($result, TRUE) : NULL;
		if(!is_array($cookieData) || empty($cookieData['cookies'])) {
			// Without usable cookies a retry would just hit the same challenge, so don't cache or refresh.
			log_message('error', "Failed to grab CloudFlare Cookies for {$this->site} | {$url}");
			return FALSE;
		}

		$this->cache->save("cloudflare_{$this->site}", $cookieData['cookies'],  31536000 /* 1 year, or until we renew it */);
		log_message('debug', "Saving CloudFlare Cookies for {$this->site}");

		return TRUE;
	}
301
302
	/**
303
	 * Used by getTitleData to get the title, latest_chapter & last_updated data from the data returned by get_content.
304
	 *
305
	 * parseTitleDataDOM checks if the data returned by get_content is valid via a few simple checks.
306
	 * * If the request was actually successful, had a valid status code & data wasn't empty. We also do an additional check on an optional $failure_string param, which will throw a failure if it's matched.
307
	 *
308
	 * Data is cleaned by cleanTitleDataDOM prior to being passed to DOMDocument.
309
	 *
310
	 * All $node_* params must be XPath to the requested node, and must only return 1 result. Anything else will throw a failure.
311
	 *
312
	 * @param array        $content
313
	 * @param string       $title_url
314
	 * @param string       $node_title_string
315
	 * @param string       $node_row_string
316
	 * @param string       $node_latest_string
317
	 * @param string       $node_chapter_string
318
	 * @param closure|null $failureCall
319
	 * @param closure|null $noChaptersCall
320
	 * @param closure|null $extraCall
321
	 * @param closure|null $statusCall //FIXME: This is really ugly.
322
	 *
323
	 * @return DOMElement[]|false [nodes_title,nodes_chapter,nodes_latest]
324
	 */
325
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		?closure $failureCall = NULL, ?closure $noChaptersCall = NULL, ?closure $extraCall = NULL, ?closure $statusCall = NULL) {

		// Every failure below is logged with the same "<site> : <title>" prefix.
		$logPrefix = "{$this->site} : {$title_url}";

		if(!is_array($content)) {
			log_message('error', "{$logPrefix} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		// Only the status code and body are needed here; the headers are not used.
		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			if($status_code === 502) {
				// Site is overloaded, no need to log this.
			} else if(!is_null($statusCall) && is_callable($statusCall) && $statusReturn = $statusCall($status_code, $data)) {
				// The status callback recognised this failure; it can ask for it not to be logged via an 'ignore' key.
				if(!array_key_exists('ignore', $statusReturn)) {
					log_message('error', "{$logPrefix} | Failure status call matched");
				}
			} else {
				log_message('error', "{$logPrefix} | Bad Status Code ({$status_code})");
			}
			return FALSE;
		}

		if(empty($data)) {
			log_message('error', "{$logPrefix} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}

		if(!is_null($failureCall) && is_callable($failureCall) && $failureCall($data)) {
			log_message('error', "{$logPrefix} | Failure call matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		// Collect parser warnings internally while loading, then drop the collected errors
		// and put libxml back into whatever mode it was in before this call.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);

		if($nodes_title->length !== 1) {
			log_message('error', "{$logPrefix} | Invalid amount of nodes (TITLE: {$nodes_title->length})");
			return FALSE;
		}

		if($nodes_row->length !== 1) {
			if($this->canHaveNoChapters && !is_null($noChaptersCall) && is_callable($noChaptersCall)) {
				$returnData = [
					'nodes_title'   => $nodes_title->item(0)
				];

				// The callback is handed $returnData and whatever it leaves there is returned as-is.
				// NOTE(review): presumably callers take $returnData by reference and may replace it with a non-array on failure - confirm.
				$noChaptersCall($data, $xpath, $returnData);

				if(is_array($returnData)) {
					if(is_callable($extraCall)) $extraCall($xpath, $returnData);
				} else {
					log_message('error', "{$logPrefix} | canHaveNoChapters set, but doesn't match possible checks! XPath is probably broken.");
				}

				return $returnData;
			}

			log_message('error', "{$logPrefix} | Invalid amount of nodes (ROW: {$nodes_row->length})");
			return FALSE;
		}

		$firstRow     = $nodes_row->item(0);
		$nodes_latest = $xpath->query($node_latest_string, $firstRow);

		// An empty chapter XPath means the row node itself is the chapter node.
		if($node_chapter_string !== '') {
			$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
		} else {
			$nodes_chapter = $nodes_row;
		}

		if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
			log_message('error', "{$logPrefix} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
			return FALSE;
		}

		$returnData = [
			'nodes_title'   => $nodes_title->item(0),
			'nodes_latest'  => $nodes_latest->item(0),
			'nodes_chapter' => $nodes_chapter->item(0)
		];

		if(is_callable($extraCall)) $extraCall($xpath, $returnData);

		return $returnData;
	}
410
411
	/**
412
	 * Used by parseTitleDataDOM to clean the data prior to passing it to DOMDocument & DOMXPath.
413
	 * This is mostly done as an (assumed) speed improvement due to the reduced amount of DOM to parse, or simply just making it easier to parse with XPath.
414
	 *
415
	 * @param string $data
416
	 * @return string
417
	 */
418
	public function cleanTitleDataDOM(string $data) : string {
419
		return $data;
420
	}
421
422
	/**
423
	 * Used to follow a series on given site if supported.
424
	 *
425
	 * This is called by getTitleData if $firstGet is true (which occurs when the series is first being added to the DB).
426
	 *
427
	 * Most of the actual following is done by handleCustomFollow.
428
	 *
429
	 * @param string $data
430
	 * @param array  $extra
431
	 * @return array
432
	 */
433
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		// Verdict callback handed to handleCustomFollow: it inspects the response and,
		// only when everything checks out, marks the series as followed.
		$onResponse = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}

			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			// The optional success check only runs when one was supplied; without it a 200 is enough.
			if(is_callable($successCallback) && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($onResponse, $data, $extra);

		return $titleData;
	}
460
461
	/**
462
	 * Used by doCustomFollow to handle following series on sites.
463
	 *
464
	 * Uses get_content to get data.
465
	 *
466
	 * $callback must accept ($content, $id, closure $successCallback = NULL).
467
	 * * $content is simply just the get_content data.
468
	 * * $id is the dbID. This should be passed by the $extra arr.
469
	 * * $successCallback is an optional success check to make sure the series was properly followed.
470
	 *
471
	 * @param callable $callback
472
	 * @param string   $data
473
	 * @param array    $extra
474
	 */
475
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		// This base implementation never reads $data; only sites with customType 2 report a follow at all.
		if($this->customType !== 2) {
			return;
		}

		// No request is made here: a bare 200 response is reported to the callback for the given id.
		$fakeResponse = ['status_code' => 200];
		$callback($fakeResponse, $extra['id']);
	}
481
482
	/**
483
	 * Used to check the sites following page for new updates (if supported).
484
	 * This should work much like getTitleData, but instead checks the following page.
485
	 *
486
	 * This must return an array containing arrays of each of the chapters data.
487
	 */
488
	public function doCustomUpdate() {}
489
490
	/**
491
	 * Used by the custom updater to check if a chapter looks newer than the current one.
492
	 *
493
	 * This calls doCustomCheckCompare which handles the majority of the checking.
494
	 * NOTE: Depending on the site, you may need to call getChapterData to get the chapter number to be used with this.
495
	 *
496
	 * @param string $oldChapterString
497
	 * @param string $newChapterString
498
	 * @return bool
499
	 */
500
	public function doCustomCheck(?string $oldChapterString, string $newChapterString) : bool {
		// With no previous chapter to compare against, the new chapter is always accepted.
		if(is_null($oldChapterString)) {
			return TRUE;
		}

		// Compare the chapter numbers reported by getChapterData, split into their '/'-separated segments.
		$previousSegments  = explode('/', $this->getChapterData('', $oldChapterString)['number']);
		$candidateSegments = explode('/', $this->getChapterData('', $newChapterString)['number']);

		return $this->doCustomCheckCompare($previousSegments, $candidateSegments);
	}
512
513
	/**
514
	 * Used by doCustomCheck to check if a chapter looks newer than the current one.
515
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
516
	 *
517
	 * To avoid issues with the occasional off case, this will only ever return true if we are 100% sure that the new chapter is newer than the old one.
518
	 *
519
	 * @param array $oldChapterSegments
520
	 * @param array $newChapterSegments
521
	 * @return bool
522
	 */
523 12
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		// Differently shaped chapter strings can't be compared with any confidence.
		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			return FALSE;
		}

		// Volumes default to 0 unless the string has exactly a [volume, chapter] shape.
		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];

			// Strip the leading 'v' marker from each volume segment.
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			if(in_array($oldVolume, $placeholderVolumes)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		// Strip the leading 'c' marker from each chapter segment.
		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		// A higher chapter number alone is enough, whatever the volumes say.
		$chapterIncreased = $newChapter > $oldChapter;
		// A higher volume only counts when the chapter didn't go backwards, or when both chapters are
		// below 10 (sites that restart chapter numbering in every volume, e.g. v1/c1..v1/c5, v2/c1..).
		$volumeIncreased = $newVolume > $oldVolume;
		$bothBelowTen    = $oldChapter < 10 && $newChapter < 10;

		return $chapterIncreased || ($volumeIncreased && ($bothBelowTen || $newChapter >= $oldChapter));
	}
573
574
	/**
	 * Reads the "{site}_ratelimit" cache entry for this site.
	 *
	 * @return int The cached value cast to int, or 0 when the cache returns a falsy value.
	 */
	final private function _getSiteRateLimit() : int {
		$cachedLimit = $this->cache->get("{$this->site}_ratelimit");
		if(!$cachedLimit) {
			return 0;
		}

		return (int) $cachedLimit;
	}
577
	/**
	 * Stores an incremented value under the "{site}_ratelimit" cache key.
	 *
	 * @param int|null $rateLimit Value to increment; when NULL (or 0) the currently cached value is used instead.
	 * @return bool Result of the cache save call.
	 */
	final private function _setSiteRateLimit(?int $rateLimit = NULL) : bool {
		//We would just use increment(), but we can't set ttl with it...
		$baseLimit = $rateLimit;
		if(!$baseLimit) {
			$baseLimit = $this->_getSiteRateLimit();
		}

		$cacheKey = "{$this->site}_ratelimit";
		return $this->cache->save($cacheKey, $baseLimit + 1, 3600);
	}
582
}
583
584
/**
 * Class Base_FoolSlide_Site_Model
 *
 * Shared logic for FoolSlide-based sites. Title and update data are read from the site's JSON API (/api/reader/...).
 * Chapters are stored as "#LANG#/#VOLUME#/#CHAPTER#(/#SUBCHAPTER#)".
 */
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^(?:en(?:-us)?|pt|es)\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
	public $pageSeparator = 'page/';
	public $customType    = 2;

	/**
	 * @param string $title_url Title stub as used in the site's URLs.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * Builds the chapter URL and a readable chapter number ("v#/c#.#") from a stored chapter string.
	 * The volume prefix is omitted when the volume is "0".
	 *
	 * @param string $title_url
	 * @param string $chapter Chapter string in #LANG#/#VOLUME#/#CHAPTER#(/#SUBCHAPTER#) form.
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter); //returns #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '') . "c{$chapter_parts[2]}" . (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '')
		];
	}

	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return string URL of the reader page for the chapter.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return "{$this->baseURL}/read/{$title_url}/{$chapter}/";
	}

	/**
	 * Fetches the title's JSON data and extracts the title name, latest chapter and last update time.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet Not used by this implementation.
	 * @return array|null Title data, or NULL if the request failed or the response contained no chapters.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$jsonURL = $this->getJSONTitleURL($title_url);
		if($content = $this->get_content($jsonURL)) {
			$json = json_decode($content['body'], TRUE);
			if($json && isset($json['chapters']) && count($json['chapters']) > 0) {
				$titleData['title'] = trim($json['comic']['name']);

				//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does), so we need to sort ourselves..
				//Chapter and subchapter are compared as separate numbers. Gluing them into a single float ("1.10") would rank subchapter 10 below subchapter 2.
				usort($json['chapters'], function($a, $b) {
					return [floatval($b['chapter']['chapter']), floatval($b['chapter']['subchapter'])]
					   <=> [floatval($a['chapter']['chapter']), floatval($a['chapter']['subchapter'])];
				});
				$latestChapter = reset($json['chapters'])['chapter'];

				$latestChapterString = "{$latestChapter['language']}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
				if($latestChapter['subchapter'] !== '0') {
					$latestChapterString .= "/{$latestChapter['subchapter']}";
				}
				$titleData['latest_chapter'] = $latestChapterString;

				//No need to use date() here since this is already formatted as such.
				$titleData['last_updated'] = ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created']);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Fetches the site-wide list of recent chapters and returns the newest chapter for each title in it.
	 *
	 * @return array Title data keyed by title stub. Empty if the request or JSON decoding failed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$jsonURL = $this->getJSONUpdateURL();
		if(($content = $this->get_content($jsonURL)) && $content['status_code'] == 200) {
			if(($json = json_decode($content['body'], TRUE)) && isset($json['chapters'])) {
				//This should fix edge cases where chapters are uploaded in bulk in the wrong order (HelveticaScans does this with Mousou Telepathy).
				usort($json['chapters'], function($a, $b) {
					$a_date = new DateTime($a['chapter']['updated'] !== '0000-00-00 00:00:00' ? $a['chapter']['updated'] : $a['chapter']['created']);
					$b_date = new DateTime($b['chapter']['updated'] !== '0000-00-00 00:00:00' ? $b['chapter']['updated'] : $b['chapter']['created']);
					return $b_date <=> $a_date;
				});

				foreach($json['chapters'] as $chapterData) {
					$stub = $chapterData['comic']['stub'];

					//Chapters are sorted newest first, so only the first chapter seen for each title is of interest.
					if(isset($titleDataList[$stub])) continue;

					$titleData = [];
					$titleData['title'] = trim($chapterData['comic']['name']);

					$latestChapter = $chapterData['chapter'];

					//Use the chapter's own language so the result matches what getTitleData() generates; fall back to "en" if it is missing.
					$language = (!empty($latestChapter['language']) ? $latestChapter['language'] : 'en');

					$latestChapterString = "{$language}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
					if($latestChapter['subchapter'] !== '0') {
						$latestChapterString .= "/{$latestChapter['subchapter']}";
					}
					$titleData['latest_chapter'] = $latestChapterString;

					//No need to use date() here since this is already formatted as such.
					$titleData['last_updated'] = ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created']);

					$titleDataList[$stub] = $titleData;
				}
			} else {
				log_message('error', "{$this->site} - Custom updating failed (no chapters arg?) for {$this->baseURL}.");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}

	/**
	 * @param string $title_url
	 * @return string URL of the JSON API endpoint for a single title.
	 */
	public function getJSONTitleURL(string $title_url) : string {
		return "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";
	}

	/**
	 * @return string URL of the JSON API endpoint listing chapters, newest created first.
	 */
	public function getJSONUpdateURL() : string {
		return "{$this->baseURL}/api/reader/chapters/orderby/desc_created/format/json";
	}
}
689
690
/**
 * Class Base_myMangaReaderCMS_Site_Model
 *
 * Shared logic for myMangaReaderCMS-based sites. Title data is scraped from the title page, updates from the "/latest-release" page.
 */
abstract class Base_myMangaReaderCMS_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[a-zA-Z0-9\._-]+)$/';
	public $pageSeparator = '/';
	public $customType    = 2;

	/**
	 * @param string $title_url Title stub as used in the site's URLs.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/manga/{$title_url}";
	}

	/**
	 * Builds the chapter URL and a readable chapter number.
	 * Purely numeric chapters are prefixed with "c", anything else is used as-is.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//NOTE: "{$var}" is used rather than "${var}", as the latter is deprecated as of PHP 8.2.
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}

	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return string URL of the chapter page.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->getFullTitleURL($title_url).'/'.$chapter;
	}

	/**
	 * Scrapes the title page for the title name, latest chapter and last update time.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet Not used by this implementation.
	 * @return array|null Title data, or NULL if parseTitleDataDOM returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL);

		//NOTE(review): $content is passed on even if get_content failed; presumably parseTitleDataDOM handles that case itself - confirm.
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//h2[@class='widget-title'])[1]",
			"//ul[contains(@class, 'chapters')]/li[not(contains(@class, 'btn'))][1]",
			"div[contains(@class, 'action')]/div[@class='date-chapter-title-rtl']",
			'h5/a[1] | h3/a[1]',
			NULL,
			NULL,
			NULL,
			function(int $status_code, $data) {
				// We want to silently fail here.
				$success = ($status_code === 500 && strpos($data, 'Whoops, looks like something went wrong.') !== FALSE);
				return ['success' => $success, 'ignore' => TRUE];
			}
		);
		if($data) {
			$titleData['title'] = trim($data['nodes_title']->textContent);

			//The chapter is the segment two places after the last "manga" segment of the chapter link (.../manga/#TITLE#/#CHAPTER#).
			$segments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
			$needle = array_search('manga', array_reverse($segments, TRUE), TRUE) + 2;
			$titleData['latest_chapter'] = $segments[$needle];

			//Anything following " -" or " [A]" is stripped from the date text before it is parsed.
			$dateString = $data['nodes_latest']->nodeValue;
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the "/latest-release" page and returns the first chapter listed for each title on it.
	 *
	 * @return array Title data keyed by title stub. Empty if the request failed or no rows were found.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest-release";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			//Cut the page down to the <dl> list (if there is one) before parsing.
			$data = preg_replace('/^[\s\S]+<dl>/', '<dl>', $data);
			$data = preg_replace('/<\/dl>[\s\S]+$/', '</dl>', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//dl/dd | //div[@class='mangalist']/div[@class='manga-item']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("div[@class='events ']/div[@class='events-body']/h3[@class='events-heading']/a | h3/a", $row);
					$nodes_chapter = $xpath->query("(div[@class='events '][1]/div[@class='events-body'][1] | div[@class='manga-chapter'][1])/h6[@class='events-subtitle'][1]/a[1]", $row);
					$nodes_latest  = $xpath->query("div[@class='time'] | small", $row);

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title = $nodes_title->item(0);

						//The title stub is the last path segment of the title link.
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
						$title_url = $title_url_arr['url'];

						//Only the first row found for each title is kept.
						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->textContent);

							$chapter = $nodes_chapter->item(0);
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = str_replace('/', '-', trim($nodes_latest->item(0)->nodeValue)); //NOTE: We replace slashes here as it stops strtotime interpreting the date as US date format.
							if($dateString == 'T') {
								//NOTE(review): "T" presumably stands for "Today" - confirm against the site markup.
								$dateString = date("Y-m-d",now());
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString . ' 00:00'));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
}
808
809
/**
 * Class Base_GlossyBright_Site_Model
 *
 * Shared logic for GlossyBright-based sites. Title data is read from the site's RSS feed, updates are scraped from the front page.
 */
abstract class Base_GlossyBright_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';
	public $pageSeparator = '/';

	public $customType    = 2;

	public $version = 1; # New versions of GlossyBright have a diff style.

	/**
	 * @param string $title_url Title stub as used in the site's URLs.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/manga/{$title_url}";
	}

	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			'url'    => $this->getFullTitleURL($title_url).'/'.$chapter.'/',
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Reads the title's RSS feed for the title name, latest chapter and last update time.
	 * The feed URL and the way the fields are extracted depend on $this->version (1 or 2).
	 *
	 * @param string $title_url
	 * @param bool   $firstGet Not used by this implementation.
	 * @return array|null Title data, or NULL if nothing usable was parsed (or $this->version is neither 1 nor 2).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		if($this->version === 1) {
			$fullURL = "{$this->baseURL}/manga-rss/{$title_url}";
			$content = $this->get_content($fullURL);
			$data    = $this->parseTitleDataDOM(
				$content,
				$title_url,
				'//rss/channel/image/title',
				'//rss/channel/item[1]',
				'pubdate',
				'title',
				function($data) {
					return strpos($data, '<image>') === FALSE;
				}
			);
			if($data) {
				$titleData['title'] = preg_replace('/^Recent chapters of (.*?) manga$/', '$1', trim($data['nodes_title']->textContent));

				//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it elsewhere.
				$titleData['latest_chapter'] = preg_replace('/^.*? - ([0-9\.]+) - .*?$/', '$1', trim($data['nodes_chapter']->textContent));

				$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime((string) $data['nodes_latest']->textContent));
			}
		} elseif($this->version === 2) {
			$fullURL = "{$this->baseURL}/rss.php?manga={$title_url}";
			$content = $this->get_content($fullURL);
			$data    = $this->parseTitleDataDOM(
				$content,
				$title_url,
				'//rss/channel/image/title',
				'//rss/channel/item[1]',
				'pubdate',
				'',
				function($data) {
					return strpos($data, '<image>') === FALSE;
				}
			);
			if($data) {
				$titleData['title'] = preg_replace('/^Meraki Scans - (.*?)$/', '$1', trim($data['nodes_title']->textContent));

				//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it elsewhere.
				$chapter = preg_replace('/^.*?(https:\/\/.*)$/', '$1', trim($data['nodes_chapter']->textContent));

				//The chapter is the second to last segment of the URL.
				$chapterSegments = explode('/', $chapter);
				$titleData['latest_chapter'] = $chapterSegments[count($chapterSegments) - 2];

				$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime((string) $data['nodes_latest']->textContent));
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the site's front page and returns the first chapter listed for each title on it.
	 *
	 * @return array Title data keyed by title stub. Empty if the request failed or no rows were found.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		if(($content = $this->get_content($this->baseURL)) && $content['status_code'] == 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@id='mangalistitem']");

			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query(".//li[@id='manganame']/a", $row);
					$nodes_chapter = $xpath->query(".//li[@id='chaptername'][1]/a", $row);
					$nodes_latest  = $xpath->query(".//li[@id='chaptername'][1]/label", $row);

					//Rows without a date label fall back to the last text node of the row.
					if($nodes_latest->length === 0) {
						$nodes_latest = $xpath->query('text()[last()]', $row);
					}

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title   = $nodes_title->item(0);
						$chapter = $nodes_chapter->item(0);

						preg_match('/manga\/(?<url>.*?)\//', $title->getAttribute('href'), $title_url_arr);
						$title_url = $title_url_arr['url'];

						//Only the first row found for each title is kept.
						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->getAttribute('title'));

							//The chapter is the last path segment of the chapter link.
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							//Relative dates need to be converted before strtotime can handle them.
							$dateString = trim($nodes_latest->item(0)->textContent);
							switch($dateString) {
								case 'Today':
									$dateString = date("Y-m-d", now());
									break;

								case 'Yesterday':
									$dateString = date("Y-m-d", strtotime("-1 days"));
									break;

								default:
									//Do nothing
									break;
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
}
953
954
/**
 * Class Base_Roku_Site_Model
 *
 * Shared logic for Roku-based sites. Title data is scraped from the series page, updates from the "/latest" page.
 */
abstract class Base_Roku_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';

	public $customType    = 2;

	/**
	 * @param string $title_url Title stub as used in the site's URLs.
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/series/' . $title_url;
	}

	/**
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterURL = $this->baseURL . '/read/' . $title_url . '/' . $chapter;

		return [
			'url'    => $chapterURL,
			'number' => 'c' . $chapter
		];
	}

	/**
	 * Scrapes the series page for the title name, latest chapter and last update time.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet Not used by this implementation.
	 * @return array|null Title data, or NULL if parseTitleDataDOM returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$content = $this->get_content($this->getFullTitleURL($title_url));
		$parsed  = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@id='activity']/descendant::div[@class='media'][1]/descendant::div[@class='media-body']/h2/text()",
			"//ul[contains(@class, 'media-list')]/li[@class='media'][1]/a",
			"div[@class='media-body']/span[@class='text-muted']",
			''
		);
		if(!$parsed) {
			return NULL;
		}

		//The title text has an " Added on ..." suffix which needs to be stripped.
		$title = trim(preg_replace('/ Added on .*$/','', $parsed['nodes_title']->textContent));

		//The chapter is the trailing number of the chapter link.
		$latestChapter = preg_replace('/^.*\/([0-9\.]+)$/', '$1', (string) $parsed['nodes_chapter']->getAttribute('href'));

		$addedOn = preg_replace('/^Added (?:on )?/', '', $parsed['nodes_latest']->textContent);

		return [
			'title'          => $title,
			'latest_chapter' => $latestChapter,
			'last_updated'   => date("Y-m-d H:i:s", strtotime($addedOn))
		];
	}

	/**
	 * Scrapes the "/latest" page and returns the first chapter listed for each title on it.
	 *
	 * @return array Title data keyed by title stub. Empty if the request failed or no rows were found.
	 */
	public function doCustomUpdate() {
		$updates = [];

		$response = $this->get_content("{$this->baseURL}/latest");
		if(!$response || $response['status_code'] != 200) {
			log_message('error', "{$this->site} - Custom updating failed.");
			return $updates;
		}

		$document = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$document->loadHTML($response['body']);
		libxml_use_internal_errors(FALSE);

		$finder = new DOMXPath($document);
		$rows   = $finder->query("//div[@class='content-wrapper']/div[@class='row']/div/div");
		if($rows->length < 1) {
			log_message('error', "{$this->site} | Following list is empty?");
			return $updates;
		}

		foreach($rows as $row) {
			$titleNodes   = $finder->query("div[@class='caption']/h6/a", $row);
			$chapterNodes = $finder->query("div[@class='panel-footer no-padding']/a", $row);
			$latestNodes  = $finder->query("div[@class='caption']/text()", $row);

			//Each row must have exactly one title, chapter and date node to be usable.
			$isUsableRow = ($titleNodes->length === 1 && $chapterNodes->length === 1 && $latestNodes->length === 1);
			if(!$isUsableRow) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$titleNodes->length} | CHAPTER: {$chapterNodes->length}) | LATEST: {$latestNodes->length})");
				continue;
			}

			//The title stub is the last path segment of the title link.
			$titleLink = $titleNodes->item(0);
			preg_match('/(?<url>[^\/]+(?=\/$|$))/', $titleLink->getAttribute('href'), $titleMatch);
			$stub = $titleMatch['url'];

			//Only the first row found for each title is kept.
			if(array_key_exists($stub, $updates)) {
				continue;
			}

			$titleName = trim($titleLink->textContent);

			//The chapter is the last path segment of the chapter link.
			$chapterLink = $chapterNodes->item(0);
			preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapterLink->getAttribute('href'), $chapterMatch);
			$latestChapter = $chapterMatch['chapter'];

			$addedOn = trim(str_replace('Added ', '', $latestNodes->item(0)->textContent));

			$updates[$stub] = [
				'title'          => $titleName,
				'latest_chapter' => $latestChapter,
				'last_updated'   => date("Y-m-d H:i:s", strtotime($addedOn))
			];
		}

		return $updates;
	}
}
1046
1047
//CHECK: RSS might be better to use here?
1048
/**
 * Class Base_WP_Manga_Site_Model
 *
 * Shared implementation for sites whose title pages live under
 * "{baseURL}/{titleStub}/{title_url}/" and whose search listing is reachable
 * through the "post_type=wp-manga" query (see doCustomUpdate()).
 *
 * Concrete sites are expected to extend this class; anything not defined here
 * (get_content(), parseTitleDataDOM(), $baseURL, $site, ...) comes from Base_Site_Model.
 */
abstract class Base_WP_Manga_Site_Model extends Base_Site_Model {
	/** @var string Regex a title_url must match. */
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	/** @var string Regex a chapter identifier must match (optional "N-N/" prefix, then "oneshot" or a chapter slug). */
	public $chapterFormat = '/^(?:[0-9]+-[0-9]+\/)?(?:oneshot|(?:chapter-)?[0-9a-zA-Z\.\-_]+)$/';
	//TODO: Get PageSeparator

	/** @var int NOTE(review): the meaning of the value 2 is defined by the consumer of $customType, which is not visible in this class. */
	public $customType    = 2;

	/** @var string URL path segment that precedes the title slug. */
	public $titleStub = 'manga';

	/**
	 * Builds the absolute URL of a title page.
	 *
	 * @param string $title_url Title slug.
	 *
	 * @return string "{baseURL}/{titleStub}/{title_url}/"
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/{$this->titleStub}/{$title_url}/";
	}

	/**
	 * Builds the URL and a display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter identifier as stored for this site.
	 *
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$slashPos = strpos($chapter, '/');
		if($slashPos !== FALSE && $slashPos > 0) {
			// "<prefix>/<chapter>" form: the prefix is shown as the volume part.
			// NOTE(review): $chapterFormat uses "chapter-" (hyphen) while this strips "chapter_" (underscore),
			//               so the replacement may never match — confirm the intended display format before changing it.
			$chapterArr = explode('/', $chapter);
			$chapterN   = "v{$chapterArr[0]}/c".str_replace('chapter_', '', $chapterArr[1]);
		} else if(ctype_digit($chapter)) {
			// Digits only, so there is nothing to strip.
			$chapterN = 'c'.$chapter;
		}

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			// Anything that matched neither form above is shown verbatim.
			'number' => $chapterN ?? $chapter
		];
	}

	/**
	 * Builds the absolute URL of a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter identifier.
	 *
	 * @return string Title URL followed by "{chapter}/".
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->getFullTitleURL($title_url).$chapter.'/';
	}

	/**
	 * Fetches the title page and extracts title, latest chapter and last update time.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the page could not be
	 *                    parsed (including an unrecognised chapter link or an unparsable release date).
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		// NOTE(review): get_content() may return FALSE; this relies on parseTitleDataDOM() coping with that — confirm in Base_Site_Model.
		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//div[@class='post-title'])/h3[1]",
			"(//ul[contains(@class, 'list-chap') or contains(@class, 'version-chap')][1]/li[@class='wp-manga-chapter'])[1]",
			"span[@class='chapter-release-date']/i[1]",
			'a[1]',
			function($data) {
				return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
			}
		);
		if($data) {
			$latestChapter = $this->wpMangaChapterFromHref((string) $data['nodes_chapter']->getAttribute('href'));

			// Drop any trailing " - ..." / " [A]..." suffix before parsing the date.
			$dateString  = (string) preg_replace('/ (-|\[A\]).*$/', '', (string) $data['nodes_latest']->nodeValue);
			$lastUpdated = $this->wpMangaFormatDate($dateString);

			if($latestChapter !== NULL && $lastUpdated !== NULL) {
				$titleData['title']          = trim($data['nodes_title']->textContent);
				$titleData['latest_chapter'] = $latestChapter;
				$titleData['last_updated']   = $lastUpdated;
			} else {
				log_message('error', "{$this->site} : {$title_url} | Unable to parse latest chapter or release date");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the first page of the site's search listing and collects the latest chapter of every listed title.
	 *
	 * Rows that cannot be parsed are logged and skipped; the first row seen for a title wins.
	 *
	 * @return array Map of title_url => ['title', 'latest_chapter', 'last_updated'] (empty on failure).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/page/1/?s&post_type=wp-manga";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			// Trim everything outside the listing container. preg_replace() returns NULL on a PCRE error
			// (e.g. backtrack limit); in that case keep the untrimmed markup rather than losing it.
			$trimPatterns = [
				'/^[\s\S]+<!-- container & no-sidebar-->/',
				'/<div class="ad c-ads custom-code body-bottom-ads">[\s\S]+$/'
			];
			foreach($trimPatterns as $pattern) {
				$trimmed = preg_replace($pattern, '', $data);
				if($trimmed !== NULL) {
					$data = $trimmed;
				}
			}

			$dom = new DOMDocument();
			// Silence markup warnings while parsing, then restore whatever mode the caller had.
			$previousErrorMode = libxml_use_internal_errors(TRUE);
			if($data !== '') {
				// loadHTML() rejects an empty string; an empty document simply yields no rows below.
				$dom->loadHTML($data);
			}
			libxml_clear_errors();
			libxml_use_internal_errors($previousErrorMode);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@class='tab-content-wrap']/div/div[@class='row']/div[@class='c-tabs-item__content']/div[@class='col-sm-10 col-md-10']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$nodes_title   = $xpath->query("div[@class='tab-summary']/div[@class='post-title']/h4/a", $row);
					$nodes_chapter = $xpath->query("div[@class='tab-meta']/div[@class='meta-item latest-chap']/span[@class='font-meta chapter']/a", $row);
					$nodes_latest  = $xpath->query("div[@class='tab-meta']/div[@class='meta-item post-on']/span[@class='font-meta']", $row);

					if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						continue;
					}

					$title = $nodes_title->item(0);

					// The title slug is the last non-empty path segment of the link.
					if(preg_match('/(?<url>[^\/]+(?=\/$|$))/', (string) $title->getAttribute('href'), $title_url_arr) !== 1) {
						log_message('error', "{$this->site}/Custom | Unable to parse title URL from row");
						continue;
					}
					$title_url = $title_url_arr['url'];

					if(array_key_exists($title_url, $titleDataList)) {
						// Already recorded from an earlier row.
						continue;
					}

					$latestChapter = $this->wpMangaChapterFromHref((string) $nodes_chapter->item(0)->getAttribute('href'));
					$lastUpdated   = $this->wpMangaFormatDate((string) $nodes_latest->item(0)->nodeValue);
					if($latestChapter === NULL || $lastUpdated === NULL) {
						log_message('error', "{$this->site}/Custom | Unable to parse latest chapter or release date for {$title_url}");
						continue;
					}

					$titleDataList[$title_url] = [
						'title'          => trim($title->textContent),
						'latest_chapter' => $latestChapter,
						'last_updated'   => $lastUpdated
					];
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}

	/**
	 * Extracts the chapter part of a chapter link: everything after "{titleStub}/{title slug}/".
	 *
	 * @param string $href Chapter link as found in the page.
	 *
	 * @return string|null Remaining path segments joined by "/", or NULL when $titleStub is not a segment of $href.
	 */
	private function wpMangaChapterFromHref(string $href) : ?string {
		$segments = explode('/', $href);

		// Searching the key-preserving reversed list yields the index of the LAST occurrence of the stub.
		$stubIndex = array_search($this->titleStub, array_reverse($segments, TRUE), TRUE);
		if($stubIndex === FALSE) {
			return NULL;
		}

		// +2 skips the stub itself and the title slug that follows it.
		return implode('/', array_slice($segments, $stubIndex + 2));
	}

	/**
	 * Converts a human readable date into "Y-m-d H:i:s".
	 *
	 * @param string $dateString Any text strtotime() understands.
	 *
	 * @return string|null Formatted timestamp, or NULL when the text cannot be parsed.
	 */
	private function wpMangaFormatDate(string $dateString) : ?string {
		$timestamp = strtotime($dateString);

		return ($timestamp !== FALSE ? date('Y-m-d H:i:s', $timestamp) : NULL);
	}
}
1167