Completed
Push — master ( 8d22e9...3ee909 )
by Angus
02:52
created

Base_WP_Manga_Site_Model::getChapterData()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 2
dl 0
loc 7
ccs 0
cts 2
cp 0
crap 6
rs 9.4285
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
 * Class Tracker_Sites_Model
 *
 * Lazy loader for the individual site models: reading an undefined property on
 * this object either instantiates the site class of that name (see __get) or
 * forwards the lookup to the object returned by get_instance().
 */
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolves a property that is not (yet) defined on this object.
	 *
	 * If $name is an existing class whose direct parent is one of the known base
	 * site models, the class is instantiated, stored on $this under the same name
	 * and returned. Any other name is read from get_instance() instead.
	 *
	 * @param string $name
	 * @return mixed
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$siteBaseClasses = [
			'Base_Site_Model',
			'Base_FoolSlide_Site_Model',
			'Base_myMangaReaderCMS_Site_Model',
			'Base_GlossyBright_Site_Model',
			'Base_Roku_Site_Model',
			'Base_WP_Manga_Site_Model'
		];

		$isSiteModel = class_exists($name) && in_array(get_parent_class($name), $siteBaseClasses);
		if($isSiteModel) {
			$this->loadSite($name);
			return $this->{$name};
		}

		//Not a site model, defer to whatever get_instance() exposes under that name.
		return get_instance()->{$name};
	}

	/**
	 * Instantiates $siteName and stores the instance on $this under the same name.
	 *
	 * @param string $siteName
	 */
	private function loadSite(string $siteName) : void {
		$this->{$siteName} = new $siteName();
	}
}
35
36
abstract class Base_Site_Model extends CI_Model {
37
	public $site          = '';
38
	public $titleFormat   = '//';
39
	public $chapterFormat = '//';
40
	public $hasCloudFlare = FALSE;
41
	public $userAgent     = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36';
42
43
	public $baseURL = '';
44
45
	/**
46
	 * 0: No custom updater.
47
	 * 1: Uses following page.
48
	 * 2: Uses latest releases page.
49
	 */
50
	public $customType = 0;
51
52
	public $canHaveNoChapters = FALSE;
53
54
	public $siteRateLimit = 600;
55
56 16
	/**
	 * Runs the parent constructor, loads the database and stores the concrete
	 * class name in $this->site.
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//The concrete class name doubles as the site identifier (used for cache keys & log messages below).
		$this->site = static::class;
	}
63
64
	/**
65
	 * Generates URL to the title page of the requested series.
66
	 *
67
	 * NOTE: In some cases, we are required to store more data in the title_string than is needed to generate the URL. (Namely as the title_string is our unique identifier for that series)
68
	 *       When storing additional data, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
69
	 *
70
	 * Example:
71
	 *    return "http://mangafox.me/manga/{$title_url}/";
72
	 *
73
	 * Example (with extra data):
74
	 *    $title_parts = explode(':--:', $title_url);
75
	 *    return "https://bato.to/comic/_/comics/-r".$title_parts[0];
76
	 *
77
	 * @param string $title_url
78
	 * @return string
79
	 */
80
	abstract public function getFullTitleURL(string $title_url) : string;
81
82
	/**
83
	 * Generates chapter data from given $title_url and $chapter.
84
	 *
85
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
86
	 *
87
	 * NOTE: In some cases, we are required to store the chapter number, and the segment required to generate the chapter URL separately.
88
	 *       Much like when generating the title URL, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
89
	 *
90
	 * Example:
91
	 *     return [
92
	 *        'url'    => $this->getFullTitleURL($title_url).'/'.$chapter,
93
	 *        'number' => "c{$chapter}"
94
	 *    ];
95
	 *
96
	 * @param string $title_url
97
	 * @param string $chapter
98
	 * @return array [url, number]
99
	 */
100
	abstract public function getChapterData(string $title_url, string $chapter) : array;
101
102
	/**
103
	 * Used to get the latest chapter of given $title_url.
104
	 *
105
	 * This <should> utilize both get_content and parseTitleDataDOM functions when possible, as these can both reduce a lot of the code required to set this up.
106
	 *
107
	 * $titleData params must be set accordingly:
108
	 * * `title` should always be used with html_entity_decode.
109
	 * * `latest_chapter` must match $this->chapterFormat.
110
	 * * `last_updated` should always be in date("Y-m-d H:i:s") format.
111
	 * * `followed` should never be set within getTitleData, except via an array_merge with doCustomFollow.
112
	 *
113
	 * $firstGet is set to true when the series is first added to the DB, and is used to follow the series on given site (if possible).
114
	 *
115
	 * @param string $title_url
116
	 * @param bool   $firstGet
117
	 * @return array|null [title,latest_chapter,last_updated,followed?]
118
	 */
119
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
120
121
	/**
	 * Fetches title data for $title_url unless the per-site rate limit has been exceeded.
	 *
	 * The stored request counter is compared against $this->siteRateLimit; while it is
	 * not above the limit the counter is bumped and getTitleData is called.
	 *
	 * @param string $title_url
	 * @return array [limited => bool, titleData => array|null]
	 */
	public function handleBatchUpdate(string $title_url) : array {
		$requestCount = $this->_getSiteRateLimit();

		if($requestCount > $this->siteRateLimit) {
			//Over the limit, skip the request entirely.
			return [
				'limited'   => TRUE,
				'titleData' => NULL
			];
		}

		$this->_setSiteRateLimit($requestCount);

		return [
			'limited'   => FALSE,
			'titleData' => $this->getTitleData($title_url)
		];
	}
135
136
	/**
	 * Checks $title_url against $this->titleFormat.
	 *
	 * A title which fails this check is logged as an error; per the original note,
	 * such a series will not be added to the DB.
	 *
	 * @param string $title_url
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	final public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
149
150
	/**
	 * Checks $chapter against $this->chapterFormat.
	 *
	 * A chapter which fails this check is logged as an error; per the original note,
	 * such a chapter will not be updated.
	 *
	 * @param string $chapter
	 * @return bool TRUE when the pattern matches, FALSE otherwise.
	 */
	final public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
163
164
165
166
	/**
	 * Returns $chapter unchanged.
	 *
	 * NOTE(review): presumably a hook for site models that need to trim extra data
	 *               from the stored chapter string - confirm against the overrides.
	 *
	 * @param string $chapter
	 * @return string
	 */
	public function stripChapter(string $chapter) : string {
		return $chapter;
	}
169
170
	/**
	 * Used by getTitleData (& similar functions) to get the requested page data.
	 *
	 * The request is attempted at most twice: if the first attempt returns a 503 and
	 * handleCloudFlare reports that fresh CloudFlare cookies were stored, the request
	 * is repeated once with those cookies.
	 *
	 * @param string $url
	 * @param string $cookie_string   Cookie header value supplied by the caller. Cached CloudFlare cookies for this site are appended per attempt.
	 * @param string $cookiejar_path  Passed to CURLOPT_COOKIEFILE when not empty.
	 * @param bool   $follow_redirect
	 * @param bool   $isPost
	 * @param array  $postFields      Sent url-encoded via http_build_query when $isPost is set.
	 *
	 * @return array|bool [headers, status_code, body] or FALSE if curl itself failed.
	 */
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		//Defaults, so the returned array is well-defined on every path.
		$headers     = [];
		$status_code = 0;
		$body        = '';

		$refresh = TRUE; //For sites that have CloudFlare, we want to loop get_content again.
		$loops   = 0;
		while($refresh && $loops < 2) {
			$refresh = FALSE;
			$loops++;

			$ch = curl_init();
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_ENCODING , "gzip");
			//curl_setopt($ch, CURLOPT_VERBOSE, 1);
			curl_setopt($ch, CURLOPT_HEADER, 1);

			if($follow_redirect) curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

			//Build the cookie header from scratch on every attempt. Appending to $cookie_string itself
			//would make the retry carry the cookies of the previous attempt as well, and would produce
			//a leading "; " whenever the caller didn't pass any cookies.
			$requestCookies = $cookie_string;
			if($cookies = $this->cache->get("cloudflare_{$this->site}")) {
				$requestCookies = ($cookie_string !== '' ? "{$cookie_string}; {$cookies}" : "{$cookies}");
			}

			if(!empty($requestCookies)) curl_setopt($ch, CURLOPT_COOKIE, $requestCookies);
			if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

			//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
			curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

			//NOTE: This is required for SSL URLs for now. Without it we tend to get error code 60.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);

			curl_setopt($ch, CURLOPT_URL, $url);

			if($isPost) {
				//CURLOPT_POST is a flag, not a field count. Setting CURLOPT_POSTFIELDS makes this a POST regardless.
				curl_setopt($ch, CURLOPT_POST, TRUE);
				curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
			}

			$response = curl_exec($ch);

			$this->Tracker->admin->incrementRequests();

			if($response === FALSE) {
				$errorNumber  = curl_errno($ch);
				$errorMessage = curl_error($ch);
				curl_close($ch); //Don't leak the handle on the failure path.

				log_message('error', "curl failed with error: ".$errorNumber." | ".$errorMessage);
				//FIXME: We don't always account for FALSE return
				return FALSE;
			}

			$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			$headers     = http_parse_headers(substr($response, 0, $header_size));
			$body        = substr($response, $header_size);
			curl_close($ch);

			if($status_code === 503) $refresh = $this->handleCloudFlare($url, $body);
		}

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}
242
243
	/**
	 * Checks whether $body looks like a CloudFlare challenge page and, if so, tries to
	 * obtain & cache bypass cookies for this site via the helper script.
	 *
	 * @param string $url  URL that produced the challenge; passed on to the helper script.
	 * @param string $body Response body of the failed request.
	 * @return bool TRUE if new cookies were cached and the request should be retried.
	 */
	private function handleCloudFlare(string $url, string $body) : bool {
		$isChallengePage = (strpos($body, 'DDoS protection by Cloudflare') !== FALSE)
			|| (strpos($body, '<input type="hidden" id="jschl-answer" name="jschl_answer"/>') !== FALSE);

		if(!$isChallengePage) {
			//Either site doesn't have CloudFlare or we have bypassed it. Either is good!
			return FALSE;
		}

		//print "Cloudflare detected? Grabbing Cookies.\n";
		//TODO: If !$this->hasCloudFlare, the site appears to have enabled CloudFlare, disable it and contact admin.
		//      We'll continue to bypass CloudFlare as this may occur in a loop.

		$urlData = [
			'url'        => $url,
			'user_agent' => $this->userAgent
		];
		//TODO: shell_exec seems bad since the URLs "could" be user inputted? Better way of doing this?
		$result = shell_exec('python '.APPPATH.'../_scripts/get_cloudflare_cookie.py '.escapeshellarg(json_encode($urlData)));

		//shell_exec returns NULL on failure / no output, which json_decode rejects with a TypeError under strict_types.
		$cookieData = (is_string($result) ? json_decode($result, TRUE) : NULL);
		if(!is_array($cookieData) || empty($cookieData['cookies'])) {
			//Nothing usable came back; don't cache an empty value and don't trigger a pointless retry.
			log_message('error', "Failed to get CloudFlare Cookies for {$this->site}");
			return FALSE;
		}

		$this->cache->save("cloudflare_{$this->site}", $cookieData['cookies'],  31536000 /* 1 year, or until we renew it */);
		log_message('debug', "Saving CloudFlare Cookies for {$this->site}");

		return TRUE;
	}
270
271
	/**
	 * Used by getTitleData to get the title, latest_chapter & last_updated nodes from the data returned by get_content.
	 *
	 * The content is rejected (logged, FALSE returned) when:
	 * * it isn't an array (the request itself failed),
	 * * the status code is outside 2xx (a 502 is rejected silently),
	 * * the body is empty,
	 * * $failureCall is given and returns a truthy value for the body.
	 *
	 * The body is cleaned by cleanTitleDataDOM prior to being passed to DOMDocument.
	 *
	 * All $node_* params must be XPath to the requested node, and must only return 1 result. Anything else is a failure.
	 * $node_latest_string and $node_chapter_string are evaluated relative to the row node; an empty
	 * $node_chapter_string means the row node itself is used as the chapter node.
	 *
	 * @param array|mixed  $content             Return value of get_content.
	 * @param string       $title_url           Only used in log messages.
	 * @param string       $node_title_string
	 * @param string       $node_row_string
	 * @param string       $node_latest_string
	 * @param string       $node_chapter_string
	 * @param closure|null $failureCall         Called with ($data); truthy result = failure.
	 * @param closure|null $noChaptersCall      Called with ($data, $xpath, $returnData) when no single row was found and canHaveNoChapters is set.
	 *                                          NOTE(review): presumably takes $returnData by reference - confirm against callers.
	 * @param closure|null $extraCall           Called with ($xpath, $returnData) on success.
	 *
	 * @return DOMElement[]|false [nodes_title,nodes_chapter,nodes_latest]
	 */
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		?closure $failureCall = NULL, ?closure $noChaptersCall = NULL, ?closure $extraCall = NULL) {

		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			//A 502 means the site is overloaded, no need to log this.
			if($status_code !== 502) {
				log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			}
			return FALSE;
		}

		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}

		if(!is_null($failureCall) && $failureCall($data)) {
			log_message('error', "{$this->site} : {$title_url} | Failure call matched");
			return FALSE;
		}

		$data = $this->cleanTitleDataDOM($data); //This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.

		$dom = new DOMDocument();
		//Silence parser warnings for the (usually malformed) HTML, then put the error mode back the way we found it.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);

		if($nodes_title->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length})");
			return FALSE;
		}

		if($nodes_row->length === 1) {
			$firstRow     = $nodes_row->item(0);
			$nodes_latest = $xpath->query($node_latest_string, $firstRow);

			if($node_chapter_string !== '') {
				$nodes_chapter = $xpath->query($node_chapter_string, $firstRow);
			} else {
				$nodes_chapter = $nodes_row;
			}

			if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
				log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
				return FALSE;
			}

			$returnData = [
				'nodes_title'   => $nodes_title->item(0),
				'nodes_latest'  => $nodes_latest->item(0),
				'nodes_chapter' => $nodes_chapter->item(0)
			];

			if(is_callable($extraCall)) $extraCall($xpath, $returnData);

			return $returnData;
		}

		if($this->canHaveNoChapters && !is_null($noChaptersCall)) {
			$returnData = [
				'nodes_title'   => $nodes_title->item(0)
			];

			$noChaptersCall($data, $xpath, $returnData);

			if(is_array($returnData)) {
				if(is_callable($extraCall)) $extraCall($xpath, $returnData);
			} else {
				log_message('error', "{$this->site} : {$title_url} | canHaveNoChapters set, but doesn't match possible checks! XPath is probably broken.");
			}

			//Returned as-is, whatever $noChaptersCall left in it.
			return $returnData;
		}

		log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (ROW: {$nodes_row->length})");
		return FALSE;
	}
374
375
	/**
	 * Hook called by parseTitleDataDOM on the response body before it is handed to DOMDocument & DOMXPath.
	 *
	 * This base implementation returns $data untouched. Per the original note, the intent is for
	 * sites to cut the markup down to the part that is needed, as an (assumed) speed improvement
	 * or simply to make the XPath easier.
	 *
	 * @param string $data
	 * @return string
	 */
	public function cleanTitleDataDOM(string $data) : string {
		return $data;
	}
385
386
	/**
	 * Used to follow a series on given site if supported.
	 *
	 * Per the original note, this is called by getTitleData if $firstGet is true (which occurs
	 * when the series is first being added to the DB).
	 *
	 * The actual request is made by handleCustomFollow; this method only supplies the callback
	 * which inspects the response and records the outcome.
	 *
	 * @param string $data
	 * @param array  $extra
	 * @return array ['followed' => 'Y'] if the follow was confirmed, otherwise an empty array.
	 */
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		$responseHandler = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}

			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			//The success check is optional; when there is one it has to pass.
			if(is_callable($successCallback) && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($responseHandler, $data, $extra);

		return $titleData;
	}
424
425
	/**
	 * Used by doCustomFollow to handle following series on sites.
	 *
	 * $callback accepts ($content, $id, closure $successCallback = NULL).
	 * * $content is the get_content-style response array.
	 * * $id is the dbID. This should be passed by the $extra arr.
	 * * $successCallback is an optional success check to make sure the series was properly followed.
	 *
	 * This base implementation makes no request: for customType 2 it reports an immediate
	 * success (status code 200) to $callback, and does nothing for any other customType.
	 * $data is not used here.
	 *
	 * @param callable $callback
	 * @param string   $data
	 * @param array    $extra
	 */
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		if($this->customType !== 2) {
			return;
		}

		$callback(['status_code' => 200], $extra['id']);
	}
445
446
	/**
	 * Used to check the site's following page for new updates (if supported).
	 * This should work much like getTitleData, but instead checks the following page.
	 *
	 * Implementations must return an array containing one array of chapter data per title.
	 * This base implementation is empty and returns nothing.
	 */
	public function doCustomUpdate() {}
453
454
	/**
	 * Used by the custom updater to check if a chapter looks newer than the current one.
	 *
	 * Both chapter strings are run through getChapterData (with an empty title_url), the resulting
	 * 'number' is split on '/' and the segments are handed to doCustomCheckCompare.
	 * When there is no old chapter at all, the new one always counts as newer.
	 *
	 * @param string|null $oldChapterString
	 * @param string      $newChapterString
	 * @return bool
	 */
	public function doCustomCheck(?string $oldChapterString, string $newChapterString) : bool {
		if(is_null($oldChapterString)) {
			return TRUE;
		}

		$toSegments = function(string $chapterString) : array {
			return explode('/', $this->getChapterData('', $chapterString)['number']);
		};

		$oldChapterSegments = $toSegments($oldChapterString);
		$newChapterSegments = $toSegments($newChapterString);

		return $this->doCustomCheckCompare($oldChapterSegments, $newChapterSegments);
	}
476
477
	/**
	 * Used by doCustomCheck to check if a chapter looks newer than the current one.
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format, already split on '/'.
	 *
	 * To avoid issues with the occasional off case, this only returns true if the new chapter
	 * clearly looks newer than the old one:
	 * * the chapter number is higher, or
	 * * the volume number is higher and the chapter number is either not lower, or both chapter
	 *   numbers are below 10 (sites which restart chapter numbering with every volume).
	 *
	 * @param array $oldChapterSegments
	 * @param array $newChapterSegments
	 * @return bool
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			//Formats don't line up, so we can't be sure of anything.
			return FALSE;
		}

		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//Drop the leading "v" to get the volume number.
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];
			if(in_array($oldVolume, $placeholderVolumes)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		//Drop the leading "c" to get the chapter number.
		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		$chapterIsHigher = ($newChapter > $oldChapter);
		$volumeIsHigher  = ($newVolume > $oldVolume);
		//Matches the one-off case where the site doesn't increment chapter numbers across volumes (v1/c1..v1/c5, v2/c1..v2/c5 and so on).
		$bothBelowTen    = ($oldChapter < 10 && $newChapter < 10);

		return $chapterIsHigher
			|| ($volumeIsHigher && ($bothBelowTen || $newChapter >= $oldChapter));
	}
537
538
	/**
	 * Reads the request counter cached under "{site}_ratelimit".
	 *
	 * @return int The cached counter, or 0 when nothing (or a falsy value) is cached.
	 */
	private function _getSiteRateLimit() : int {
		//No "final" here: a private method can't be overridden anyway, and PHP 8 warns about "final private".
		return (int) ($this->cache->get("{$this->site}_ratelimit") ?: 0);
	}
541
	/**
	 * Stores the request counter, incremented by one, under "{site}_ratelimit" with a TTL of 3600 seconds.
	 *
	 * @param int|null $rateLimit Current counter value; looked up via _getSiteRateLimit when NULL.
	 * @return bool Result of the cache save.
	 */
	private function _setSiteRateLimit(?int $rateLimit = NULL) : bool {
		//We would just use increment(), but we can't set ttl with it...
		//NOTE: ?? rather than ?: so that an explicitly passed 0 doesn't cause a second cache lookup.
		$currentRateLimit = $rateLimit ?? $this->_getSiteRateLimit();
		return $this->cache->save("{$this->site}_ratelimit", $currentRateLimit + 1,3600);
	}
546
}
547
548
/**
 * Base model for sites running the FoolSlide reader.
 *
 * Title data and the list of recent chapters are both read from the JSON API
 * located under {baseURL}/api/reader/ (see getJSONTitleURL() and getJSONUpdateURL()).
 */
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^(?:en(?:-us)?|pt|es)\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
	public $customType    = 2;

	/**
	 * @param string $title_url Title stub as used in the site's URLs.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * Build the reader URL and a display number for a chapter.
	 *
	 * @param string $title_url Title stub.
	 * @param string $chapter   Chapter string in the form #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/).
	 *
	 * @return array ['url' => string, 'number' => string]; number is "v#/c#.#", with the volume part
	 *               omitted when the volume is '0' and the ".#" suffix omitted when there is no extra part.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter); //returns #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)

		$volumePrefix = ($chapter_parts[1] !== '0' ? "v{$chapter_parts[1]}/" : '');
		//$chapterFormat allows a string without a chapter segment; fall back to '' rather than raising a notice.
		$chapterPart  = 'c' . ($chapter_parts[2] ?? '');
		$extraSuffix  = (isset($chapter_parts[3]) ? ".{$chapter_parts[3]}" : '');

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $volumePrefix . $chapterPart . $extraSuffix
		];
	}

	/**
	 * @param string $title_url Title stub.
	 * @param string $chapter   Chapter string (see getChapterData()).
	 *
	 * @return string URL of the chapter in the reader.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return "{$this->baseURL}/read/{$title_url}/{$chapter}/";
	}

	/**
	 * Fetch the title name, latest chapter and last update time from the title JSON API.
	 *
	 * @param string $title_url Title stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when the request fails,
	 *                    the body is not usable JSON, or no chapters are listed.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$jsonURL = $this->getJSONTitleURL($title_url);
		if($content = $this->get_content($jsonURL)) {
			$json = json_decode($content['body'], TRUE);
			if($json && isset($json['chapters']) && count($json['chapters']) > 0) {
				$titleData['title'] = trim($json['comic']['name']);

				//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does), so we need to sort ourselves..
				//Chapter and subchapter are compared as separate numbers: gluing them into a single float
				//("1.10" vs "1.2") would rank subchapter 10 below subchapter 2.
				usort($json['chapters'], function($a, $b) {
					return [(float) $b['chapter']['chapter'], (int) $b['chapter']['subchapter']]
					   <=> [(float) $a['chapter']['chapter'], (int) $a['chapter']['subchapter']];
				});
				$latestChapter = reset($json['chapters'])['chapter'];

				$titleData['latest_chapter'] = $this->buildChapterString($latestChapter);

				//No need to use date() here since this is already formatted as such.
				$titleData['last_updated'] = $this->getChapterDate($latestChapter);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Read the site-wide chapter list and return the newest chapter of every title found in it.
	 *
	 * @return array Map of title stub => ['title', 'latest_chapter', 'last_updated'].
	 *               Empty when the request or JSON decoding fails (an error is logged).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$jsonURL = $this->getJSONUpdateURL();
		if(($content = $this->get_content($jsonURL)) && $content['status_code'] == 200) {
			if(($json = json_decode($content['body'], TRUE)) && isset($json['chapters'])) {
				//This should fix edge cases where chapters are uploaded in bulk in the wrong order (HelveticaScans does this with Mousou Telepathy).
				usort($json['chapters'], function($a, $b) {
					$a_date = new DateTime($this->getChapterDate($a['chapter']));
					$b_date = new DateTime($this->getChapterDate($b['chapter']));
					return $b_date <=> $a_date;
				});

				foreach($json['chapters'] as $chapterData) {
					$stub = $chapterData['comic']['stub'];
					if(array_key_exists($stub, $titleDataList)) {
						//We already have title data for this title (the list is sorted newest first).
						continue;
					}

					$latestChapter = $chapterData['chapter'];

					$titleDataList[$stub] = [
						'title'          => trim($chapterData['comic']['name']),
						//Uses the language reported by the API (falling back to 'en') so the string has the same shape as the one built in getTitleData().
						'latest_chapter' => $this->buildChapterString($latestChapter),
						//No need to use date() here since this is already formatted as such.
						'last_updated'   => $this->getChapterDate($latestChapter)
					];
				}
			} else {
				log_message('error', "{$this->site} - Custom updating failed (no chapters arg?) for {$this->baseURL}.");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}

	/**
	 * @param string $title_url Title stub.
	 *
	 * @return string URL of the JSON API endpoint describing a single title.
	 */
	public function getJSONTitleURL(string $title_url) : string {
		return "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";
	}

	/**
	 * @return string URL of the JSON API endpoint listing chapters ordered by creation date (descending).
	 */
	public function getJSONUpdateURL() : string {
		return "{$this->baseURL}/api/reader/chapters/orderby/desc_created/format/json";
	}

	/**
	 * Build the "#LANG#/#VOLUME#/#CHAPTER#(/#SUBCHAPTER#)" string for an API chapter entry.
	 *
	 * @param array $chapter The 'chapter' element of an API chapter entry.
	 *
	 * @return string Chapter string; the subchapter is appended only when it is not '0',
	 *                and 'en' is used when the entry carries no language.
	 */
	private function buildChapterString(array $chapter) : string {
		$language = $chapter['language'] ?? 'en';

		$chapterString = "{$language}/{$chapter['volume']}/{$chapter['chapter']}";
		if($chapter['subchapter'] !== '0') {
			$chapterString .= "/{$chapter['subchapter']}";
		}

		return $chapterString;
	}

	/**
	 * Pick the date to report for an API chapter entry.
	 *
	 * @param array $chapter The 'chapter' element of an API chapter entry.
	 *
	 * @return mixed The 'updated' value, or the 'created' value when 'updated' is the zero date.
	 */
	private function getChapterDate(array $chapter) {
		return ($chapter['updated'] !== '0000-00-00 00:00:00' ? $chapter['updated'] : $chapter['created']);
	}
}
652
653
/**
 * Base model for myMangaReaderCMS sites.
 *
 * Title data is scraped from the title page ({baseURL}/manga/{title}) and bulk updates
 * from {baseURL}/latest-release.
 */
abstract class Base_myMangaReaderCMS_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[0-9\.]+)$/';
	public $customType    = 2;

	/**
	 * @param string $title_url Title slug as used in the site's URLs.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/manga/{$title_url}";
	}

	/**
	 * Build the chapter URL and a display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter slug.
	 *
	 * @return array ['url' => string, 'number' => string]; purely numeric chapters are prefixed with "c",
	 *               anything else is returned unchanged.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//"{$var}" form: the "${var}" interpolation syntax is deprecated as of PHP 8.2.
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter slug.
	 *
	 * @return string URL of the chapter.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->getFullTitleURL($title_url).'/'.$chapter;
	}

	/**
	 * Scrape the title name, latest chapter and last update time from the title page.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM()
	 *                    yields nothing or the chapter link does not contain a usable ".../manga/{title}/{chapter}" path.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);

		//NOTE(review): static analysis reports that get_content() may return false; presumably parseTitleDataDOM() copes with that - confirm.
		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//h2[@class='widget-title'])[1]",
			"//ul[contains(@class, 'chapters')]/li[not(contains(@class, 'btn'))][1]",
			"div[contains(@class, 'action')]/div[@class='date-chapter-title-rtl']",
			'h5/a[1] | h3/a[1]',
			function($data) {
				return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
			}
		);
		if($data) {
			$chapterHref = (string) $data['nodes_chapter']->getAttribute('href');
			$segments    = explode('/', $chapterHref);

			//The chapter is the second segment after the LAST "manga" segment (…/manga/{title}/{chapter}).
			//array_search() returns FALSE when there is no such segment; "FALSE + 2" would silently become index 2.
			$mangaIndex = array_search('manga', array_reverse($segments, TRUE), TRUE);
			if($mangaIndex !== FALSE && isset($segments[$mangaIndex + 2])) {
				$titleData['title'] = trim($data['nodes_title']->textContent);

				$titleData['latest_chapter'] = $segments[$mangaIndex + 2];

				$dateString = $data['nodes_latest']->nodeValue;
				$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
			} else {
				log_message('error', "{$this->site} : {$title_url} | Unable to find chapter segment in URL ({$chapterHref})");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrape the "latest release" page and return the newest chapter of every title listed there.
	 *
	 * @return array Map of title slug => ['title', 'latest_chapter', 'last_updated'].
	 *               Empty when the request fails or no rows are found (an error is logged).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest-release";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			//Cut the document down to the list markup before parsing.
			$data = preg_replace('/^[\s\S]+<dl>/', '<dl>', $data);
			$data = preg_replace('/<\/dl>[\s\S]+$/', '</dl>', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//dl/dd | //div[@class='mangalist']/div[@class='manga-item']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("div[@class='events ']/div[@class='events-body']/h3[@class='events-heading']/a | h3/a", $row);
					$nodes_chapter = $xpath->query("(div[@class='events '][1]/div[@class='events-body'][1] | div[@class='manga-chapter'][1])/h6[@class='events-subtitle'][1]/a[1]", $row);
					$nodes_latest  = $xpath->query("div[@class='time'] | small", $row);

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title = $nodes_title->item(0);

						//Last path segment of the link is the title slug.
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
						$title_url = $title_url_arr['url'];

						//Only the first row seen for a title is kept.
						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->textContent);

							$chapter = $nodes_chapter->item(0);
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = str_replace('/', '-', trim($nodes_latest->item(0)->nodeValue)); //NOTE: We replace slashes here as it stops strtotime interpreting the date as US date format.
							if($dateString === 'T') {
								//A bare "T" is replaced with the current date.
								$dateString = date("Y-m-d", now());
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString . ' 00:00'));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
}
765
766
/**
 * Base model for "GlossyBright" sites.
 *
 * Title data is read from the per-title RSS feed ({baseURL}/manga-rss/{title}) and bulk updates
 * are scraped from the site's front page.
 */
abstract class Base_GlossyBright_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';

	public $customType    = 2;

	/**
	 * @param string $title_url Title slug as used in the site's URLs.
	 *
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/{$title_url}";
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 *
	 * @return array ['url' => string, 'number' => string]; number is the chapter prefixed with "c".
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			'url'    => $this->getFullTitleURL($title_url).'/'.$chapter.'/',
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Read the title name, latest chapter and last update time from the title's RSS feed.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = "{$this->baseURL}/manga-rss/{$title_url}";
		//NOTE(review): static analysis reports that get_content() may return false; presumably parseTitleDataDOM() copes with that - confirm.
		$content = $this->get_content($fullURL);
		$data    = $this->parseTitleDataDOM(
			$content,
			$title_url,
			'//rss/channel/image/title',
			'//rss/channel/item[1]',
			'pubdate',
			'title',
			function($data) {
				return strpos($data, '<image>') === FALSE;
			}
		);
		if($data) {
			$titleData['title'] = preg_replace('/^Recent chapters of (.*?) manga$/', '$1', trim($data['nodes_title']->textContent));

			//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it elsewhere.
			$titleData['latest_chapter'] = preg_replace('/^.*? - ([0-9\.]+) - .*?$/', '$1', trim($data['nodes_chapter']->textContent));

			$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime((string) $data['nodes_latest']->textContent));
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrape the front page and return the newest chapter of every title listed there.
	 *
	 * @return array Map of title slug => ['title', 'latest_chapter', 'last_updated'].
	 *               Empty when the request fails or no rows are found (an error is logged).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		//preg_quote() escapes every regex metacharacter in the host (not just "."); the cast covers a base URL without a host.
		$baseURLRegex = preg_quote((string) parse_url($this->baseURL, PHP_URL_HOST), '/');
		if(($content = $this->get_content($this->baseURL)) && $content['status_code'] == 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@id='wpm_mng_lst']/div | //*[@id='wpm_mng_lst']/li/div");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					//The same link is used for both the title (its "title" attribute) and the chapter (its href).
					$nodes_title   = $xpath->query("a[2]", $row);
					$nodes_chapter = $xpath->query("a[2]", $row);
					$nodes_latest  = $xpath->query("b", $row);

					if($nodes_latest->length === 0) {
						//No <b> element: fall back to the last text node of the row.
						$nodes_latest = $xpath->query('text()[last()]', $row);
					}

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title   = $nodes_title->item(0);
						$chapter = $nodes_chapter->item(0);

						$titleHref = $title->getAttribute('href');
						if(!preg_match('/'.$baseURLRegex.'\/(?<url>.*?)\//', $titleHref, $title_url_arr)) {
							//Link doesn't point at "{host}/{title}/…", so there is no slug to key the entry on.
							log_message('error', "{$this->site}/Custom | Unable to parse title URL ({$titleHref})");
							continue;
						}
						$title_url = $title_url_arr['url'];

						//Only the first row seen for a title is kept.
						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->getAttribute('title'));

							//Last path segment of the link is the chapter.
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = trim($nodes_latest->item(0)->textContent);
							switch($dateString) {
								case 'Today':
									$dateString = date("Y-m-d", now());
									break;

								case 'Yesterday':
									$dateString = date("Y-m-d", strtotime("-1 days"));
									break;

								default:
									//Do nothing
									break;
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
}
882
883
/**
 * Base model for "Roku" sites.
 *
 * Title data is scraped from the series page ({baseURL}/series/{title}) and bulk updates
 * from {baseURL}/latest.
 */
abstract class Base_Roku_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';

	public $customType    = 2;

	/**
	 * @param string $title_url Title slug as used in the site's URLs.
	 *
	 * @return string URL of the series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter number.
	 *
	 * @return array ['url' => string, 'number' => string]; number is the chapter prefixed with "c".
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$readerURL     = "{$this->baseURL}/read/{$title_url}/{$chapter}";
		$chapterNumber = "c{$chapter}";

		return [
			'url'    => $readerURL,
			'number' => $chapterNumber
		];
	}

	/**
	 * Scrape the title name, latest chapter and last update time from the series page.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() yields nothing.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$pageContent = $this->get_content($this->getFullTitleURL($title_url));

		$parsed = $this->parseTitleDataDOM(
			$pageContent,
			$title_url,
			"//div[@id='activity']/descendant::div[@class='media'][1]/descendant::div[@class='media-body']/h2/text()",
			"//ul[contains(@class, 'media-list')]/li[@class='media'][1]/a",
			"div[@class='media-body']/span[@class='text-muted']",
			""
		);
		if(!$parsed) {
			return NULL;
		}

		//The heading carries a trailing " Added on …" note which isn't part of the title.
		$titleName = trim(preg_replace('/ Added on .*$/','', $parsed['nodes_title']->textContent));

		//Chapter number is the last path segment of the chapter link.
		$chapterHref   = (string) $parsed['nodes_chapter']->getAttribute('href');
		$latestChapter = preg_replace('/^.*\/([0-9\.]+)$/', '$1', $chapterHref);

		$addedOn = preg_replace('/^Added (?:on )?/', '', $parsed['nodes_latest']->textContent);

		return [
			'title'          => $titleName,
			'latest_chapter' => $latestChapter,
			'last_updated'   => date("Y-m-d H:i:s", strtotime($addedOn))
		];
	}

	/**
	 * Scrape the "latest" page and return the newest chapter of every title listed there.
	 *
	 * @return array Map of title slug => ['title', 'latest_chapter', 'last_updated'].
	 *               Empty when the request fails or no rows are found (an error is logged).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$response = $this->get_content("{$this->baseURL}/latest");
		if(!$response || $response['status_code'] != 200) {
			log_message('error', "{$this->site} - Custom updating failed.");
			return $titleDataList;
		}

		$document = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$document->loadHTML($response['body']);
		libxml_use_internal_errors(FALSE);

		$finder = new DOMXPath($document);
		$rows   = $finder->query("//div[@class='content-wrapper']/div[@class='row']/div/div");
		if($rows->length <= 0) {
			log_message('error', "{$this->site} | Following list is empty?");
			return $titleDataList;
		}

		foreach($rows as $row) {
			$titleNodes   = $finder->query("div[@class='caption']/h6/a", $row);
			$chapterNodes = $finder->query("div[@class='panel-footer no-padding']/a", $row);
			$latestNodes  = $finder->query("div[@class='caption']/text()", $row);

			//Every row must provide exactly one of each node.
			if($titleNodes->length !== 1 || $chapterNodes->length !== 1 || $latestNodes->length !== 1) {
				log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$titleNodes->length} | CHAPTER: {$chapterNodes->length}) | LATEST: {$latestNodes->length})");
				continue;
			}

			//Last path segment of the title link is the title slug.
			$titleLink = $titleNodes->item(0);
			preg_match('/(?<url>[^\/]+(?=\/$|$))/', $titleLink->getAttribute('href'), $slugMatch);
			$slug = $slugMatch['url'];

			//Only the first row seen for a title is kept.
			if(array_key_exists($slug, $titleDataList)) {
				continue;
			}

			//Last path segment of the chapter link is the chapter.
			$chapterLink = $chapterNodes->item(0);
			preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapterLink->getAttribute('href'), $chapterMatch);

			$addedOn = trim(str_replace('Added ', '', $latestNodes->item(0)->textContent));

			$titleDataList[$slug] = [
				'title'          => trim($titleLink->textContent),
				'latest_chapter' => $chapterMatch['chapter'],
				'last_updated'   => date("Y-m-d H:i:s", strtotime($addedOn))
			];
		}

		return $titleDataList;
	}
}
975
976
//CHECK: RSS might be better to use here?
977
/**
 * Base model for sites running the WP-Manga WordPress plugin.
 *
 * Title data is scraped from the title page ({baseURL}/manga/{title}/) and bulk updates
 * from the first page of the wp-manga search listing.
 */
abstract class Base_WP_Manga_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[0-9\.]+)$/';

	public $customType    = 2;

	/**
	 * @param string $title_url Title slug as used in the site's URLs.
	 *
	 * @return string URL of the title page (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/manga/{$title_url}/";
	}

	/**
	 * Build the chapter URL and a display number for a chapter.
	 *
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter slug.
	 *
	 * @return array ['url' => string, 'number' => string]; purely numeric chapters are prefixed with "c",
	 *               anything else is returned unchanged.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//"{$var}" form: the "${var}" interpolation syntax is deprecated as of PHP 8.2.
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}

	/**
	 * @param string $title_url Title slug.
	 * @param string $chapter   Chapter slug.
	 *
	 * @return string URL of the chapter (with trailing slash).
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->getFullTitleURL($title_url).$chapter.'/';
	}

	/**
	 * Scrape the title name, latest chapter and last update time from the title page.
	 *
	 * @param string $title_url Title slug.
	 * @param bool   $firstGet  Not used by this implementation.
	 *
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM()
	 *                    yields nothing or the chapter link does not contain a usable ".../manga/{title}/{chapter}" path.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		//NOTE(review): static analysis reports that get_content() may return false; presumably parseTitleDataDOM() copes with that - confirm.
		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//div[@class='post-title'])/h3[1]",
			"//ul[contains(@class, 'version-chap')]/li[1]",
			"span[@class='chapter-release-date']/i[1]",
			'a[1]',
			function($data) {
				return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
			}
		);
		if($data) {
			$chapterHref = (string) $data['nodes_chapter']->getAttribute('href');
			$segments    = explode('/', $chapterHref);

			//The chapter is the second segment after the LAST "manga" segment (…/manga/{title}/{chapter}).
			//array_search() returns FALSE when there is no such segment; "FALSE + 2" would silently become index 2.
			$mangaIndex = array_search('manga', array_reverse($segments, TRUE), TRUE);
			if($mangaIndex !== FALSE && isset($segments[$mangaIndex + 2])) {
				$titleData['title'] = trim($data['nodes_title']->textContent);

				$titleData['latest_chapter'] = $segments[$mangaIndex + 2];

				$dateString = $data['nodes_latest']->nodeValue;
				$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
			} else {
				log_message('error', "{$this->site} : {$title_url} | Unable to find chapter segment in URL ({$chapterHref})");
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrape the wp-manga listing and return the newest chapter of every title listed there.
	 *
	 * @return array Map of title slug => ['title', 'latest_chapter', 'last_updated'].
	 *               Empty when the request fails or no rows are found (an error is logged).
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/page/1/?s&post_type=wp-manga";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			//Strip everything before/after the listing markup before parsing.
			$data = preg_replace('/^[\s\S]+<!-- container & no-sidebar-->/', '', $data);
			$data = preg_replace('/<div class="ad c-ads custom-code body-bottom-ads">[\s\S]+$/', '', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@class='tab-content-wrap']/div/div[@class='row']/div[@class='c-tabs-item__content']/div[@class='col-sm-10 col-md-10']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("div[@class='tab-summary']/div[@class='post-title']/h4/a", $row);
					$nodes_chapter = $xpath->query("div[@class='tab-meta']/div[@class='meta-item latest-chap']/span[@class='font-meta chapter']/a", $row);
					$nodes_latest  = $xpath->query("div[@class='tab-meta']/div[@class='meta-item post-on']/span[@class='font-meta']", $row);

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title = $nodes_title->item(0);

						//Last path segment of the link is the title slug.
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
						$title_url = $title_url_arr['url'];

						//Only the first row seen for a title is kept.
						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->textContent);

							$chapter = $nodes_chapter->item(0);
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime($nodes_latest->item(0)->nodeValue));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
}
1086