Completed
Push — master ( 80582d...5dc5e4 )
by Angus
03:32
created

Base_Site_Model   F

Complexity

Total Complexity 83

Size/Duplication

Total Lines 540
Duplicated Lines 0 %

Coupling/Cohesion

Components 3
Dependencies 5

Test Coverage

Coverage 20%

Importance

Changes 0
Metric Value
dl 0
loc 540
ccs 37
cts 185
cp 0.2
rs 2
c 0
b 0
f 0
wmc 83
lcom 3
cbo 5

20 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
getFullTitleURL() 0 1 ?
getChapterData() 0 1 ?
A getChapterPageURL() 0 12 3
getTitleData() 0 1 ?
A handleBatchUpdate() 0 14 2
A isValidTitleURL() 0 5 2
A isValidChapter() 0 5 2
A stripChapter() 0 3 1
B get_content() 0 60 10
A handleCloudFlare() 0 27 4
D parseTitleDataDOM() 0 85 25
A cleanTitleDataDOM() 0 3 1
B doCustomFollow() 0 27 7
A handleCustomFollow() 0 6 2
A doCustomUpdate() 0 1 1
A doCustomCheck() 0 12 2
C doCustomCheckCompare() 0 50 16
A _getSiteRateLimit() 0 3 2
A _setSiteRateLimit() 0 5 2

How to fix   Complexity   

Complex Class

Complex classes like Base_Site_Model often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Base_Site_Model, and based on these observations, apply Extract Interface, too.

1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
4
 * Class Tracker_Sites_Model
5
 */
6
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Resolves properties that are not yet defined on this object.
	 *
	 * If $name is an existing class whose direct parent is one of the known site base classes,
	 * the site model is instantiated, stored on this object under that name, and returned.
	 * Anything else is forwarded to the CodeIgniter super-object via get_instance().
	 *
	 * @param string $name
	 * @return mixed
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$validClasses = [
			'Base_Site_Model',
			'Base_FoolSlide_Site_Model',
			'Base_myMangaReaderCMS_Site_Model',
			'Base_GlossyBright_Site_Model',
			'Base_Roku_Site_Model',
			'Base_WP_Manga_Site_Model'
		];

		//get_parent_class() returns FALSE for classes without a parent, so compare strictly.
		$parentClass = class_exists($name) ? get_parent_class($name) : FALSE;
		if($parentClass === FALSE || !in_array($parentClass, $validClasses, TRUE)) {
			return get_instance()->{$name};
		}

		$this->loadSite($name);
		return $this->{$name};
	}

	/**
	 * Instantiates the given site model class and stores it on this object under its class name,
	 * so later accesses hit the real property instead of __get.
	 *
	 * @param string $siteName
	 */
	private function loadSite(string $siteName) : void {
		$this->{$siteName} = new $siteName();
	}
}
35
36
abstract class Base_Site_Model extends CI_Model {
37
	public $site          = '';
38
	public $titleFormat   = '//';
39
	public $chapterFormat = '//';
40
	public $pageSeparator = ''; //NOTE: Each site must set this manually.
41
	public $hasCloudFlare = FALSE;
42
	public $userAgent     = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36';
43
44
	public $baseURL = '';
45
46
	/**
47
	 * 0: No custom updater.
48
	 * 1: Uses following page.
49
	 * 2: Uses latest releases page.
50
	 */
51
	public $customType = 0;
52
53
	public $canHaveNoChapters = FALSE;
54
55
	public $siteRateLimit = 600;
56
57 16
	/**
	 * Runs the parent constructor, loads the database, and records the concrete class name in $site.
	 */
	public function __construct() {
		parent::__construct();

		$this->load->database();

		//The site identifier is simply the name of the concrete class being instantiated.
		$this->site = static::class;
	}
64
65
	/**
66
	 * Generates URL to the title page of the requested series.
67
	 *
68
	 * NOTE: In some cases, we are required to store more data in the title_string than is needed to generate the URL. (Namely as the title_string is our unique identifier for that series)
69
	 *       When storing additional data, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
70
	 *
71
	 * Example:
72
	 *    return "http://mangafox.me/manga/{$title_url}/";
73
	 *
74
	 * Example (with extra data):
75
	 *    $title_parts = explode(':--:', $title_url);
76
	 *    return "https://bato.to/comic/_/comics/-r".$title_parts[0];
77
	 *
78
	 * @param string $title_url
79
	 * @return string
80
	 */
81
	abstract public function getFullTitleURL(string $title_url) : string;
82
83
	/**
84
	 * Generates chapter data from given $title_url and $chapter.
85
	 *
86
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
87
	 *
88
	 * NOTE: In some cases, we are required to store the chapter number, and the segment required to generate the chapter URL separately.
89
	 *       Much like when generating the title URL, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
90
	 *
91
	 * Example:
92
	 *     return [
93
	 *        'url'    => $this->getFullTitleURL($title_url).'/'.$chapter,
94
	 *        'number' => "c{$chapter}"
95
	 *    ];
96
	 *
97
	 * @param string $title_url
98
	 * @param string $chapter
99
	 * @return array [url, number]
100
	 */
101
	abstract public function getChapterData(string $title_url, string $chapter) : array;
102
103
	/**
104
	 * Generates chapter page URL from given chapterData.
105
	 *
106
	 * Will return NULL if pageSeparator is not set.
107
	 *
108
	 * @param array $chapterData
109
	 * @param int   $page
110
	 *
111
	 * @return null|string
112
	 */
113
	final public function getChapterPageURL(array $chapterData, int $page = 1) : ?string {
		//Sites that never set a page separator have no page URLs.
		if($this->pageSeparator === '') {
			return NULL;
		}

		$chapterURL = $chapterData['url'];

		//We don't want double trailing slashes, so drop the separator when the URL already ends with it.
		$separator = (substr($chapterURL, -1) === $this->pageSeparator) ? '' : $this->pageSeparator;

		return $chapterURL . $separator . $page;
	}
125
126
	/**
127
	 * Used to get the latest chapter of given $title_url.
128
	 *
129
	 * This <should> utilize both get_content and parseTitleDataDOM functions when possible, as these can both reduce a lot of the code required to set this up.
130
	 *
131
	 * $titleData params must be set accordingly:
132
	 * * `title` should always be used with html_entity_decode.
133
	 * * `latest_chapter` must match $this->chapterFormat.
134
	 * * `last_updated` should always be in date("Y-m-d H:i:s") format.
135
	 * * `followed` should never be set directly by getTitleData; the only exception is merging in the result of doCustomFollow via array_merge.
136
	 *
137
	 * $firstGet is set to true when the series is first added to the DB, and is used to follow the series on given site (if possible).
138
	 *
139
	 * @param string $title_url
140
	 * @param bool   $firstGet
141
	 * @return array|null [title,latest_chapter,last_updated,followed?]
142
	 */
143
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
144
145
	/**
	 * Fetches title data for $title_url unless the site's rate limit has been exceeded.
	 *
	 * @param string $title_url
	 * @return array [limited => bool, titleData => array|null]
	 */
	public function handleBatchUpdate(string $title_url) : array {
		$rateLimit = $this->_getSiteRateLimit();

		//Over the limit: report it and skip the request entirely.
		if(!($rateLimit <= $this->siteRateLimit)) {
			return [
				'limited'   => TRUE,
				'titleData' => NULL
			];
		}

		$this->_setSiteRateLimit($rateLimit);

		return [
			'limited'   => FALSE,
			'titleData' => $this->getTitleData($title_url)
		];
	}
159
160
	/**
161
	 * Validates given $title_url against titleFormat.
162
	 *
163
	 * Failure to match against titleFormat will stop the series from being added to the DB.
164
	 *
165
	 * @param string $title_url
166
	 * @return bool
167
	 */
168 2
	final public function isValidTitleURL(string $title_url) : bool {
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		//No match (or the regex itself failed): log it and reject the title.
		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
173
174
	/**
175
	 * Validates given $chapter against chapterFormat.
176
	 *
177
	 * Failure to match against chapterFormat will stop the chapter being updated.
178
	 *
179
	 * @param string $chapter
180
	 * @return bool
181
	 */
182 2
	final public function isValidChapter(string $chapter) : bool {
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		//No match (or the regex itself failed): log it and reject the chapter.
		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
187
188
189
190
	/**
	 * Returns the given chapter string unchanged.
	 *
	 * NOTE(review): presumably a hook that individual site models override to normalise chapter strings - confirm against subclasses.
	 *
	 * @param string $chapter
	 * @return string
	 */
	public function stripChapter(string $chapter) : string {
191
		return $chapter;
192
	}
193
194
	/**
195
	 * Used by getTitleData (& similar functions) to get the requested page data.
196
	 *
197
	 * @param string $url
198
	 * @param string $cookie_string
199
	 * @param string $cookiejar_path
200
	 * @param bool   $follow_redirect
201
	 * @param bool   $isPost
202
	 * @param array  $postFields
203
	 *
204
	 * @return array|bool
205
	 */
206
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		//Defaults keep the return value well-defined on every path (the loop below always runs at least once).
		$headers     = [];
		$status_code = 0;
		$body        = '';

		$refresh = TRUE; //For sites that have CloudFlare, we want to loop get_content again.
		$loops   = 0;
		while($refresh && $loops < 2) {
			$refresh = FALSE;
			$loops++;

			$ch = curl_init();
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_ENCODING , "gzip");
			//curl_setopt($ch, CURLOPT_VERBOSE, 1);
			curl_setopt($ch, CURLOPT_HEADER, 1);

			if($follow_redirect) curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

			//Build the cookie header from scratch on each attempt. Appending to $cookie_string itself would
			//send the stale CloudFlare cookie ahead of the refreshed one on the retry, and would produce a
			//leading "; " when the caller passed no cookies.
			$request_cookies = $cookie_string;
			if($cookies = $this->cache->get("cloudflare_{$this->site}")) {
				$request_cookies = ($request_cookies !== '') ? "{$request_cookies}; {$cookies}" : "{$cookies}";
			}

			if(!empty($request_cookies)) curl_setopt($ch, CURLOPT_COOKIE, $request_cookies);
			if(!empty($cookiejar_path))  curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

			//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
			curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

			//NOTE: Peer verification is explicitly kept enabled for SSL URLs.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);

			curl_setopt($ch, CURLOPT_URL, $url);

			if($isPost) {
				//CURLOPT_POST is a boolean switch, not a field count.
				curl_setopt($ch, CURLOPT_POST, TRUE);
				curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
			}

			$response = curl_exec($ch);

			$this->Tracker->admin->incrementRequests();

			if($response === FALSE) {
				log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
				//Release the handle on the failure path too (the error info must be read before closing).
				curl_close($ch);
				//FIXME: We don't always account for FALSE return
				return FALSE;
			}

			$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			//CURLOPT_HEADER is on, so the response is the raw headers followed by the body.
			$headers     = http_parse_headers(substr($response, 0, $header_size));
			$body        = substr($response, $header_size);
			curl_close($ch);

			//A 503 may be a CloudFlare challenge; if cookies were refreshed, try once more.
			if($status_code === 503) $refresh = $this->handleCloudFlare($url, $body);
		}

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}
266
267
	/**
	 * Detects a CloudFlare challenge page in $body and, if found, fetches fresh CloudFlare cookies
	 * via the external python helper and caches them for this site.
	 *
	 * @param string $url  URL that returned the challenge.
	 * @param string $body Response body to inspect.
	 * @return bool TRUE if new cookies were cached and the request should be retried, FALSE otherwise.
	 */
	final private function handleCloudFlare(string $url, string $body) : bool {
		$isChallenge = (strpos($body, 'DDoS protection by Cloudflare') !== FALSE)
		            || (strpos($body, '<input type="hidden" id="jschl-answer" name="jschl_answer"/>') !== FALSE);
		if(!$isChallenge) {
			//Either site doesn't have CloudFlare or we have bypassed it. Either is good!
			return FALSE;
		}

		//TODO: If $this->hasCloudFlare is FALSE, the site appears to have enabled CloudFlare; flag it and contact admin.
		//      We'll continue to bypass CloudFlare as this may occur in a loop.

		$urlData = [
			'url'        => $url,
			'user_agent' => $this->userAgent
		];
		//TODO: shell_exec seems bad since the URLs "could" be user inputted? Better way of doing this?
		$result = shell_exec('python '.APPPATH.'../_scripts/get_cloudflare_cookie.py '.escapeshellarg(json_encode($urlData)));

		//shell_exec returns NULL on failure or empty output; passing that to json_decode would throw under strict_types.
		$cookieData = is_string($result) ? json_decode($result, TRUE) : NULL;
		if(!is_array($cookieData) || empty($cookieData['cookies'])) {
			//Don't cache an invalid result for a year, and don't trigger a pointless retry.
			log_message('error', "Failed to grab CloudFlare Cookies for {$this->site}");
			return FALSE;
		}

		$this->cache->save("cloudflare_{$this->site}", $cookieData['cookies'],  31536000 /* 1 year, or until we renew it */);
		log_message('debug', "Saving CloudFlare Cookies for {$this->site}");

		return TRUE;
	}
294
295
	/**
296
	 * Used by getTitleData to get the title, latest_chapter & last_updated data from the data returned by get_content.
297
	 *
298
	 * parseTitleDataDOM checks if the data returned by get_content is valid via a few simple checks.
299
	 * * The request must have actually succeeded, returned a valid (2xx) status code, and the data must not be empty. We also run the optional $failureCall closure against the data, and treat the page as a failure if it returns a truthy value.
300
	 *
301
	 * Data is cleaned by cleanTitleDataDOM prior to being passed to DOMDocument.
302
	 *
303
	 * All $node_* params must be XPath to the requested node, and must only return 1 result. Anything else will throw a failure.
304
	 *
305
	 * @param array        $content
306
	 * @param string       $title_url
307
	 * @param string       $node_title_string
308
	 * @param string       $node_row_string
309
	 * @param string       $node_latest_string
310
	 * @param string       $node_chapter_string
311
	 * @param closure|null $failureCall
312
	 * @param closure|null $noChaptersCall
313
	 * @param closure|null $extraCall
314
	 * @param closure|null $statusCall //FIXME: This is really ugly.
315
	 *
316
	 * @return DOMElement[]|false [nodes_title,nodes_chapter,nodes_latest]
317
	 */
318
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		closure $failureCall = NULL, closure $noChaptersCall = NULL, closure $extraCall = NULL, closure $statusCall = NULL) {

		//get_content returns FALSE (not an array) when the request itself failed.
		if(!is_array($content)) {
			log_message('error', "{$this->site} : {$title_url} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		['headers' => $headers, 'status_code' => $status_code, 'body' => $data] = $content;

		$isSuccessStatus = ($status_code >= 200 && $status_code < 300);
		if(!$isSuccessStatus) {
			if($status_code === 502) {
				// Site is overloaded, no need to log this.
				return FALSE;
			}

			//Give the site-specific status handler a chance to claim this status code.
			if(!is_null($statusCall) && is_callable($statusCall) && $statusReturn = $statusCall($status_code, $data)) {
				if(!array_key_exists('ignore', $statusReturn)) {
					log_message('error', "{$this->site} : {$title_url} | Failure status call matched");
				}
				return FALSE;
			}

			log_message('error', "{$this->site} : {$title_url} | Bad Status Code ({$status_code})");
			return FALSE;
		}

		if(empty($data)) {
			log_message('error', "{$this->site} : {$title_url} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}

		if(!is_null($failureCall) && is_callable($failureCall) && $failureCall($data)) {
			log_message('error', "{$this->site} : {$title_url} | Failure call matched");
			return FALSE;
		}

		//This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.
		$data = $this->cleanTitleDataDOM($data);

		$dom = new DOMDocument();
		libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_use_internal_errors(FALSE);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);

		if($nodes_title->length !== 1) {
			log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (TITLE: {$nodes_title->length})");
			return FALSE;
		}

		//Normal case: exactly one chapter row was matched.
		if($nodes_row->length === 1) {
			$firstRow     = $nodes_row->item(0);
			$nodes_latest = $xpath->query($node_latest_string,  $firstRow);

			//An empty chapter XPath means the row node itself is the chapter node.
			$nodes_chapter = ($node_chapter_string !== '') ? $xpath->query($node_chapter_string, $firstRow) : $nodes_row;

			if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
				log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
				return FALSE;
			}

			$returnData = [
				'nodes_title'   => $nodes_title->item(0),
				'nodes_latest'  => $nodes_latest->item(0),
				'nodes_chapter' => $nodes_chapter->item(0)
			];

			if(is_callable($extraCall)) $extraCall($xpath, $returnData);

			return $returnData;
		}

		//No single row matched, but this site may legitimately have titles without chapters.
		if($this->canHaveNoChapters && !is_null($noChaptersCall) && is_callable($noChaptersCall)) {
			$returnData = [
				'nodes_title'   => $nodes_title->item(0)
			];

			$noChaptersCall($data, $xpath, $returnData);

			if(!is_array($returnData)) {
				log_message('error', "{$this->site} : {$title_url} | canHaveNoChapters set, but doesn't match possible checks! XPath is probably broken.");
			} else if(is_callable($extraCall)) {
				$extraCall($xpath, $returnData);
			}

			return $returnData;
		}

		log_message('error', "{$this->site} : {$title_url} | Invalid amount of nodes (ROW: {$nodes_row->length})");
		return FALSE;
	}
403
404
	/**
405
	 * Used by parseTitleDataDOM to clean the data prior to passing it to DOMDocument & DOMXPath.
406
	 * This is mostly done as an (assumed) speed improvement due to the reduced amount of DOM to parse, or simply just making it easier to parse with XPath.
407
	 *
408
	 * @param string $data
409
	 * @return string
410
	 */
411
	public function cleanTitleDataDOM(string $data) : string {
412
		//Base implementation is a pass-through: the data is returned unmodified.
		return $data;
413
	}
414
415
	/**
416
	 * Used to follow a series on given site if supported.
417
	 *
418
	 * This is called by getTitleData if $firstGet is true (which occurs when the series is first being added to the DB).
419
	 *
420
	 * Most of the actual following is done by handleCustomFollow.
421
	 *
422
	 * @param string $data
423
	 * @param array  $extra
424
	 * @return array
425
	 */
426
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		//Invoked by handleCustomFollow with the follow request's result; marks the title as followed on success.
		$onResponse = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}

			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			//The success check is optional; when supplied it must approve the response body.
			if(is_callable($successCallback) && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($onResponse, $data, $extra);

		return $titleData;
	}
453
454
	/**
455
	 * Used by doCustomFollow to handle following series on sites.
456
	 *
457
	 * Uses get_content to get data.
458
	 *
459
	 * $callback must accept ($content, $id, closure $successCallback = NULL).
460
	 * * $content is simply just the get_content data.
461
	 * * $id is the dbID. This should be passed by the $extra arr.
462
	 * * $successCallback is an optional success check to make sure the series was properly followed.
463
	 *
464
	 * @param callable $callback
465
	 * @param string   $data
466
	 * @param array    $extra
467
	 */
468
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		//The base implementation only acts for customType 2; any other type is left untouched here.
		if($this->customType !== 2) {
			return;
		}

		//No request is made in this case, so report a synthetic successful response to the callback.
		$content = ['status_code' => 200];
		$callback($content, $extra['id']);
	}
474
475
	/**
	 * Used to check the sites following page for new updates (if supported).
	 * This should work much like getTitleData, but instead checks the following page.
	 *
	 * Implementations must return an array containing arrays of each of the chapters data.
	 * This default has an empty body, so calling it yields NULL.
	 */
	public function doCustomUpdate() {
		//Intentionally empty.
	}
482
483
	/**
	 * Used by the custom updater to check if a chapter looks newer than the current one.
	 *
	 * Both chapter strings are converted with getChapterData, their 'number' value is split
	 * on '/', and the resulting segments are handed to doCustomCheckCompare.
	 * If there is no old chapter (NULL), the new chapter is always treated as newer.
	 *
	 * @param string|null $oldChapterString
	 * @param string      $newChapterString
	 * @return bool
	 */
	public function doCustomCheck(?string $oldChapterString, string $newChapterString) : bool {
		//Nothing to compare against.
		if($oldChapterString === NULL) {
			return TRUE;
		}

		$oldSegments = explode('/', $this->getChapterData('', $oldChapterString)['number']);
		$newSegments = explode('/', $this->getChapterData('', $newChapterString)['number']);

		return $this->doCustomCheckCompare($oldSegments, $newSegments);
	}
505
506
	/**
	 * Used by doCustomCheck to check if a chapter looks newer than the current one.
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format, already split on '/'.
	 *
	 * To avoid issues with the occasional off case, this only returns true when the new chapter
	 * clearly looks newer than the old one. Segment lists of differing length never match.
	 *
	 * @param array $oldChapterSegments
	 * @param array $newChapterSegments
	 * @return bool
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//Volume placeholders which get treated as volume 0.
		//TBD might not be the latest (although it can be, but that is covered by the chapter checks).
		$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];

		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		$segmentCount = count($newChapterSegments);
		if($segmentCount !== count($oldChapterSegments)) {
			return FALSE;
		}

		//Only a volume/chapter pair has a volume to read; anything else is compared as volume 0.
		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//FIXME: Can we loop this?
			//The leading "v" is dropped before the value is parsed.
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			if(in_array($oldVolume, $placeholderVolumes)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		//The leading "c" is dropped before the value is parsed.
		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		$chapterIsHigher = ($newChapter > $oldChapter);
		$volumeIsHigher  = ($newVolume > $oldVolume);

		//One-off case where the site doesn't properly increment chapter numbers across volumes, and instead does something like: v1/c1..v1/c5, v2/c1..v2/c5 (and so on).
		$chapterCountRestarted = ($oldChapter < 10 && $newChapter < 10);

		//Newer if the chapter number is higher, or if the volume is higher and the chapter either didn't go backwards or looks like a per-volume restart.
		return $chapterIsHigher || ($volumeIsHigher && ($chapterCountRestarted || $newChapter >= $oldChapter));
	}
566
567
	/**
	 * Reads the current rate limit counter for this site from the cache.
	 *
	 * The counter is stored under "{site}_ratelimit". Any falsy cache value is reported as 0.
	 *
	 * NOTE: "final" was dropped as private methods can never be overridden, and PHP 8+ warns about "final private".
	 *
	 * @return int
	 */
	private function _getSiteRateLimit() : int {
		$cachedLimit = $this->cache->get("{$this->site}_ratelimit");

		return (int) ($cachedLimit ?: 0);
	}
570
	/**
	 * Increments the rate limit counter for this site and stores it in the cache for 3600 seconds.
	 *
	 * NOTE: "final" was dropped as private methods can never be overridden, and PHP 8+ warns about "final private".
	 *
	 * @param int|null $rateLimit The current counter value. When NULL, the value is read from the cache.
	 * @return bool The result of the cache save call.
	 */
	private function _setSiteRateLimit(?int $rateLimit = NULL) : bool {
		//We would just use increment(), but we can't set ttl with it...
		//Only fall back to the cache when no value was given; an explicit 0 is a valid counter value.
		$currentRateLimit = $rateLimit ?? $this->_getSiteRateLimit();

		return $this->cache->save("{$this->site}_ratelimit", $currentRateLimit + 1, 3600);
	}
575
}
576
577
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
578
	public $titleFormat   = '/^[a-z0-9_-]+$/';
579
	public $chapterFormat = '/^(?:en(?:-us)?|pt|es)\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
580
	public $pageSeparator = 'page/';
581
	public $customType    = 2;
582
583
	/**
	 * Builds the series page URL ({baseURL}/series/{title_url}).
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/series/' . $title_url;
	}
586
587
	/**
	 * Builds the chapter URL and a "v#/c#.#" style chapter number from a chapter string.
	 *
	 * $chapter is formatted as #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/).
	 * The volume prefix is left out when the volume is '0' (or missing), and the
	 * extra segment is appended after a '.' when present.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter);

		//$chapterFormat allows strings with fewer segments (e.g. "en/5"), so don't assume every offset exists.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $number
		];
	}
594
	/**
	 * Builds the reader URL for a chapter ({baseURL}/read/{title_url}/{chapter}/).
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return string
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->baseURL . '/read/' . $title_url . '/' . $chapter . '/';
	}
597
598
	/**
	 * Fetches title data (title, latest chapter, last updated) from the JSON title API.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null NULL if the request failed or no chapters were returned.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$jsonURL = $this->getJSONTitleURL($title_url);
		if($content = $this->get_content($jsonURL)) {
			$json = json_decode($content['body'], TRUE);
			if($json && isset($json['chapters']) && count($json['chapters']) > 0) {
				$titleData['title'] = trim($json['comic']['name']);

				//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does), so we need to sort ourselves.
				//Chapter and subchapter are compared as separate numbers. Joining them as "chapter.subchapter" would rank subchapter 10 (x.10) below subchapter 2 (x.2).
				usort($json['chapters'], function($a, $b) {
					$aKey = [floatval($a['chapter']['chapter']), floatval($a['chapter']['subchapter'])];
					$bKey = [floatval($b['chapter']['chapter']), floatval($b['chapter']['subchapter'])];

					//Descending order, so the latest chapter ends up first.
					return $bKey <=> $aKey;
				});
				$latestChapter = reset($json['chapters'])['chapter'];

				$latestChapterString = "{$latestChapter['language']}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
				//Cast before comparing so an integer 0 from the API is also treated as "no subchapter".
				if((string) $latestChapter['subchapter'] !== '0') {
					$latestChapterString .= "/{$latestChapter['subchapter']}";
				}
				$titleData['latest_chapter'] = $latestChapterString;

				//No need to use date() here since this is already formatted as such.
				$titleData['last_updated'] = ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created']);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
626
627
	/**
	 * Checks the latest chapters API for updates.
	 *
	 * Chapters are ordered newest first (by updated date, falling back to created date), and
	 * only the first chapter found for each title is used.
	 *
	 * @return array Title data arrays (title, latest_chapter, last_updated), keyed by title stub.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$jsonURL = $this->getJSONUpdateURL();
		if(($content = $this->get_content($jsonURL)) && $content['status_code'] == 200) {
			if(($json = json_decode($content['body'], TRUE)) && isset($json['chapters'])) {
				//'0000-00-00 00:00:00' is treated as "never updated", in which case the created date is used instead.
				$getChapterDate = function($chapter) {
					return ($chapter['updated'] !== '0000-00-00 00:00:00' ? $chapter['updated'] : $chapter['created']);
				};

				//This should fix edge cases where chapters are uploaded in bulk in the wrong order (HelveticaScans does this with Mousou Telepathy).
				usort($json['chapters'], function($a, $b) use ($getChapterDate) {
					$a_date = new DateTime($getChapterDate($a['chapter']));
					$b_date = new DateTime($getChapterDate($b['chapter']));
					return $b_date <=> $a_date;
				});

				foreach($json['chapters'] as $chapterData) {
					$stub = $chapterData['comic']['stub'];
					if(isset($titleDataList[$stub])) {
						//We already have title data for this title.
						continue;
					}

					$latestChapter = $chapterData['chapter'];

					//Use the chapter's own language (as getTitleData does) so that both methods build the same chapter string for non-"en" chapters.
					$language = $latestChapter['language'] ?? 'en';

					$latestChapterString = "{$language}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
					if($latestChapter['subchapter'] !== '0') {
						$latestChapterString .= "/{$latestChapter['subchapter']}";
					}

					$titleDataList[$stub] = [
						'title'          => trim($chapterData['comic']['name']),
						'latest_chapter' => $latestChapterString,
						//No need to use date() here since this is already formatted as such.
						'last_updated'   => $getChapterDate($latestChapter)
					];
				}
			} else {
				log_message('error', "{$this->site} - Custom updating failed (no chapters arg?) for {$this->baseURL}.");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
674
675
	/**
	 * Builds the JSON API URL for a single title.
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getJSONTitleURL(string $title_url) : string {
		return $this->baseURL . '/api/reader/comic/stub/' . $title_url . '/format/json';
	}
678
	/**
	 * Builds the JSON API URL which lists chapters ordered by creation date (descending).
	 *
	 * @return string
	 */
	public function getJSONUpdateURL() : string {
		return $this->baseURL . '/api/reader/chapters/orderby/desc_created/format/json';
	}
681
}
682
683
abstract class Base_myMangaReaderCMS_Site_Model extends Base_Site_Model {
684
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
685
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[a-zA-Z0-9\._-]+)$/';
686
	public $pageSeparator = '/';
687
	public $customType    = 2;
688
689
	/**
	 * Builds the title page URL ({baseURL}/manga/{title_url}).
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/manga/' . $title_url;
	}
692
693
	/**
	 * Builds the chapter URL and chapter number from a chapter string.
	 *
	 * Purely numeric chapters are prefixed with "c"; anything else is used as-is.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		//NOTE: "{$var}" is used as the "${var}" interpolation syntax is deprecated as of PHP 8.2.
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}
700
	/**
	 * Builds the chapter URL by appending the chapter to the title URL.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return string
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		$titleURL = $this->getFullTitleURL($title_url);

		return "{$titleURL}/{$chapter}";
	}
703
704
	/**
	 * Fetches title data (title, latest chapter, last updated) from the title page.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null NULL if the page could not be parsed, or if the chapter or date could not be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//h2[@class='widget-title'])[1]",
			"//ul[contains(@class, 'chapters')]/li[not(contains(@class, 'btn'))][1]",
			"div[contains(@class, 'action')]/div[@class='date-chapter-title-rtl']",
			'h5/a[1] | h3/a[1]',
			NULL,
			NULL,
			NULL,
			function(int $status_code, $data) {
				// We want to silently fail here.
				$success = ($status_code === 500 && strpos($data, 'Whoops, looks like something went wrong.') !== FALSE);
				return ['success' => $success, 'ignore' => TRUE];
			}
		);
		if($data) {
			//The chapter is the second segment after the last "manga" segment of the URL (.../manga/#TITLE#/#CHAPTER#).
			$segments  = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
			$stubIndex = array_search('manga', array_reverse($segments, TRUE), TRUE);

			$dateString = (string) preg_replace('/ (-|\[A\]).*$/', '', (string) $data['nodes_latest']->nodeValue);
			$timestamp  = strtotime($dateString);

			if($stubIndex === FALSE || !isset($segments[$stubIndex + 2])) {
				//Without this check a failed search (FALSE + 2) would silently pick segment 2 as the chapter.
				log_message('error', "{$this->site} | Unable to find chapter in chapter URL for {$title_url}");
			} elseif($timestamp === FALSE) {
				//date() throws a TypeError under strict_types if it is handed FALSE.
				log_message('error', "{$this->site} | Unable to parse date ({$dateString}) for {$title_url}");
			} else {
				$titleData['title']          = trim($data['nodes_title']->textContent);
				$titleData['latest_chapter'] = $segments[$stubIndex + 2];
				$titleData['last_updated']   = date("Y-m-d H:i:s", $timestamp);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
740
741
	/**
	 * Checks the latest release page for updates.
	 *
	 * Only the first row found for each title is used. Rows which can't be parsed are logged and skipped.
	 *
	 * @return array Title data arrays (title, latest_chapter, last_updated), keyed by title URL.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest-release";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			//Cut everything outside of the <dl>...</dl> list before parsing.
			$data = preg_replace('/^[\s\S]+<dl>/', '<dl>', $data);
			$data = preg_replace('/<\/dl>[\s\S]+$/', '</dl>', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//dl/dd | //div[@class='mangalist']/div[@class='manga-item']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$nodes_title   = $xpath->query("div[@class='events ']/div[@class='events-body']/h3[@class='events-heading']/a | h3/a", $row);
					$nodes_chapter = $xpath->query("(div[@class='events '][1]/div[@class='events-body'][1] | div[@class='manga-chapter'][1])/h6[@class='events-subtitle'][1]/a[1]", $row);
					$nodes_latest  = $xpath->query("div[@class='time'] | small", $row);

					if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						continue;
					}

					//The title URL is the last path segment of the title link.
					$title = $nodes_title->item(0);
					if(!preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse title URL");
						continue;
					}
					$title_url = $title_url_arr['url'];

					if(array_key_exists($title_url, $titleDataList)) {
						//We already have title data for this title.
						continue;
					}

					//The chapter is the last path segment of the chapter link.
					$chapter = $nodes_chapter->item(0);
					if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
						continue;
					}

					$dateString = str_replace('/', '-', trim($nodes_latest->item(0)->nodeValue)); //NOTE: We replace slashes here as it stops strtotime interpreting the date as US date format.
					if($dateString == 'T') {
						$dateString = date("Y-m-d",now());
					}
					$timestamp = strtotime($dateString . ' 00:00');
					if($timestamp === FALSE) {
						//date() throws a TypeError under strict_types if it is handed FALSE.
						log_message('error', "{$this->site}/Custom | Unable to parse date ({$dateString}) for {$title_url}");
						continue;
					}

					$titleDataList[$title_url] = [
						'title'          => trim($title->textContent),
						'latest_chapter' => $chapter_arr['chapter'],
						'last_updated'   => date("Y-m-d H:i:s", $timestamp)
					];
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
800
}
801
802
abstract class Base_GlossyBright_Site_Model extends Base_Site_Model {
803
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
804
	public $chapterFormat = '/^[0-9\.]+$/';
805
	public $pageSeparator = '/';
806
807
	public $customType    = 2;
808
809
	/**
	 * Builds the title page URL ({baseURL}/{title_url}).
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/' . $title_url;
	}
812
813
	/**
	 * Builds the chapter URL ({title URL}/{chapter}/) and chapter number ("c{chapter}").
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$titleURL = $this->getFullTitleURL($title_url);

		return [
			'url'    => "{$titleURL}/{$chapter}/",
			'number' => 'c' . $chapter
		];
	}
819
820
	/**
	 * Fetches title data (title, latest chapter, last updated) from the title's RSS feed.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null NULL if the feed could not be parsed, or if the chapter or date could not be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = "{$this->baseURL}/manga-rss/{$title_url}";
		$content = $this->get_content($fullURL);
		$data    = $this->parseTitleDataDOM(
			$content,
			$title_url,
			'//rss/channel/image/title',
			'//rss/channel/item[1]',
			'pubdate',
			'title',
			function($data) {
				return strpos($data, '<image>') === FALSE;
			}
		);
		if($data) {
			//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it from the item title instead.
			$chapterText    = trim($data['nodes_chapter']->textContent);
			$chapterMatched = preg_match('/^.*? - ([0-9\.]+) - .*?$/', $chapterText, $chapterMatch);

			$dateString = (string) $data['nodes_latest']->textContent;
			$timestamp  = strtotime($dateString);

			if(!$chapterMatched) {
				//Without this check the entire item title would end up being used as the chapter.
				log_message('error', "{$this->site} | Unable to find chapter in item title ({$chapterText}) for {$title_url}");
			} elseif($timestamp === FALSE) {
				//date() throws a TypeError under strict_types if it is handed FALSE.
				log_message('error', "{$this->site} | Unable to parse date ({$dateString}) for {$title_url}");
			} else {
				$titleData['title']          = preg_replace('/^Recent chapters of (.*?) manga$/', '$1', trim($data['nodes_title']->textContent));
				$titleData['latest_chapter'] = $chapterMatch[1];
				$titleData['last_updated']   = date('Y-m-d H:i:s', $timestamp);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
847
848
	/**
	 * Checks the site's front page for updates.
	 *
	 * Only the first row found for each title is used. Rows which can't be parsed are logged and skipped.
	 *
	 * @return array Title data arrays (title, latest_chapter, last_updated), keyed by title URL.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		//preg_quote escapes every regex special character in the host, not only dots.
		$baseURLRegex = preg_quote(parse_url($this->baseURL, PHP_URL_HOST), '/');
		if(($content = $this->get_content($this->baseURL)) && $content['status_code'] == 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@id='wpm_mng_lst']/div | //*[@id='wpm_mng_lst']/li/div");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					//The title and the chapter are both read from the same link (a[2]), so it only needs to be queried once.
					$nodes_title   = $xpath->query("a[2]", $row);
					$nodes_chapter = $nodes_title;
					$nodes_latest  = $xpath->query("b", $row);

					if($nodes_latest->length === 0) {
						$nodes_latest = $xpath->query('text()[last()]', $row);
					}

					if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						continue;
					}

					$title   = $nodes_title->item(0);
					$chapter = $nodes_chapter->item(0);

					//The title URL is the first path segment after the host.
					if(!preg_match('/'.$baseURLRegex.'\/(?<url>.*?)\//', $title->getAttribute('href'), $title_url_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse title URL");
						continue;
					}
					$title_url = $title_url_arr['url'];

					if(array_key_exists($title_url, $titleDataList)) {
						//We already have title data for this title.
						continue;
					}

					//The chapter is the last path segment of the link.
					if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
						continue;
					}

					$dateString = trim($nodes_latest->item(0)->textContent);
					if($dateString === 'Today') {
						$dateString = date("Y-m-d", now());
					} elseif($dateString === 'Yesterday') {
						$dateString = date("Y-m-d", strtotime("-1 days"));
					}
					$timestamp = strtotime($dateString);
					if($timestamp === FALSE) {
						//date() throws a TypeError under strict_types if it is handed FALSE.
						log_message('error', "{$this->site}/Custom | Unable to parse date ({$dateString}) for {$title_url}");
						continue;
					}

					$titleDataList[$title_url] = [
						'title'          => trim($title->getAttribute('title')),
						'latest_chapter' => $chapter_arr['chapter'],
						'last_updated'   => date("Y-m-d H:i:s", $timestamp)
					];
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
918
}
919
920
abstract class Base_Roku_Site_Model extends Base_Site_Model {
921
	public $titleFormat   = '/^[a-zA-Z0-9-]+$/';
922
	public $chapterFormat = '/^[0-9\.]+$/';
923
924
	public $customType    = 2;
925
926
	/**
	 * Builds the series page URL ({baseURL}/series/{title_url}).
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/series/' . $title_url;
	}
929
	/**
	 * Builds the chapter URL ({baseURL}/read/{title_url}/{chapter}) and chapter number ("c{chapter}").
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterURL = $this->baseURL . '/read/' . $title_url . '/' . $chapter;

		return [
			'url'    => $chapterURL,
			'number' => 'c' . $chapter
		];
	}
935
	/**
	 * Fetches title data (title, latest chapter, last updated) from the series page.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null NULL if the page could not be parsed, or if the chapter or date could not be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@id='activity']/descendant::div[@class='media'][1]/descendant::div[@class='media-body']/h2/text()",
			"//ul[contains(@class, 'media-list')]/li[@class='media'][1]/a",
			"div[@class='media-body']/span[@class='text-muted']",
			""
		);
		if($data) {
			//The chapter is the numeric last path segment of the chapter link.
			$chapterHref    = (string) $data['nodes_chapter']->getAttribute('href');
			$chapterMatched = preg_match('/^.*\/([0-9\.]+)$/', $chapterHref, $chapterMatch);

			$dateString = (string) preg_replace('/^Added (?:on )?/', '', $data['nodes_latest']->textContent);
			$timestamp  = strtotime($dateString);

			if(!$chapterMatched) {
				//Without this check the entire URL would end up being used as the chapter.
				log_message('error', "{$this->site} | Unable to find chapter in chapter URL ({$chapterHref}) for {$title_url}");
			} elseif($timestamp === FALSE) {
				//date() throws a TypeError under strict_types if it is handed FALSE.
				log_message('error', "{$this->site} | Unable to parse date ({$dateString}) for {$title_url}");
			} else {
				$titleData['title']          = trim(preg_replace('/ Added on .*$/','', $data['nodes_title']->textContent));
				$titleData['latest_chapter'] = $chapterMatch[1];
				$titleData['last_updated']   = date("Y-m-d H:i:s", $timestamp);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
956
957
958
	/**
	 * Checks the latest page for updates.
	 *
	 * Only the first row found for each title is used. Rows which can't be parsed are logged and skipped.
	 *
	 * @return array Title data arrays (title, latest_chapter, last_updated), keyed by title URL.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] == 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@class='content-wrapper']/div[@class='row']/div/div");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$nodes_title   = $xpath->query("div[@class='caption']/h6/a", $row);
					$nodes_chapter = $xpath->query("div[@class='panel-footer no-padding']/a", $row);
					$nodes_latest  = $xpath->query("div[@class='caption']/text()", $row);

					if($nodes_title->length !== 1 || $nodes_chapter->length !== 1 || $nodes_latest->length !== 1) {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
						continue;
					}

					//The title URL is the last path segment of the title link.
					$title = $nodes_title->item(0);
					if(!preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse title URL");
						continue;
					}
					$title_url = $title_url_arr['url'];

					if(array_key_exists($title_url, $titleDataList)) {
						//We already have title data for this title.
						continue;
					}

					//The chapter is the last path segment of the chapter link.
					$chapter = $nodes_chapter->item(0);
					if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
						log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
						continue;
					}

					$dateString = trim(str_replace('Added ', '', $nodes_latest->item(0)->textContent));
					$timestamp  = strtotime($dateString);
					if($timestamp === FALSE) {
						//date() throws a TypeError under strict_types if it is handed FALSE.
						log_message('error', "{$this->site}/Custom | Unable to parse date ({$dateString}) for {$title_url}");
						continue;
					}

					$titleDataList[$title_url] = [
						'title'          => trim($title->textContent),
						'latest_chapter' => $chapter_arr['chapter'],
						'last_updated'   => date("Y-m-d H:i:s", $timestamp)
					];
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
1011
}
1012
1013
//CHECK: RSS might be better to use here?
1014
abstract class Base_WP_Manga_Site_Model extends Base_Site_Model {
1015
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
1016
	public $chapterFormat = '/^(?:[0-9]+-[0-9]+\/)?(?:oneshot|(?:chapter-)?[0-9a-zA-Z\.\-]+)$/';
1017
	//TODO: Get PageSeperator
1018
1019
	public $customType    = 2;
1020
1021
	public $titleStub = 'manga';
1022
1023
	/**
	 * Builds the title page URL ({baseURL}/{titleStub}/{title_url}/).
	 *
	 * @param string $title_url
	 * @return string
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/' . $this->titleStub . '/' . $title_url . '/';
	}
1026
1027
	/**
	 * Builds the chapter URL and chapter number from a chapter string.
	 *
	 * * "#PREFIX#/#CHAPTER#" becomes "v#PREFIX#/c#CHAPTER#" (with any "chapter-"/"chapter_" label removed).
	 * * Purely numeric chapters become "c#CHAPTER#".
	 * * Anything else is used as-is.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return array ['url' => string, 'number' => string]
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterN = $chapter;

		//NOTE: A slash at position 0 is deliberately not treated as a prefix separator.
		$slashPos = strpos($chapter, '/');
		if($slashPos !== FALSE && $slashPos > 0) {
			$chapterArr = explode('/', $chapter);

			//$chapterFormat uses a "chapter-" label, so that needs to be removed as well as the underscore variant.
			$chapterN = "v{$chapterArr[0]}/c".str_replace(['chapter-', 'chapter_'], '', $chapterArr[1]);
		} elseif(ctype_digit($chapter)) {
			$chapterN = "c{$chapter}";
		}

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}
1039
1040
	/**
	 * Builds the chapter URL ({title URL}{chapter}/).
	 * No separator is added between the two parts as the title URL already ends with a slash.
	 *
	 * @param string $title_url
	 * @param string $chapter
	 * @return string
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		$titleURL = $this->getFullTitleURL($title_url);

		return "{$titleURL}{$chapter}/";
	}
1043
1044
	/**
	 * Fetches title data (title, latest chapter, last updated) from the title page.
	 *
	 * @param string $title_url
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null NULL if the page could not be parsed, or if the chapter or date could not be extracted.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//div[@class='post-title'])/h3[1]",
			"(//ul[contains(@class, 'list-chap') or contains(@class, 'version-chap')][1]/li[@class='wp-manga-chapter'])[1]",
			"span[@class='chapter-release-date']/i[1]",
			'a[1]',
			function($data) {
				return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
			}
		);
		if($data) {
			//The chapter is everything from the second segment after the last title stub segment of the URL (.../#STUB#/#TITLE#/#CHAPTER#...).
			$segments  = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
			$stubIndex = array_search($this->titleStub, array_reverse($segments, TRUE), TRUE);

			$dateString = (string) preg_replace('/ (-|\[A\]).*$/', '', (string) $data['nodes_latest']->nodeValue);
			$timestamp  = strtotime($dateString);

			if($stubIndex === FALSE || !isset($segments[$stubIndex + 2])) {
				//Without this check a failed search (FALSE + 2) would silently slice from segment 2.
				log_message('error', "{$this->site} | Unable to find chapter in chapter URL for {$title_url}");
			} elseif($timestamp === FALSE) {
				//date() throws a TypeError under strict_types if it is handed FALSE.
				log_message('error', "{$this->site} | Unable to parse date ({$dateString}) for {$title_url}");
			} else {
				$titleData['title']          = trim($data['nodes_title']->textContent);
				$titleData['latest_chapter'] = implode('/', array_slice($segments, $stubIndex + 2));
				$titleData['last_updated']   = date('Y-m-d H:i:s', $timestamp);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}
1074
1075
	public function doCustomUpdate() {
1076
		$titleDataList = [];
1077
1078
		$updateURL = "{$this->baseURL}/page/1/?s&post_type=wp-manga";
1079
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
1080
			$data = $content['body'];
1081
1082
			$data = preg_replace('/^[\s\S]+<!-- container & no-sidebar-->/', '', $data);
1083
			$data = preg_replace('/<div class="ad c-ads custom-code body-bottom-ads">[\s\S]+$/', '', $data);
1084
1085
			$dom = new DOMDocument();
1086
			libxml_use_internal_errors(TRUE);
1087
			$dom->loadHTML($data);
1088
			libxml_use_internal_errors(FALSE);
1089
1090
			$xpath      = new DOMXPath($dom);
1091
			$nodes_rows = $xpath->query("//div[@class='tab-content-wrap']/div/div[@class='row']/div[@class='c-tabs-item__content']/div[@class='col-sm-10 col-md-10']");
1092
			if($nodes_rows->length > 0) {
1093
				foreach($nodes_rows as $row) {
1094
					$titleData = [];
1095
1096
					$nodes_title   = $xpath->query("div[@class='tab-summary']/div[@class='post-title']/h4/a", $row);
1097
					$nodes_chapter = $xpath->query("div[@class='tab-meta']/div[@class='meta-item latest-chap']/span[@class='font-meta chapter']/a", $row);
1098
					$nodes_latest  = $xpath->query("div[@class='tab-meta']/div[@class='meta-item post-on']/span[@class='font-meta']", $row);
1099
1100
					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
1101
						$title = $nodes_title->item(0);
1102
1103
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
1104
						$title_url = $title_url_arr['url'];
1105
1106
						if(!array_key_exists($title_url, $titleDataList)) {
1107
							$titleData['title'] = trim($title->textContent);
1108
1109
							$chapter = $nodes_chapter->item(0);
1110
1111
							$segments = explode('/', (string) $chapter->getAttribute('href'));
1112
							$needle = array_search($this->titleStub, array_reverse($segments, TRUE), TRUE) + 2;
1113
							$titleData['latest_chapter'] = implode('/', array_slice($segments, $needle));
1114
1115
							$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime($nodes_latest->item(0)->nodeValue));
1116
1117
							$titleDataList[$title_url] = $titleData;
1118
						}
1119
					} else {
1120
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1121
					}
1122
				}
1123
			} else {
1124
				log_message('error', "{$this->site} | Following list is empty?");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1125
			}
1126
		} else {
1127
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1128
		}
1129
1130
		return $titleDataList;
1131
	}
1132
}
1133