Completed
Push — master ( 638578...37612c )
by Angus
02:53
created

Base_Site_Model::getChapterPageURL()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
nc 3
nop 2
dl 0
loc 12
ccs 0
cts 8
cp 0
crap 12
rs 9.8666
c 0
b 0
f 0
1
<?php declare(strict_types=1); defined('BASEPATH') OR exit('No direct script access allowed');
2
3
/**
4
 * Class Tracker_Sites_Model
5
 */
6
class Tracker_Sites_Model extends CI_Model {
	public function __construct() {
		parent::__construct();
	}

	/**
	 * Lazily resolves properties that have not been set on this model yet.
	 *
	 * When $name is an existing class whose direct parent is one of the known base site models,
	 * the site model is instantiated, stored on this object under $name and returned.
	 * Anything else is delegated to the CodeIgniter super-object via get_instance().
	 *
	 * @param string $name Name of the requested property (a site model class name for site models).
	 * @return mixed The site model instance, or whatever get_instance()->{$name} yields.
	 */
	public function __get($name) {
		//TODO: Is this a good idea? There wasn't a good consensus on if this is good practice or not..
		//      It's probably a minor speed reduction, but that isn't much of an issue.
		//      An alternate solution would simply have a function which generates a PHP file with code to load each model. Similar to: https://github.com/shish/shimmie2/blob/834bc740a4eeef751f546979e6400fd089db64f8/core/util.inc.php#L1422
		$validClasses = [
			'Base_Site_Model',
			'Base_FoolSlide_Site_Model',
			'Base_myMangaReaderCMS_Site_Model',
			'Base_GlossyBright_Site_Model',
			'Base_Roku_Site_Model',
			'Base_WP_Manga_Site_Model'
		];

		// get_parent_class() yields FALSE for classes without a parent, so compare strictly
		// instead of relying on loose string/bool juggling.
		// NOTE: Only the *direct* parent is checked; deeper descendants are not treated as site models.
		$isSiteModel = class_exists($name) && in_array(get_parent_class($name), $validClasses, TRUE);
		if(!$isSiteModel) {
			return get_instance()->{$name};
		}

		// loadSite() stores the instance as a real property, so later reads bypass __get() entirely.
		$this->loadSite($name);
		return $this->{$name};
	}

	/**
	 * Instantiates the given site model class and stores it on this object under its class name.
	 *
	 * @param string $siteName Class name of the site model to instantiate.
	 */
	private function loadSite(string $siteName) : void {
		$this->{$siteName} = new $siteName();
	}
}
35
36
abstract class Base_Site_Model extends CI_Model {
37
	// Site identifier; the constructor overwrites this with the concrete class name.
	public $site          = '';
38
	// Regex handed to preg_match() by isValidTitleURL(); the default '//' is an empty pattern.
	public $titleFormat   = '//';
39
	// Regex handed to preg_match() by isValidChapter(); the default '//' is an empty pattern.
	public $chapterFormat = '//';
40
	// Placed between the chapter URL and the page number by getChapterPageURL(); while empty, that method returns NULL.
	public $pageSeparator = ''; //NOTE: Each site must set this manually.
41
	// Read by handleCloudFlare(); presumably flags sites known to sit behind CloudFlare - confirm against subclasses.
	public $hasCloudFlare = FALSE;
42
	// Sent as the User-Agent by get_content() and passed to the CloudFlare cookie script.
	public $userAgent     = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36';
43
44
	// Not referenced by the code visible here; presumably the site's root URL - confirm against subclasses.
	public $baseURL = '';
45
46
	/**
47
	 * 0: No custom updater.
48
	 * 1: Uses following page.
49
	 * 2: Uses latest releases page.
50
	 */
51
	public $customType = 0;
52
53
	// When TRUE, parseTitleDataDOM() may hand off to its $noChaptersCall instead of failing on a missing chapter row.
	public $canHaveNoChapters = FALSE;
54
55
	// Upper bound compared against _getSiteRateLimit() in handleBatchUpdate(); unit/time window is not visible here.
	public $siteRateLimit = 600;
56
57 16
	public function __construct() {
		parent::__construct();

		// Site models talk to the DB, so make sure the database library is loaded.
		$this->load->database();

		// The concrete (late-bound) class name doubles as the site identifier.
		$this->site = static::class;
	}
64
65
	/**
66
	 * Generates URL to the title page of the requested series.
67
	 *
68
	 * NOTE: In some cases, we are required to store more data in the title_string than is needed to generate the URL. (Namely as the title_string is our unique identifier for that series)
69
	 *       When storing additional data, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
70
	 *
71
	 * Example:
72
	 *    return "http://mangafox.me/manga/{$title_url}/";
73
	 *
74
	 * Example (with extra data):
75
	 *    $title_parts = explode(':--:', $title_url);
76
	 *    return "https://bato.to/comic/_/comics/-r".$title_parts[0];
77
	 *
78
	 * @param string $title_url
79
	 * @return string
80
	 */
81
	abstract public function getFullTitleURL(string $title_url) : string;
82
83
	/**
84
	 * Generates chapter data from given $title_url and $chapter.
85
	 *
86
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
87
	 *
88
	 * NOTE: In some cases, we are required to store the chapter number, and the segment required to generate the chapter URL separately.
89
	 *       Much like when generating the title URL, we use ':--:' as a delimiter to separate the data. Make sure to handle this as needed.
90
	 *
91
	 * Example:
92
	 *     return [
93
	 *        'url'    => $this->getFullTitleURL($title_url).'/'.$chapter,
94
	 *        'number' => "c{$chapter}"
95
	 *    ];
96
	 *
97
	 * @param string $title_url
98
	 * @param string $chapter
99
	 * @return array [url, number]
100
	 */
101
	abstract public function getChapterData(string $title_url, string $chapter) : array;
102
103
	/**
104
	 * Generates chapter page URL from given chapterData.
105
	 *
106
	 * Will return NULL if pageSeparator is not set.
107
	 *
108
	 * @param array $chapterData
109
	 * @param int   $page
110
	 *
111
	 * @return null|string
112
	 */
113
	final public function getChapterPageURL(array $chapterData, int $page = 1) : ?string {
		// Without a configured separator there is no way to build a per-page URL.
		if($this->pageSeparator === '') {
			return NULL;
		}

		$chapterURL = $chapterData['url'];

		// We don't want double trailing separators (e.g. slashes), so drop ours when the URL already ends with it.
		$glue = (substr($chapterURL, -1) === $this->pageSeparator) ? '' : $this->pageSeparator;

		return $chapterURL . $glue . $page;
	}
125
126
	/**
127
	 * Used to get the latest chapter of given $title_url.
128
	 *
129
	 * This <should> utilize both get_content and parseTitleDataDOM functions when possible, as these can both reduce a lot of the code required to set this up.
130
	 *
131
	 * $titleData params must be set accordingly:
132
	 * * `title` should always be used with html_entity_decode.
133
	 * * `latest_chapter` must match $this->chapterFormat.
134
	 * * `last_updated` should always be in date("Y-m-d H:i:s") format.
135
	 * * `followed` should never be set directly within getTitleData; the only exception is merging in the result of doCustomFollow via array_merge.
136
	 *
137
	 * $firstGet is set to true when the series is first added to the DB, and is used to follow the series on given site (if possible).
138
	 *
139
	 * @param string $title_url
140
	 * @param bool   $firstGet
141
	 * @return array|null [title,latest_chapter,last_updated,followed?]
142
	 */
143
	abstract public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array;
144
145
	/**
	 * Fetches title data for a batch update, unless the site's rate limit has been exceeded.
	 *
	 * The current counter is read via _getSiteRateLimit(); while it does not exceed $this->siteRateLimit
	 * it is passed to _setSiteRateLimit() and getTitleData() is called.
	 * NOTE(review): both rate-limit helpers are defined outside the visible code; presumably the setter bumps the counter - confirm.
	 *
	 * @param string $title_url
	 * @return array [limited => bool, titleData => array|null]
	 */
	public function handleBatchUpdate(string $title_url) : array {
		$currentRate = $this->_getSiteRateLimit();

		if($currentRate > $this->siteRateLimit) {
			// Over the limit: report it and skip the request entirely.
			return [
				'limited'   => TRUE,
				'titleData' => NULL
			];
		}

		$this->_setSiteRateLimit($currentRate);

		return [
			'limited'   => FALSE,
			'titleData' => $this->getTitleData($title_url)
		];
	}
159
160
	/**
161
	 * Validates given $title_url against titleFormat.
162
	 *
163
	 * Failure to match against titleFormat will stop the series from being added to the DB.
164
	 *
165
	 * @param string $title_url
166
	 * @return bool
167
	 */
168 2
	final public function isValidTitleURL(string $title_url) : bool {
		// preg_match() yields 1 on a match, 0 on no match and FALSE on error; only a match counts as valid.
		if(preg_match($this->titleFormat, $title_url)) {
			return TRUE;
		}

		log_message('error', "Invalid Title URL ({$this->site}): {$title_url}");
		return FALSE;
	}
173
174
	/**
175
	 * Validates given $chapter against chapterFormat.
176
	 *
177
	 * Failure to match against chapterFormat will stop the chapter being updated.
178
	 *
179
	 * @param string $chapter
180
	 * @return bool
181
	 */
182 2
	final public function isValidChapter(string $chapter) : bool {
		// preg_match() yields 1 on a match, 0 on no match and FALSE on error; only a match counts as valid.
		if(preg_match($this->chapterFormat, $chapter)) {
			return TRUE;
		}

		log_message('error', "Invalid Chapter ({$this->site}): {$chapter}");
		return FALSE;
	}
187
188
189
190
	/**
	 * Returns the given chapter string unchanged.
	 *
	 * NOTE(review): presumably a hook that site models override to strip site-specific extras from a chapter string - confirm against subclasses.
	 *
	 * @param string $chapter
	 * @return string
	 */
	public function stripChapter(string $chapter) : string {
191
		return $chapter;
192
	}
193
194
	/**
195
	 * Used by getTitleData (& similar functions) to get the requested page data.
196
	 *
197
	 * @param string $url
198
	 * @param string $cookie_string
199
	 * @param string $cookiejar_path
200
	 * @param bool   $follow_redirect
201
	 * @param bool   $isPost
202
	 * @param array  $postFields
203
	 *
204
	 * @return array|bool
205
	 */
206
	final protected function get_content(string $url, string $cookie_string = "", string $cookiejar_path = "", bool $follow_redirect = FALSE, bool $isPost = FALSE, array $postFields = []) {
		// Initial request + at most one retry once handleCloudFlare() has refreshed the clearance cookies.
		$maxAttempts = 2;

		// Defaults keep every path to the final return well-defined (the loop always runs at least once).
		$headers     = [];
		$status_code = 0;
		$body        = '';

		for($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
			$ch = curl_init();
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_ENCODING , "gzip");
			//curl_setopt($ch, CURLOPT_VERBOSE, 1);
			curl_setopt($ch, CURLOPT_HEADER, 1); // Headers are part of the response so they can be split off below.

			if($follow_redirect) curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);

			// Merge cached CloudFlare cookies into a per-attempt copy: appending to $cookie_string itself would
			// duplicate them on the retry, and an empty caller string must not produce a leading "; ".
			$requestCookies = $cookie_string;
			if($cookies = $this->cache->get("cloudflare_{$this->site}")) {
				$requestCookies = ($requestCookies !== '') ? "{$requestCookies}; {$cookies}" : "{$cookies}";
			}

			if(!empty($requestCookies)) curl_setopt($ch, CURLOPT_COOKIE, $requestCookies);
			if(!empty($cookiejar_path)) curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar_path);

			//Some sites check the useragent for stuff, use a pre-defined user-agent to avoid stuff.
			curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

			// Peer verification is explicitly enabled.
			// NOTE(review): the previous note here said this setting was needed to avoid curl error 60 - confirm whether that note is stale.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);

			curl_setopt($ch, CURLOPT_URL, $url);

			if($isPost) {
				// CURLOPT_POST is a boolean switch, not a field count.
				curl_setopt($ch, CURLOPT_POST, TRUE);
				curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
			}

			$response = curl_exec($ch);

			// Every attempt is counted, whether or not it succeeded.
			$this->Tracker->admin->incrementRequests();

			if($response === FALSE) {
				log_message('error', "curl failed with error: ".curl_errno($ch)." | ".curl_error($ch));
				curl_close($ch); // Release the handle on the failure path as well.
				//FIXME: We don't always account for FALSE return
				return FALSE;
			}

			$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			curl_close($ch);

			$headers = http_parse_headers(substr($response, 0, $header_size));
			$body    = substr($response, $header_size);

			// A 503 may be a CloudFlare challenge; handleCloudFlare() returns TRUE when the request is worth retrying.
			$shouldRetry = ($status_code === 503) && $this->handleCloudFlare($url, $body);
			if(!$shouldRetry) {
				break;
			}
		}

		return [
			'headers'     => $headers,
			'status_code' => $status_code,
			'body'        => $body
		];
	}
266
267
	/**
	 * Detects a CloudFlare challenge page in $body and, if found, fetches and caches clearance cookies.
	 *
	 * The cookies are obtained by running the get_cloudflare_cookie.py helper script and are cached under
	 * "cloudflare_{$this->site}", which get_content reads on its next attempt.
	 *
	 * NOTE: `final` was dropped from the declaration; it has no effect on a private method and triggers a warning on PHP 8.
	 *
	 * @param string $url  URL that returned the (possible) challenge page.
	 * @param string $body Response body to inspect.
	 * @return bool TRUE if cookies were cached and the request should be retried, FALSE otherwise.
	 */
	private function handleCloudFlare(string $url, string $body) : bool {
		$isChallenge = (strpos($body, 'DDoS protection by Cloudflare') !== FALSE)
			|| (strpos($body, '<input type="hidden" id="jschl-answer" name="jschl_answer"/>') !== FALSE);
		if(!$isChallenge) {
			//Either site doesn't have CloudFlare or we have bypassed it. Either is good!
			return FALSE;
		}

		//TODO: If !$this->hasCloudFlare, the site appears to have newly enabled CloudFlare; flag it and contact admin.
		//      We'll continue to bypass CloudFlare as this may occur in a loop.

		$urlData = [
			'url'        => $url,
			'user_agent' => $this->userAgent
		];
		//TODO: shell_exec seems bad since the URLs "could" be user inputted? Better way of doing this?
		$scriptPath = APPPATH.'../_scripts/get_cloudflare_cookie.py';
		$result     = shell_exec('python '.escapeshellarg($scriptPath).' '.escapeshellarg(json_encode($urlData)));

		// shell_exec() gives a non-string on failure/empty output; json_decode() would throw a TypeError
		// on that under strict_types, and invalid JSON decodes to NULL. Neither may be cached or trigger a retry.
		$cookieData = is_string($result) ? json_decode($result, TRUE) : NULL;
		if(!is_array($cookieData) || empty($cookieData['cookies'])) {
			log_message('error', "Failed to retrieve CloudFlare cookies for {$this->site}");
			return FALSE;
		}

		$this->cache->save("cloudflare_{$this->site}", $cookieData['cookies'],  31536000 /* 1 year, or until we renew it */);
		log_message('debug', "Saving CloudFlare Cookies for {$this->site}");

		return TRUE;
	}
294
295
	/**
296
	 * Used by getTitleData to get the title, latest_chapter & last_updated data from the data returned by get_content.
297
	 *
298
	 * parseTitleDataDOM checks if the data returned by get_content is valid via a few simple checks.
299
	 * * If the request was actually successful, had a valid status code & data wasn't empty. We also do an additional check on an optional $failure_string param, which will throw a failure if it's matched.
300
	 *
301
	 * Data is cleaned by cleanTitleDataDOM prior to being passed to DOMDocument.
302
	 *
303
	 * All $node_* params must be XPath to the requested node, and must only return 1 result. Anything else will throw a failure.
304
	 *
305
	 * @param array        $content
306
	 * @param string       $title_url
307
	 * @param string       $node_title_string
308
	 * @param string       $node_row_string
309
	 * @param string       $node_latest_string
310
	 * @param string       $node_chapter_string
311
	 * @param closure|null $failureCall
312
	 * @param closure|null $noChaptersCall
313
	 * @param closure|null $extraCall
314
	 *
315
	 * @return DOMElement[]|false [nodes_title,nodes_chapter,nodes_latest]
316
	 */
317
	final protected function parseTitleDataDOM(
		$content, string $title_url,
		string $node_title_string, string $node_row_string,
		string $node_latest_string, string $node_chapter_string,
		?Closure $failureCall = NULL, ?Closure $noChaptersCall = NULL, ?Closure $extraCall = NULL) {

		// Every failure below logs with the same "site : title" prefix and then returns FALSE.
		$logPrefix = "{$this->site} : {$title_url}";

		if(!is_array($content)) {
			log_message('error', "{$logPrefix} | Failed to grab URL (See above curl error)");
			return FALSE;
		}

		// The response headers are not needed here, so only the status code and body are extracted.
		['status_code' => $status_code, 'body' => $data] = $content;

		if(!($status_code >= 200 && $status_code < 300)) {
			// 502 means the site is overloaded, no need to log this.
			if($status_code !== 502) {
				log_message('error', "{$logPrefix} | Bad Status Code ({$status_code})");
			}
			return FALSE;
		}

		if(empty($data)) {
			log_message('error', "{$logPrefix} | Data is empty? (Status code: {$status_code})");
			return FALSE;
		}

		if($failureCall !== NULL && $failureCall($data)) {
			log_message('error', "{$logPrefix} | Failure call matched");
			return FALSE;
		}

		//This allows us to clean the DOM prior to parsing. It's faster to grab the only part we need THEN parse it.
		$data = $this->cleanTitleDataDOM($data);

		$dom = new DOMDocument();
		// Collect parser warnings internally while loading, then clear the buffer (it otherwise grows with
		// every parsed page) and restore whatever error mode was active before, instead of forcing FALSE.
		$previousErrorMode = libxml_use_internal_errors(TRUE);
		$dom->loadHTML('<?xml encoding="utf-8" ?>' . $data);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorMode);

		$xpath       = new DOMXPath($dom);
		$nodes_title = $xpath->query($node_title_string);
		$nodes_row   = $xpath->query($node_row_string);

		if($nodes_title->length !== 1) {
			log_message('error', "{$logPrefix} | Invalid amount of nodes (TITLE: {$nodes_title->length})");
			return FALSE;
		}

		if($nodes_row->length === 1) {
			$firstRow     = $nodes_row->item(0);
			$nodes_latest = $xpath->query($node_latest_string, $firstRow);

			// An empty chapter XPath means the row node itself is the chapter node.
			$nodes_chapter = ($node_chapter_string !== '')
				? $xpath->query($node_chapter_string, $firstRow)
				: $nodes_row;

			if($nodes_latest->length !== 1 || $nodes_chapter->length !== 1) {
				log_message('error', "{$logPrefix} | Invalid amount of nodes (LATEST: {$nodes_latest->length} | CHAPTER: {$nodes_chapter->length})");
				return FALSE;
			}

			$returnData = [
				'nodes_title'   => $nodes_title->item(0),
				'nodes_latest'  => $nodes_latest->item(0),
				'nodes_chapter' => $nodes_chapter->item(0)
			];

			if($extraCall !== NULL) $extraCall($xpath, $returnData);

			return $returnData;
		}

		if($this->canHaveNoChapters && $noChaptersCall !== NULL) {
			$returnData = [
				'nodes_title' => $nodes_title->item(0)
			];

			// NOTE(review): the callback presumably takes $returnData by reference and replaces it with a
			// non-array when its own checks fail - confirm against the site models that pass one.
			$noChaptersCall($data, $xpath, $returnData);

			if(is_array($returnData)) {
				if($extraCall !== NULL) $extraCall($xpath, $returnData);
			} else {
				log_message('error', "{$logPrefix} | canHaveNoChapters set, but doesn't match possible checks! XPath is probably broken.");
			}

			// Returned as-is (even when not an array) to keep the existing contract with callers.
			return $returnData;
		}

		log_message('error', "{$logPrefix} | Invalid amount of nodes (ROW: {$nodes_row->length})");
		return FALSE;
	}
398
399
	/**
400
	 * Used by parseTitleDataDOM to clean the data prior to passing it to DOMDocument & DOMXPath.
401
	 * This is mostly done as an (assumed) speed improvement due to the reduced amount of DOM to parse, or simply just making it easier to parse with XPath.
402
	 *
403
	 * @param string $data
404
	 * @return string
405
	 */
406
	public function cleanTitleDataDOM(string $data) : string {
407
		// Base implementation is a pass-through; NOTE(review): presumably overridden by site models that need to trim the markup - confirm.
		return $data;
408
	}
409
410
	/**
411
	 * Used to follow a series on given site if supported.
412
	 *
413
	 * This is called by getTitleData if $firstGet is true (which occurs when the series is first being added to the DB).
414
	 *
415
	 * Most of the actual following is done by handleCustomFollow.
416
	 *
417
	 * @param string $data
418
	 * @param array  $extra
419
	 * @return array
420
	 */
421
	final public function doCustomFollow(string $data = "", array $extra = []) : array {
		$titleData = [];

		// Invoked by handleCustomFollow with the response of the follow request.
		// Each failure is logged and leaves $titleData untouched; only a verified success marks the series as followed.
		$onResponse = function($content, $id, closure $successCallback = NULL) use(&$titleData) {
			if(!is_array($content)) {
				log_message('error', "doCustomFollow failed (Failed request) for {$id}");
				return;
			}

			if(!array_key_exists('status_code', $content)) {
				log_message('error', "doCustomFollow failed (Missing status code?) for {$id}");
				return;
			}

			$statusCode = $content['status_code'];
			if($statusCode !== 200) {
				log_message('error', "doCustomFollow failed (Invalid status code ({$statusCode})) for {$id}");
				return;
			}

			// The success check is optional; when one is supplied it must approve the response body.
			if(is_callable($successCallback) && !$successCallback($content['body'])) {
				log_message('error', "doCustomFollow failed (Invalid response?) for {$id}");
				return;
			}

			$titleData['followed'] = 'Y';

			log_message('info', "doCustomFollow succeeded for {$id}");
		};

		$this->handleCustomFollow($onResponse, $data, $extra);

		return $titleData;
	}
448
449
	/**
450
	 * Used by doCustomFollow to handle following series on sites.
451
	 *
452
	 * Uses get_content to get data.
453
	 *
454
	 * $callback must accept the arguments ($content, $id, closure $successCallback = NULL).
455
	 * * $content is simply just the get_content data.
456
	 * * $id is the dbID. This should be passed by the $extra arr.
457
	 * * $successCallback is an optional success check to make sure the series was properly followed.
458
	 *
459
	 * @param callable $callback
460
	 * @param string   $data
461
	 * @param array    $extra
462
	 */
463
	public function handleCustomFollow(callable $callback, string $data = "", array $extra = []) {
		// The base implementation only acts for customType 2 ("latest releases page" updaters).
		if($this->customType !== 2) {
			return;
		}

		// No request is made here: a synthetic 200 response is reported straight to the callback.
		$fakeResponse = ['status_code' => 200];
		$callback($fakeResponse, $extra['id']);
	}
469
470
	/**
471
	 * Used to check the sites following page for new updates (if supported).
472
	 * This should work much like getTitleData, but instead checks the following page.
473
	 *
474
	 * This must return an array containing arrays of each of the chapters data.
475
	 */
476
	public function doCustomUpdate() {}
477
478
	/**
479
	 * Used by the custom updater to check if a chapter looks newer than the current one.
480
	 *
481
	 * This calls doCustomCheckCompare which handles the majority of the checking.
482
	 * NOTE: Depending on the site, you may need to call getChapterData to get the chapter number to be used with this.
483
	 *
484
	 * @param string $oldChapterString
485
	 * @param string $newChapterString
486
	 * @return bool
487
	 */
488
	public function doCustomCheck(?string $oldChapterString, string $newChapterString) : bool {
		// With no previous chapter on record, any chapter counts as new.
		if(is_null($oldChapterString)) {
			return TRUE;
		}

		// Turns a chapter string into its '/'-separated number segments (e.g. volume / chapter).
		$toSegments = function(string $chapterString) : array {
			return explode('/', $this->getChapterData('', $chapterString)['number']);
		};

		$oldSegments = $toSegments($oldChapterString);
		$newSegments = $toSegments($newChapterString);

		return $this->doCustomCheckCompare($oldSegments, $newSegments);
	}
500
501
	/**
	 * Used by doCustomCheck to check if a chapter looks newer than the current one.
	 * Chapter must be in a (v[0-9]+/)?c[0-9]+(\..+)? format.
	 *
	 * To avoid issues with the occasional off case, this is only meant to return TRUE when we are confident that the new chapter is newer than the old one.
	 *
	 * Each segment list is expected to look like ['c12'] or ['v2', 'c12']; a lone chapter segment is treated as volume 0.
	 * FALSE is returned when the two lists don't end up with the same number of segments, or when they are empty.
	 *
	 * @param array $oldChapterSegments Segments of the chapter we currently have.
	 * @param array $newChapterSegments Segments of the chapter being tested.
	 * @return bool TRUE if the new chapter looks newer than the old one.
	 */
	final public function doCustomCheckCompare(array $oldChapterSegments, array $newChapterSegments) : bool {
		//NOTE: We only need to check against the new chapter here, as that is what is used for confirming update.

		//Make sure we have a volume element
		if(count($oldChapterSegments) === 1) array_unshift($oldChapterSegments, 'v0');
		if(count($newChapterSegments) === 1) array_unshift($newChapterSegments, 'v0');

		//Segment counts must match for the chapters to be comparable.
		//The empty check stops array_shift() returning NULL, which substr() would reject under strict_types.
		$segmentCount = count($newChapterSegments);
		if($segmentCount === 0 || $segmentCount !== count($oldChapterSegments)) {
			return FALSE;
		}

		//Make sure chapter format looks correct.
		//NOTE: We only need to check the new count as we know the old count is the same.
		$oldVolume = 0;
		$newVolume = 0;
		if($segmentCount === 2) {
			//Strip the leading 'v' from the volume segment.
			$oldVolume = substr(array_shift($oldChapterSegments), 1);
			$newVolume = substr(array_shift($newChapterSegments), 1);

			//Forcing volume to 0 as TBD might not be the latest (although it can be, but that is covered by other checks)
			$placeholderVolumes = ['TBD', 'TBA', 'NA', 'LMT'];
			if(in_array($oldVolume, $placeholderVolumes, TRUE)) $oldVolume = 0;
			if(in_array($newVolume, $placeholderVolumes, TRUE)) $newVolume = 0;

			$oldVolume = floatval($oldVolume);
			$newVolume = floatval($newVolume);
		}

		//Strip the leading 'c' from the chapter segment.
		$oldChapter = floatval(substr(array_shift($oldChapterSegments), 1));
		$newChapter = floatval(substr(array_shift($newChapterSegments), 1));

		if($newChapter > $oldChapter && ($oldChapter >= 10 && $newChapter >= 10)) {
			//$newChapter is higher than $oldChapter AND $oldChapter and $newChapter are both more than 10
			//This is intended to cover the /majority/ of valid updates, as we technically shouldn't have to check volumes.
			return TRUE;
		}
		if($newVolume > $oldVolume && ($oldChapter < 10 && $newChapter < 10)) {
			//This is pretty much just to match a one-off case where the site doesn't properly increment chapter numbers across volumes, and instead does something like: v1/c1..v1/c5, v2/c1..v1/c5 (and so on).
			return TRUE;
		}
		if($newVolume > $oldVolume && $newChapter >= $oldChapter) {
			//$newVolume is higher, and chapter is not lower.
			return TRUE;
		}
		if($newChapter > $oldChapter) {
			//$newVolume isn't higher, but chapter is.
			return TRUE;
		}

		return FALSE;
	}
561
562
	/**
	 * Reads this site's rate-limit counter from the cache.
	 *
	 * @return int The value cached under "{$this->site}_ratelimit" cast to int, or 0 when the cache returns a falsy value.
	 */
	private function _getSiteRateLimit() : int {
		return (int) ($this->cache->get("{$this->site}_ratelimit") ?: 0);
	}
565
	/**
	 * Saves this site's rate-limit counter, incremented by one, to the cache.
	 *
	 * @param int|null $rateLimit Counter to increment. When NULL (or 0, as this is a truthiness check) the currently cached counter is used instead.
	 * @return bool The result of the cache save() call.
	 */
	private function _setSiteRateLimit(?int $rateLimit = NULL) : bool {
		//We would just use increment(), but we can't set ttl with it...
		$currentRateLimit = $rateLimit ?: $this->_getSiteRateLimit();
		return $this->cache->save("{$this->site}_ratelimit", $currentRateLimit + 1, 3600);
	}
570
}
571
572
/**
 * Base model for FoolSlide sites.
 *
 * Title data and the custom update list are both read from the site's JSON API
 * (see getJSONTitleURL() and getJSONUpdateURL()).
 */
abstract class Base_FoolSlide_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-z0-9_-]+$/';
	public $chapterFormat = '/^(?:en(?:-us)?|pt|es)\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+(?:\/[0-9]+)?)?)?$/';
	public $pageSeparator = 'page/';
	public $customType    = 2;

	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the title's series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * Builds the chapter URL and a readable chapter number from a stored chapter string.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter string in #LANG#/#VOLUME#/#CHAPTER#(/#CHAPTER_EXTRA#) form.
	 * @return array ['url' => string, 'number' => string]; number is "v#/c#(.#)", with the volume part omitted when the volume is '0'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapter_parts = explode('/', $chapter); //returns #LANG#/#VOLUME#/#CHAPTER#/#CHAPTER_EXTRA#(/#PAGE#/)

		//$chapterFormat only guarantees the language and one number, so don't assume the later segments exist.
		$volume        = $chapter_parts[1] ?? '0';
		$chapterNumber = $chapter_parts[2] ?? '';

		$number = ($volume !== '0' ? "v{$volume}/" : '') . "c{$chapterNumber}";
		if(isset($chapter_parts[3])) {
			$number .= ".{$chapter_parts[3]}";
		}

		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $number
		];
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter string as stored (see getChapterData()).
	 * @return string URL of the chapter's reader page.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return "{$this->baseURL}/read/{$title_url}/{$chapter}/";
	}

	/**
	 * Fetches the title's JSON data and extracts the title, latest chapter and last update time.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when no chapter data could be read.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$jsonURL = $this->getJSONTitleURL($title_url);
		if($content = $this->get_content($jsonURL)) {
			$json = json_decode($content['body'], TRUE);
			if($json && isset($json['chapters']) && count($json['chapters']) > 0) {
				$titleData['title'] = trim($json['comic']['name']);

				//FoolSlide title API doesn't appear to let you sort (yet every other API method which has chapters does), so we need to sort ourselves..
				//Sorted by chapter.subchapter, highest first.
				usort($json['chapters'], static function($a, $b) {
					return floatval("{$b['chapter']['chapter']}.{$b['chapter']['subchapter']}") <=> floatval("{$a['chapter']['chapter']}.{$a['chapter']['subchapter']}");
				});
				$latestChapter = reset($json['chapters'])['chapter'];

				$latestChapterString = "{$latestChapter['language']}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
				if($latestChapter['subchapter'] !== '0') {
					$latestChapterString .= "/{$latestChapter['subchapter']}";
				}
				$titleData['latest_chapter'] = $latestChapterString;

				//No need to use date() here since this is already formatted as such.
				$titleData['last_updated'] = ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created']);
			}
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Reads the site's latest chapters from the JSON API and returns the newest chapter of each title found.
	 *
	 * @return array Title data (['title', 'latest_chapter', 'last_updated']) keyed by the comic's stub. Empty if the request or parsing failed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$jsonURL = $this->getJSONUpdateURL();
		if(($content = $this->get_content($jsonURL)) && $content['status_code'] === 200) {
			if(($json = json_decode($content['body'], TRUE)) && isset($json['chapters'])) {
				//This should fix edge cases where chapters are uploaded in bulk in the wrong order (HelveticaScans does this with Mousou Telepathy).
				//Sorted by updated date (or created date when there is no updated date), newest first.
				usort($json['chapters'], static function($a, $b) {
					$a_date = new DateTime($a['chapter']['updated'] !== '0000-00-00 00:00:00' ? $a['chapter']['updated'] : $a['chapter']['created']);
					$b_date = new DateTime($b['chapter']['updated'] !== '0000-00-00 00:00:00' ? $b['chapter']['updated'] : $b['chapter']['created']);
					return $b_date <=> $a_date;
				});

				foreach($json['chapters'] as $chapterData) {
					$stub = $chapterData['comic']['stub'];
					if(array_key_exists($stub, $titleDataList)) {
						//We already have title data for this title (the list is newest first).
						continue;
					}

					$titleData = [];
					$titleData['title'] = trim($chapterData['comic']['name']);

					$latestChapter = $chapterData['chapter'];

					//Use the chapter's own language (as getTitleData does), falling back to 'en' if the API doesn't provide one.
					$language = $latestChapter['language'] ?? 'en';

					$latestChapterString = "{$language}/{$latestChapter['volume']}/{$latestChapter['chapter']}";
					if($latestChapter['subchapter'] !== '0') {
						$latestChapterString .= "/{$latestChapter['subchapter']}";
					}
					$titleData['latest_chapter'] = $latestChapterString;

					//No need to use date() here since this is already formatted as such.
					$titleData['last_updated'] = ($latestChapter['updated'] !== '0000-00-00 00:00:00' ? $latestChapter['updated'] : $latestChapter['created']);

					$titleDataList[$stub] = $titleData;
				}
			} else {
				log_message('error', "{$this->site} - Custom updating failed (no chapters arg?) for {$this->baseURL}.");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the JSON API endpoint for a single title.
	 */
	public function getJSONTitleURL(string $title_url) : string {
		return "{$this->baseURL}/api/reader/comic/stub/{$title_url}/format/json";
	}

	/**
	 * @return string URL of the JSON API endpoint listing chapters ordered by creation date, descending.
	 */
	public function getJSONUpdateURL() : string {
		return "{$this->baseURL}/api/reader/chapters/orderby/desc_created/format/json";
	}
}
677
678
/**
 * Base model for myMangaReaderCMS sites.
 *
 * Title data is scraped from the title page, and the custom update list from the site's /latest-release page.
 */
abstract class Base_myMangaReaderCMS_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[a-zA-Z0-9\._-]+)$/';
	public $pageSeparator = '/';
	public $customType    = 2;

	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/manga/{$title_url}";
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter slug as stored.
	 * @return array ['url' => string, 'number' => string]; purely numeric chapters are prefixed with 'c', anything else is used as-is.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter slug as stored.
	 * @return string URL of the chapter.
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		return $this->getFullTitleURL($title_url).'/'.$chapter;
	}

	/**
	 * Fetches the title page and extracts the title, latest chapter and last update time.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = $this->getFullTitleURL($title_url);

		$content = $this->get_content($fullURL);

		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"(//h2[@class='widget-title'])[1]",
			"//ul[contains(@class, 'chapters')]/li[not(contains(@class, 'btn'))][1]",
			"div[contains(@class, 'action')]/div[@class='date-chapter-title-rtl']",
			'h5/a[1] | h3/a[1]',
			//Detects the site's error page. NOTE(review): presumably parseTitleDataDOM() treats TRUE as a failure - confirm.
			function($data) {
				return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
			}
		);
		if($data) {
			$titleData['title'] = trim($data['nodes_title']->textContent);

			//The chapter slug is the segment two places after the last 'manga' segment of the chapter link.
			$segments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
			$needle = array_search('manga', array_reverse($segments, TRUE), TRUE) + 2;
			$titleData['latest_chapter'] = $segments[$needle];

			$dateString = $data['nodes_latest']->nodeValue;
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime(preg_replace('/ (-|\[A\]).*$/', '', $dateString)));
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the site's latest-release page and returns the first chapter listed for each title found.
	 *
	 * @return array Title data (['title', 'latest_chapter', 'last_updated']) keyed by the title's URL stub. Empty if the request or parsing failed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest-release";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			//Cut away the markup around the <dl> list (when there is one) before parsing.
			$data = preg_replace('/^[\s\S]+<dl>/', '<dl>', $data);
			$data = preg_replace('/<\/dl>[\s\S]+$/', '</dl>', $data);

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//dl/dd | //div[@class='mangalist']/div[@class='manga-item']");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("div[@class='events ']/div[@class='events-body']/h3[@class='events-heading']/a | h3/a", $row);
					$nodes_chapter = $xpath->query("(div[@class='events '][1]/div[@class='events-body'][1] | div[@class='manga-chapter'][1])/h6[@class='events-subtitle'][1]/a[1]", $row);
					$nodes_latest  = $xpath->query("div[@class='time'] | small", $row);

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title = $nodes_title->item(0);

						//The title stub is the last path segment of the title link.
						if(!preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr)) {
							log_message('error', "{$this->site}/Custom | Unable to parse title URL");
							continue;
						}
						$title_url = $title_url_arr['url'];

						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->textContent);

							//The chapter slug is the last path segment of the chapter link.
							$chapter = $nodes_chapter->item(0);
							if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
								log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
								continue;
							}
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = str_replace('/', '-', trim($nodes_latest->item(0)->nodeValue)); //NOTE: We replace slashes here as it stops strtotime interpreting the date as US date format.
							if($dateString === 'T') {
								//A lone 'T' is treated as today's date.
								$dateString = date("Y-m-d",now());
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString . ' 00:00'));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
		}

		return $titleDataList;
	}
}
791
792
/**
 * Base model for GlossyBright sites.
 *
 * Title data is read from the title's manga-rss feed, and the custom update list is scraped from the site's front page.
 */
abstract class Base_GlossyBright_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';
	public $pageSeparator = '/';

	public $customType    = 2;

	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the title page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/{$title_url}";
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter number as stored.
	 * @return array ['url' => string, 'number' => string]; number is the chapter prefixed with 'c'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			'url'    => $this->getFullTitleURL($title_url).'/'.$chapter.'/',
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Fetches the title's RSS feed and extracts the title, latest chapter and last update time.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];

		$fullURL = "{$this->baseURL}/manga-rss/{$title_url}";
		$content = $this->get_content($fullURL);
		$data    = $this->parseTitleDataDOM(
			$content,
			$title_url,
			'//rss/channel/image/title',
			'//rss/channel/item[1]',
			'pubdate',
			'title',
			//Flags feeds without an <image> element. NOTE(review): presumably parseTitleDataDOM() treats TRUE as a failure - confirm.
			function($data) {
				return strpos($data, '<image>') === FALSE;
			}
		);
		if($data) {
			$titleData['title'] = preg_replace('/^Recent chapters of (.*?) manga$/', '$1', trim($data['nodes_title']->textContent));

			//For whatever reason, DOMDocument breaks the <link> element we need to grab the chapter, so we have to grab it elsewhere.
			$titleData['latest_chapter'] = preg_replace('/^.*? - ([0-9\.]+) - .*?$/', '$1', trim($data['nodes_chapter']->textContent));

			$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime((string) $data['nodes_latest']->textContent));
		}

		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the site's front page and returns the first chapter listed for each title found.
	 *
	 * @return array Title data (['title', 'latest_chapter', 'last_updated']) keyed by the title's URL stub. Empty if the request or parsing failed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		//Escape the host so it can be used as a literal inside the title URL pattern below.
		$baseURLRegex = preg_quote((string) parse_url($this->baseURL, PHP_URL_HOST), '/');
		if(($content = $this->get_content($this->baseURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@id='wpm_mng_lst']/div | //*[@id='wpm_mng_lst']/li/div");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					//Title and chapter are both read from the same link: the title from its title attribute, the chapter from its href.
					$nodes_title   = $xpath->query("a[2]", $row);
					$nodes_chapter = $xpath->query("a[2]", $row);
					$nodes_latest  = $xpath->query("b", $row);

					if($nodes_latest->length === 0) {
						//No <b> element, so fall back to the row's last text node.
						$nodes_latest = $xpath->query('text()[last()]', $row);
					}

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title   = $nodes_title->item(0);
						$chapter = $nodes_chapter->item(0);

						//The title stub is the first path segment after the host.
						if(!preg_match('/'.$baseURLRegex.'\/(?<url>.*?)\//', $title->getAttribute('href'), $title_url_arr)) {
							log_message('error', "{$this->site}/Custom | Unable to parse title URL");
							continue;
						}
						$title_url = $title_url_arr['url'];

						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->getAttribute('title'));

							//The chapter is the last path segment of the link.
							if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
								log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
								continue;
							}
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = trim($nodes_latest->item(0)->textContent);
							switch($dateString) {
								case 'Today':
									$dateString = date("Y-m-d", now());
									break;

								case 'Yesterday':
									$dateString = date("Y-m-d", strtotime("-1 days"));
									break;

								default:
									//Do nothing
									break;
							}
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
}
909
910
/**
 * Base model for Roku sites.
 *
 * Title data is scraped from the title's series page, and the custom update list from the site's /latest page.
 */
abstract class Base_Roku_Site_Model extends Base_Site_Model {
	public $titleFormat   = '/^[a-zA-Z0-9-]+$/';
	public $chapterFormat = '/^[0-9\.]+$/';

	public $customType    = 2;

	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the title's series page.
	 */
	public function getFullTitleURL(string $title_url) : string {
		return "{$this->baseURL}/series/{$title_url}";
	}

	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter number as stored.
	 * @return array ['url' => string, 'number' => string]; number is the chapter prefixed with 'c'.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		return [
			'url'    => "{$this->baseURL}/read/{$title_url}/{$chapter}",
			'number' => "c{$chapter}"
		];
	}

	/**
	 * Fetches the title's series page and extracts the title, latest chapter and last update time.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$titleData = [];
		$fullURL = $this->getFullTitleURL($title_url);
		$content = $this->get_content($fullURL);
		$data = $this->parseTitleDataDOM(
			$content,
			$title_url,
			"//div[@id='activity']/descendant::div[@class='media'][1]/descendant::div[@class='media-body']/h2/text()",
			"//ul[contains(@class, 'media-list')]/li[@class='media'][1]/a",
			"div[@class='media-body']/span[@class='text-muted']",
			""
		);
		if($data) {
			$titleData['title'] = trim(preg_replace('/ Added on .*$/','', $data['nodes_title']->textContent));
			//The chapter is the numeric last path segment of the chapter link.
			$titleData['latest_chapter'] = preg_replace('/^.*\/([0-9\.]+)$/', '$1', (string) $data['nodes_chapter']->getAttribute('href'));

			$dateString = preg_replace('/^Added (?:on )?/', '',$data['nodes_latest']->textContent);
			$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString));
		}
		return (!empty($titleData) ? $titleData : NULL);
	}

	/**
	 * Scrapes the site's /latest page and returns the first chapter listed for each title found.
	 *
	 * @return array Title data (['title', 'latest_chapter', 'last_updated']) keyed by the title's URL stub. Empty if the request or parsing failed.
	 */
	public function doCustomUpdate() {
		$titleDataList = [];

		$updateURL = "{$this->baseURL}/latest";
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
			$data = $content['body'];

			$dom = new DOMDocument();
			libxml_use_internal_errors(TRUE);
			$dom->loadHTML($data);
			libxml_use_internal_errors(FALSE);

			$xpath      = new DOMXPath($dom);
			$nodes_rows = $xpath->query("//div[@class='content-wrapper']/div[@class='row']/div/div");
			if($nodes_rows->length > 0) {
				foreach($nodes_rows as $row) {
					$titleData = [];

					$nodes_title   = $xpath->query("div[@class='caption']/h6/a", $row);
					$nodes_chapter = $xpath->query("div[@class='panel-footer no-padding']/a", $row);
					$nodes_latest  = $xpath->query("div[@class='caption']/text()", $row);

					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
						$title = $nodes_title->item(0);

						//The title stub is the last path segment of the title link.
						if(!preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr)) {
							log_message('error', "{$this->site}/Custom | Unable to parse title URL");
							continue;
						}
						$title_url = $title_url_arr['url'];

						if(!array_key_exists($title_url, $titleDataList)) {
							$titleData['title'] = trim($title->textContent);

							//The chapter is the last path segment of the chapter link.
							$chapter = $nodes_chapter->item(0);
							if(!preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr)) {
								log_message('error', "{$this->site}/Custom | Unable to parse chapter URL for {$title_url}");
								continue;
							}
							$titleData['latest_chapter'] = $chapter_arr['chapter'];

							$dateString = trim(str_replace('Added ', '', $nodes_latest->item(0)->textContent));
							$titleData['last_updated'] = date("Y-m-d H:i:s", strtotime($dateString));

							$titleDataList[$title_url] = $titleData;
						}
					} else {
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
					}
				}
			} else {
				log_message('error', "{$this->site} | Following list is empty?");
			}
		} else {
			log_message('error', "{$this->site} - Custom updating failed.");
		}

		return $titleDataList;
	}
}
1002
1003
//CHECK: RSS might be better to use here?
1004
abstract class Base_WP_Manga_Site_Model extends Base_Site_Model {
1005
	public $titleFormat   = '/^[a-zA-Z0-9_-]+$/';
1006
	public $chapterFormat = '/^(?:oneshot|(?:chapter-)?[0-9a-zA-Z\.\-]+)$/';
1007
	//TODO: Get PageSeperator
1008
1009
	public $customType    = 2;
1010
1011
	/**
	 * @param string $title_url The title's URL stub.
	 * @return string URL of the title page (with trailing slash).
	 */
	public function getFullTitleURL(string $title_url) : string {
		return $this->baseURL . '/manga/' . $title_url . '/';
	}
1014
1015
	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter slug as stored.
	 * @return array ['url' => string, 'number' => string]; purely numeric chapters are prefixed with 'c', anything else is used as-is.
	 */
	public function getChapterData(string $title_url, string $chapter) : array {
		$chapterN = (ctype_digit($chapter) ? "c{$chapter}" : $chapter);
		return [
			'url'    => $this->getChapterURL($title_url, $chapter),
			'number' => $chapterN
		];
	}
1022
1023
	/**
	 * @param string $title_url The title's URL stub.
	 * @param string $chapter   Chapter slug as stored.
	 * @return string URL of the chapter (title URL followed by the chapter slug and a trailing slash).
	 */
	public function getChapterURL(string $title_url, string $chapter) : string {
		$titleURL = $this->getFullTitleURL($title_url);
		return "{$titleURL}{$chapter}/";
	}
1026
1027
	/**
	 * Fetches the title page and extracts the title, latest chapter and last update time.
	 *
	 * @param string $title_url The title's URL stub.
	 * @param bool   $firstGet  Not used by this implementation.
	 * @return array|null ['title', 'latest_chapter', 'last_updated'], or NULL when parseTitleDataDOM() returned nothing usable.
	 */
	public function getTitleData(string $title_url, bool $firstGet = FALSE) : ?array {
		$pageContent = $this->get_content($this->getFullTitleURL($title_url));

		//Detects the site's error page. NOTE(review): presumably parseTitleDataDOM() treats TRUE as a failure - confirm.
		$isErrorPage = function($data) {
			return strpos($data, 'Whoops, looks like something went wrong.') !== FALSE;
		};

		$data = $this->parseTitleDataDOM(
			$pageContent,
			$title_url,
			"(//div[@class='post-title'])/h3[1]",
			"//ul[contains(@class, 'version-chap')]/li[1]",
			"span[@class='chapter-release-date']/i[1]",
			'a[1]',
			$isErrorPage
		);
		if(!$data) {
			return NULL;
		}

		//The chapter slug is the segment two places after the last 'manga' segment of the chapter link.
		$hrefSegments = explode('/', (string) $data['nodes_chapter']->getAttribute('href'));
		$chapterIndex = array_search('manga', array_reverse($hrefSegments, TRUE), TRUE) + 2;

		//Drop any trailing " -..." / " [A]..." suffix before parsing the date.
		$rawDate = preg_replace('/ (-|\[A\]).*$/', '', $data['nodes_latest']->nodeValue);

		return [
			'title'          => trim($data['nodes_title']->textContent),
			'latest_chapter' => $hrefSegments[$chapterIndex],
			'last_updated'   => date("Y-m-d H:i:s", strtotime($rawDate))
		];
	}
1057
1058
	public function doCustomUpdate() {
1059
		$titleDataList = [];
1060
1061
		$updateURL = "{$this->baseURL}/page/1/?s&post_type=wp-manga";
1062
		if(($content = $this->get_content($updateURL)) && $content['status_code'] === 200) {
1063
			$data = $content['body'];
1064
1065
			$data = preg_replace('/^[\s\S]+<!-- container & no-sidebar-->/', '', $data);
1066
			$data = preg_replace('/<div class="ad c-ads custom-code body-bottom-ads">[\s\S]+$/', '', $data);
1067
1068
			$dom = new DOMDocument();
1069
			libxml_use_internal_errors(TRUE);
1070
			$dom->loadHTML($data);
1071
			libxml_use_internal_errors(FALSE);
1072
1073
			$xpath      = new DOMXPath($dom);
1074
			$nodes_rows = $xpath->query("//div[@class='tab-content-wrap']/div/div[@class='row']/div[@class='c-tabs-item__content']/div[@class='col-sm-10 col-md-10']");
1075
			if($nodes_rows->length > 0) {
1076
				foreach($nodes_rows as $row) {
1077
					$titleData = [];
1078
1079
					$nodes_title   = $xpath->query("div[@class='tab-summary']/div[@class='post-title']/h4/a", $row);
1080
					$nodes_chapter = $xpath->query("div[@class='tab-meta']/div[@class='meta-item latest-chap']/span[@class='font-meta chapter']/a", $row);
1081
					$nodes_latest  = $xpath->query("div[@class='tab-meta']/div[@class='meta-item post-on']/span[@class='font-meta']", $row);
1082
1083
					if($nodes_title->length === 1 && $nodes_chapter->length === 1 && $nodes_latest->length === 1) {
1084
						$title = $nodes_title->item(0);
1085
1086
						preg_match('/(?<url>[^\/]+(?=\/$|$))/', $title->getAttribute('href'), $title_url_arr);
1087
						$title_url = $title_url_arr['url'];
1088
1089
						if(!array_key_exists($title_url, $titleDataList)) {
1090
							$titleData['title'] = trim($title->textContent);
1091
1092
							$chapter = $nodes_chapter->item(0);
1093
							preg_match('/(?<chapter>[^\/]+(?=\/$|$))/', $chapter->getAttribute('href'), $chapter_arr);
1094
							$titleData['latest_chapter'] = $chapter_arr['chapter'];
1095
1096
							$titleData['last_updated'] = date('Y-m-d H:i:s', strtotime($nodes_latest->item(0)->nodeValue));
1097
1098
							$titleDataList[$title_url] = $titleData;
1099
						}
1100
					} else {
1101
						log_message('error', "{$this->site}/Custom | Invalid amount of nodes (TITLE: {$nodes_title->length} | CHAPTER: {$nodes_chapter->length}) | LATEST: {$nodes_latest->length})");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1102
					}
1103
				}
1104
			} else {
1105
				log_message('error', "{$this->site} | Following list is empty?");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1106
			}
1107
		} else {
1108
			log_message('error', "{$this->site} - Custom updating failed for {$this->baseURL}.");
0 ignored issues
show
Unused Code introduced by
The call to the function log_message() seems unnecessary as the function has no side-effects.
Loading history...
1109
		}
1110
1111
		return $titleDataList;
1112
	}
1113
}
1114