1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace phpMyFAQ; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* The Linkverifier class provides methods and functions for verifying URLs. |
7
|
|
|
* |
8
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public License, |
9
|
|
|
* v. 2.0. If a copy of the MPL was not distributed with this file, You can |
10
|
|
|
* obtain one at http://mozilla.org/MPL/2.0/. |
11
|
|
|
* |
12
|
|
|
* The Initial Developer of the Original Code is released for external use |
13
|
|
|
* with permission from NetJapan, Inc. IT Administration Group. |
14
|
|
|
* |
15
|
|
|
* @package phpMyFAQ |
16
|
|
|
* @author Minoru TODA <[email protected]> |
17
|
|
|
* @author Matteo Scaramuccia <[email protected]> |
18
|
|
|
* @author Thorsten Rinne <[email protected]> |
19
|
|
|
* @copyright 2005-2019 NetJapan, Inc. and phpMyFAQ Team |
20
|
|
|
* @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0 |
21
|
|
|
* @link https://www.phpmyfaq.de |
22
|
|
|
* @since 2005-08-01 |
23
|
|
|
*/ |
24
|
|
|
|
25
|
|
|
// Stop immediately when the IS_VALID_PHPMYFAQ constant has not been defined
// by the including script.
defined('IS_VALID_PHPMYFAQ') || exit();
28
|
|
|
|
29
|
|
|
/** |
30
|
|
|
* LinkVerifier. |
31
|
|
|
* |
32
|
|
|
* @package phpMyFAQ |
33
|
|
|
* @author Minoru TODA <[email protected]> |
34
|
|
|
* @author Matteo Scaramuccia <[email protected]> |
35
|
|
|
* @author Thorsten Rinne <[email protected]> |
36
|
|
|
* @copyright 2005-2019 NetJapan, Inc. and phpMyFAQ Team |
37
|
|
|
* @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0 |
38
|
|
|
* @link https://www.phpmyfaq.de |
39
|
|
|
* @since 2005-08-01 |
40
|
|
|
*/ |
41
|
|
|
class Linkverifier |
42
|
|
|
{ |
43
|
|
|
/**
 * Maximum number of redirects followed for a single link before the check
 * is given up and the link is reported as failed.
 */
const LINKVERIFIER_MAX_REDIRECT_COUNT = 10;

/**
 * Seconds to wait while connecting to the remote server.
 */
const LINKVERIFIER_CONNECT_TIMEOUT = 5;

/**
 * Seconds to wait for the remote server to send data.
 */
const LINKVERIFIER_RESPONSE_TIMEOUT = 10;

/**
 * Behaviour when a user clicks "Edit FAQs" in the backend: should links be
 * verified automatically, with a live update of each verification status?
 *
 * Suggested value:
 *  a. false, if no cron/at entry calls 'cron.verifyurls.php' every night;
 *     this avoids high browser load (100% CPU)
 *  b. true, if a cron/at entry calls 'cron.verifyurls.php' every night
 */
const LINKVERIFIER_AUTOMATIC_CALL_ON_EDIT_FAQ = false;

/**
 * URLs collected by parseString(), grouped by attribute type ('href', 'src').
 *
 * @var array
 */
private $urlpool = [];

/**
 * Protocols that are not looked at: lower-cased protocol prefix => message.
 *
 * @var array
 */
private $invalidProtocols = [];

/**
 * Results of the last verifyURLs() run, indexed by type and raw URL.
 *
 * @var array
 */
private $lastResult = [];

/**
 * Hosts that could not be resolved or connected to (host name => true);
 * they are not contacted again by this instance.
 *
 * @var array
 */
private $slowHosts = [];

/**
 * User as handed to the constructor (documented there as a string).
 *
 * @var string|null
 */
private $user;

/**
 * Configuration object handed to the constructor.
 *
 * @var Configuration
 */
private $config;
109
|
|
|
|
110
|
|
|
/**
 * Constructor.
 *
 * Stores configuration and user, then registers every protocol the link
 * verifier does not check together with its "unsupported" message.
 *
 * @param Configuration $config
 * @param string        $user   User
 */
public function __construct(Configuration $config, $user = null)
{
    global $PMF_LANG;

    $this->user = $user;
    $this->config = $config;

    // https is only registered as unsupported when the openssl extension is missing.
    $unsupported = extension_loaded('openssl') ? [] : ['https'];

    // Hack: 'file' and 'javascript' are not real protocol schemes like the others.
    array_push($unsupported, 'ftp', 'gopher', 'mailto', 'telnet', 'feed', 'file', 'javascript');

    foreach ($unsupported as $scheme) {
        $this->addIgnoreProtocol(
            $scheme.':',
            sprintf($PMF_LANG['ad_linkcheck_protocol_unsupported'], $scheme)
        );
    }
}
139
|
|
|
|
140
|
|
|
/**
 * Returns the URLs collected so far.
 *
 * @return array URL list as stored in the pool
 */
public function getUrlpool()
{
    $pool = $this->urlpool;

    return $pool;
}
149
|
|
|
|
150
|
|
|
/**
 * Returns whether link verifier is ready to verify URLs.
 *
 * The verifier is ready when the configuration provides a non-empty default
 * URL. The previous condition was inverted and reported "not ready" for
 * every configured URL.
 *
 * @return bool true if ready to verify URLs, otherwise false
 */
public function isReady(): bool
{
    $defaultUrl = $this->config->getDefaultUrl();

    return !is_null($defaultUrl) && '' !== $defaultUrl;
}
163
|
|
|
|
164
|
|
|
/**
 * Empties the URL pool and the stored results for the next batch.
 */
public function resetPool()
{
    $this->lastResult = [];
    $this->urlpool = [];
}
171
|
|
|
|
172
|
|
|
/**
 * Registers a protocol that is to be ignored; called from the constructor.
 *
 * The protocol is stored lower-cased as key, the message as value.
 *
 * @param string $protocol
 * @param string $message
 *
 * @return bool true, if successfully added, otherwise false
 */
protected function addIgnoreProtocol($protocol = '', $message = '')
{
    // Nothing to register for an empty protocol.
    if ('' === $protocol) {
        return false;
    }

    $key = strtolower($protocol);
    $this->invalidProtocols[$key] = $message;

    return true;
}
190
|
|
|
|
191
|
|
|
/**
 * Converts a relative URI into an absolute URI using a reference URI.
 * For example:
 *   $relativeUri  = "test/foo.html"
 *   $referenceUri = "http://example.com:8000/sample/index.php"
 * will generate "http://example.com:8000/sample/test/foo.html".
 *
 * URIs that start with an ignored protocol or with http:// / https:// are
 * returned unchanged. The same applies when the reference URI cannot be
 * parsed or has no scheme or host, because no absolute URI can be built.
 *
 * @param string $relativeUri
 * @param string $referenceUri
 *
 * @return string
 */
protected function makeAbsoluteURL($relativeUri = '', $referenceUri = '')
{
    // If relative URI uses a protocol we don't want to handle, don't process it.
    foreach (array_keys($this->invalidProtocols) as $protocol) {
        if (Strings::strpos($relativeUri, $protocol) === 0) {
            return $relativeUri;
        }
    }

    // If relative URI is already an absolute URI, don't process it.
    foreach (['http://', 'https://'] as $protocol) {
        if (Strings::strpos($relativeUri, $protocol) === 0) {
            return $relativeUri;
        }
    }

    // Split reference URI into parts; without scheme and host there is no base.
    $pathParts = parse_url($referenceUri);
    if (!is_array($pathParts) || !isset($pathParts['scheme'], $pathParts['host'])) {
        return $relativeUri;
    }

    // If a port is specified in the reference URI, prefix it with ":"
    $port = isset($pathParts['port']) ? ':'.$pathParts['port'] : '';

    if ('/' !== Strings::substr($relativeUri, 0, 1)) {
        // Resolve against the directory of the reference path: everything up
        // to (not including) the last "/". "/sample/index.php" and "/sample/"
        // both give "/sample"; "/" or an empty path give "".
        $basePath = isset($pathParts['path']) ? str_replace('\\', '/', $pathParts['path']) : '';
        $lastSlash = strrpos($basePath, '/');
        $directory = (false === $lastSlash) ? '' : substr($basePath, 0, $lastSlash);

        $relativeUri = $directory.'/'.$relativeUri;
    }

    return sprintf(
        '%s://%s%s%s',
        $pathParts['scheme'],
        $pathParts['host'],
        $port,
        $relativeUri
    );
}
250
|
|
|
|
251
|
|
|
/**
 * Extracts the values of href and src attributes from the given HTML,
 * stores them in the URL pool and returns how many were found.
 *
 * The pool is emptied on every call.
 *
 * @param string $string String
 *
 * @return int
 */
public function parseString($string = '')
{
    // Start with a clean pool
    $this->urlpool = [];
    $found = 0;

    foreach (['href', 'src'] as $type) {
        $hits = [];
        preg_match_all("|[^?&]$type\=(\"?'?`?)([[:alnum:]\:\#%?=;&@/\ \.\_\-\{\}]+)\\1|i", $string, $hits);

        // Group 2 holds the attribute value without the surrounding quotes.
        foreach ($hits[2] as $url) {
            $this->urlpool[$type][] = $url;
            ++$found;
        }
    }

    return $found;
}
278
|
|
|
|
279
|
|
|
/**
 * Checks whether a URL can be opened by sending an HTTP HEAD request.
 * If $redirect is specified, it is resolved against $url and checked instead
 * (used to follow Location: redirects recursively).
 *
 * Hosts that cannot be resolved or connected to are remembered in
 * $this->slowHosts and fail immediately on later calls.
 *
 * NOTE(review): protocols registered via addIgnoreProtocol() are not rejected
 * here; confirm whether such URLs are meant to be skipped before probing.
 *
 * @param string $url
 * @param string $redirect
 * @param int    $redirectCount
 *
 * @return array [bool valid, int redirect count, string reason (HTML)]
 */
protected function openURL($url = '', $redirect = '', $redirectCount = 0)
{
    global $PMF_LANG;

    // If prerequisites fail
    if (false === $this->isReady()) {
        return [false, $redirectCount, $PMF_LANG['ad_linkcheck_openurl_notready']];
    }

    // Recursing too much, or redirected to itself (also hit when a redirect
    // response carried no Location header)?
    if (($redirectCount >= self::LINKVERIFIER_MAX_REDIRECT_COUNT) || ($url === $redirect)) {
        return [
            false,
            $redirectCount,
            sprintf(
                $PMF_LANG['ad_linkcheck_openurl_maxredirect'],
                self::LINKVERIFIER_MAX_REDIRECT_COUNT
            ),
        ];
    }

    // If destination is blank, fail.
    if ('' === trim($url)) {
        return [false, $redirectCount, $PMF_LANG['ad_linkcheck_openurl_urlisblank']];
    }

    if ('' !== $redirect) {
        $url = $this->makeAbsoluteURL($redirect, $url);
    }

    // Parse URL and fill in defaults for every missing component.
    // HTTP_HOST is not set when running from the command line (cron).
    $defaultParts = [
        'scheme' => 'http',
        'host' => $_SERVER['HTTP_HOST'] ?? '',
        'user' => '',
        'pass' => '',
        'path' => '/',
        'query' => '',
        'fragment' => '',
    ];
    $urlParts = parse_url($url);
    if (!is_array($urlParts)) {
        // Seriously malformed URL: fall back to the defaults only
        $urlParts = [];
    }
    $urlParts += $defaultParts;

    $scheme = strtolower($urlParts['scheme']);
    if (!isset($urlParts['port'])) {
        $urlParts['port'] = ('https' === $scheme) ? 443 : 80;
    }

    // Hack: fix any unsafe space chars in the path to avoid HTTP 400 status during HEAD crawling
    $urlParts['path'] = str_replace(' ', '%20', $urlParts['path']);

    if ('' !== $urlParts['query']) {
        $urlParts['query'] = '?'.$urlParts['query'];
    }

    $safeHost = Strings::htmlspecialchars($urlParts['host']);

    // Check whether we tried the host before
    if (isset($this->slowHosts[$urlParts['host']])) {
        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_tooslow'], $safeHost)];
    }

    // Check whether the hostname exists
    if (gethostbynamel($urlParts['host']) === false) {
        // mark this host too slow to verify
        $this->slowHosts[$urlParts['host']] = true;

        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_nodns'], $safeHost)];
    }

    // open socket for remote server with timeout (default: 5secs)
    $socketHost = $urlParts['host'];
    if (extension_loaded('openssl') && ('https' === $scheme)) {
        $socketHost = 'ssl://'.$socketHost;
    }

    // fsockopen() raises a warning on connection failure; the failure is handled below.
    $fp = @fsockopen($socketHost, $urlParts['port'], $errno, $errstr, self::LINKVERIFIER_CONNECT_TIMEOUT);

    if (!$fp) {
        // mark this host too slow to verify
        $this->slowHosts[$urlParts['host']] = true;

        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_tooslow'], $safeHost)];
    }

    // wait for data with timeout (default: 10secs)
    stream_set_timeout($fp, self::LINKVERIFIER_RESPONSE_TIMEOUT, 0);

    // The fragment is never part of the request target, so it is not sent.
    $requestTarget = $urlParts['path'].$urlParts['query'];
    fwrite(
        $fp,
        'HEAD '.$requestTarget." HTTP/1.0\r\nHost: ".$urlParts['host']."\r\n"
        // Be polite: let our probe declare itself
        ."User-Agent: phpMyFAQ Link Checker\r\n"
        ."\r\n"
    );

    $rawResponse = '';
    while (!feof($fp)) {
        $chunk = fread($fp, 4096);
        if (false === $chunk) {
            // read error: stop instead of spinning on a broken stream
            break;
        }
        $rawResponse .= $chunk;
    }
    fclose($fp);

    // parse response
    $code = 0;
    $allowVerbs = 'n/a';
    $location = $url;
    $lines = explode("\r\n", $rawResponse);
    $httpStatusMsg = strip_tags($lines[count($lines) - 1]);

    foreach ($lines as $line) {
        $matches = [];
        if (preg_match('/^HTTP\/\S+ (\d+)(?: |$)/', $line, $matches)) {
            $code = (int) $matches[1];
        }
        // Header field names are case-insensitive.
        if (preg_match('/^Location:\s*(.*)$/i', $line, $matches)) {
            $location = trim($matches[1]);
        }
        if (preg_match('/^Allow:\s*(.*)$/i', $line, $matches)) {
            $allowVerbs = trim($matches[1]);
        }
    }

    // process response code
    switch ($code) {
        // TODO: Add more explicit http status management
        case 200: // OK
            $reason = ($redirectCount > 0)
                ? sprintf($PMF_LANG['ad_linkcheck_openurl_redirected'], Strings::htmlspecialchars($url))
                : '';

            return [true, $redirectCount, $reason];

        case 301: // Moved Permanently
        case 302: // Found
        case 303: // See Other
        case 307: // Temporary Redirect
        case 308: // Permanent Redirect
            return $this->openURL($url, $location, $redirectCount + 1);

        case 400: // Bad Request
            // The status message comes from the remote server and must not become part of the format string.
            return [
                false,
                $redirectCount,
                sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code).'<br>'.$httpStatusMsg,
            ];

        case 404: // Not found
            return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_not_found'], $safeHost)];

        case 300: // Multiple choices
        case 401: // Unauthorized (but it's there. right ?)
            return [true, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code)];

        case 405: // Method Not Allowed
            // TODO: Add a fallback to use GET method, otherwise this link should be marked as bad
            // The Allow value is supplied by the remote server, so it is escaped before it reaches HTML.
            return [
                true,
                $redirectCount,
                sprintf(
                    $PMF_LANG['ad_linkcheck_openurl_not_allowed'],
                    $safeHost,
                    Strings::htmlspecialchars($allowVerbs)
                ),
            ];

        default: // All other statuses
            return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code)];
    }
}
483
|
|
|
|
484
|
|
|
/**
 * Validates every URL in the pool and returns the collected results.
 *
 * Each result holds the keys 'type', 'rawurl', 'reference', 'absurl',
 * 'valid', 'redirects' and 'reason'. A URL that occurs more than once for
 * the same type is checked only once. The previous duplicate check tested
 * an undefined variable and therefore never skipped anything.
 *
 * @param string $referenceUri
 *
 * @return array results indexed by type and raw URL
 */
public function verifyURLs($referenceUri = '')
{
    $this->lastResult = [];

    foreach ($this->urlpool as $type => $urls) {
        foreach ($urls as $rawUrl) {
            if (isset($this->lastResult[$type][$rawUrl])) {
                // already verified during this run
                continue;
            }

            // Expand uri into absolute URL.
            $absoluteUrl = $this->makeAbsoluteURL($rawUrl, $referenceUri);

            $entry = [
                'type' => $type,
                'rawurl' => $rawUrl,
                'reference' => $referenceUri,
                'absurl' => $absoluteUrl,
            ];
            list($entry['valid'], $entry['redirects'], $entry['reason']) = $this->openURL($absoluteUrl);

            $this->lastResult[$type][$rawUrl] = $entry;
        }
    }

    return $this->lastResult;
}
515
|
|
|
|
516
|
|
|
/**
 * Logs the current state of the links to the specified entry.
 *
 * Writes links_state and links_check_date (current request time) to the
 * faqdata row identified by id and language. When no state is given, the
 * state of the last verification run is used.
 *
 * @param int    $id
 * @param string $artlang
 * @param string $state   (optional)
 *
 * @return bool true if operation successful, otherwise false
 */
public function markEntry($id = 0, $artlang = '', $state = '')
{
    if (($id < 1) || (trim($artlang) == '')) {
        return false;
    }

    if ($state == '') {
        $state = $this->getLinkStateString();
    }

    $db = $this->config->getDb();

    // String values are escaped like in getEntryState(). Db is referenced
    // relative to the current namespace: "phpMyFAQ\Db" written inside
    // namespace phpMyFAQ would resolve to phpMyFAQ\phpMyFAQ\Db.
    $query = sprintf(
        "UPDATE %sfaqdata SET links_state = '%s', links_check_date = %d WHERE id = %d AND lang = '%s'",
        Db::getTablePrefix(),
        $db->escape($state),
        $_SERVER['REQUEST_TIME'],
        $id,
        $db->escape($artlang)
    );

    return (bool) $db->query($query);
}
556
|
|
|
|
557
|
|
|
/**
 * Returns the oldest timestamp a stored link validation result may have:
 * the request time minus the configured 'main.urlValidateInterval', or
 * minus one day when no interval is configured.
 *
 * @return int
 */
public function getURLValidateInterval()
{
    // Without explicit configuration links are rechecked once a day.
    if ($this->config->get('main.urlValidateInterval') == '') {
        return $_SERVER['REQUEST_TIME'] - 86400;
    }

    return $_SERVER['REQUEST_TIME'] - $this->config->get('main.urlValidateInterval');
}
572
|
|
|
|
573
|
|
|
/**
 * Retrieves the stored link state of an entry and validates its timestamp.
 *
 * Returns false when the query fails or no matching entry exists. Returns
 * true when no state is stored yet, or when $checkDate is set and the
 * stored check is not newer than getURLValidateInterval(). Otherwise the
 * stored state string is returned.
 *
 * @param int    $id
 * @param string $artlang
 * @param bool   $checkDate
 *
 * @return bool|string
 */
public function getEntryState($id = 0, $artlang = '', $checkDate = false)
{
    $db = $this->config->getDb();

    // Db is referenced relative to the current namespace: "phpMyFAQ\Db"
    // written inside namespace phpMyFAQ would resolve to phpMyFAQ\phpMyFAQ\Db.
    $query = sprintf(
        "SELECT links_state, links_check_date FROM %sfaqdata WHERE id = %d AND lang = '%s'",
        Db::getTablePrefix(),
        $id,
        $db->escape($artlang)
    );

    $result = $db->query($query);
    if (!$result) {
        return false;
    }

    $row = $db->fetchObject($result);
    if (!$row) {
        // No such entry: report it like a failed lookup instead of returning null.
        return false;
    }

    $linkState = $row->links_state;
    if (trim((string) $linkState) == '') {
        $linkState = true;
    }

    // Outdated result and the caller asked for the date to be honoured
    if ($checkDate && $row->links_check_date <= $this->getURLValidateInterval()) {
        return true;
    }

    return $linkState;
}
619
|
|
|
|
620
|
|
|
/**
 * Builds the HTML snippet shown for an entry in the entry listing.
 *
 * A "disabled" icon is returned when the verifier is not ready or the entry
 * state cannot be read. Otherwise a link that triggers the JavaScript
 * function onDemandVerifyURL() for this entry is returned.
 *
 * @param int    $id
 * @param string $artlang
 *
 * @return string
 */
public function getEntryStateHTML($id = 0, $artlang = '')
{
    global $PMF_LANG;

    // The stored state is only looked up while the feature is enabled.
    $state = $this->isReady() ? $this->getEntryState($id, $artlang, false) : false;

    if (false === $state) {
        return sprintf(
            '<span class="fa-stack" aria-hidden="true"><i class="fas fa-link fa-stack-1x"></i><i class="fas fa-ban fa-stack-2x text-danger" title="%s"></i></span>',
            $PMF_LANG['ad_linkcheck_feedback_url-disabled']
        );
    }

    if (true === $state) {
        $state = 'noscript';
    }

    // Element ids used by the JavaScript side
    return sprintf(
        '<div id="%s" class="url-%s"><span id="%s"><a href="javascript:onDemandVerifyURL(%d,\'%s\');">%s</a></span></div>',
        'divurl_'.$artlang.'_'.$id,
        $state,
        'spanurl_'.$artlang.'_'.$id,
        $id,
        $artlang,
        $PMF_LANG['ad_linkcheck_feedback_url-'.$state]
    );
}
669
|
|
|
|
670
|
|
|
/**
 * Summarises the last verification run as a status string.
 *
 * "nolinks" - no links were found in contents
 * "linkok"  - link(s) were found and were all ok
 * "linkbad" - link(s) were found and at least one link was broken
 *
 * @return string
 */
public function getLinkStateString()
{
    $total = 0;
    $broken = 0;

    foreach ($this->lastResult as $entries) {
        foreach ($entries as $entry) {
            ++$total;
            if (!$entry['valid']) {
                ++$broken;
            }
        }
    }

    if (0 === $total) {
        return 'nolinks';
    }

    return (0 === $broken) ? 'linkok' : 'linkbad';
}
702
|
|
|
|
703
|
|
|
/**
 * Verifies the links of the given article content and updates the
 * links_state database entry.
 *
 * @param string $contents
 * @param int    $id
 * @param string $artlang
 * @param bool   $cron     when true, only the database is updated and an
 *                         empty string is returned
 *
 * @return string HTML text, if $cron is false (default)
 */
public function verifyArticleURL($contents = '', $id = 0, $artlang = '', $cron = false)
{
    global $PMF_LANG;

    // No (or only a blank) reference URL configured. The former second check
    // called trim() on the result of a comparison instead of on the URL.
    if ('' === trim((string) $this->config->getDefaultUrl())) {
        $output = $PMF_LANG['ad_linkcheck_noReferenceURL'];

        return $cron ? '' : sprintf('<p class="alert alert-warning">%s</p>', $output);
    }

    if ($this->isReady() === false) {
        $output = $PMF_LANG['ad_linkcheck_noAllowUrlOpen'];

        return $cron ? '' : sprintf('<p class="alert alert-warning">%s</p>', $output);
    }

    // Parse contents and verify URLs
    $this->parseString($contents);
    $result = $this->verifyURLs($this->config->getDefaultUrl());
    $this->markEntry($id, $artlang);

    // Cron runs only need the database update above, no report.
    if ($cron) {
        return '';
    }

    // If no URLs found
    if ($result == false) {
        return sprintf(
            '<h3>%s</h3><p class="alert alert-info">%s</p>',
            $PMF_LANG['ad_linkcheck_checkResult'],
            $PMF_LANG['ad_linkcheck_noLinksFound']
        );
    }

    $failreasons = $inforeasons = [];
    $output = ' <h3>'.$PMF_LANG['ad_linkcheck_checkResult']."</h3>\n";
    $output .= ' <table class="table">'."\n";
    foreach ($result as $type => $entries) {
        $output .= ' <tr><td><strong>'.Strings::htmlspecialchars($type)."</strong></td></tr>\n";
        foreach ($entries as $value) {
            // The URL comes from article content, so it is escaped in the
            // attribute as well as in the link text.
            $safeUrl = Strings::htmlspecialchars($value['absurl']);

            // NOTE(review): the status cell below is nested inside another
            // <td>; markup kept unchanged, confirm before restructuring.
            $_output = ' <td />';
            $_output .= ' <td><a href="'.$safeUrl.'" target="_blank">'.$safeUrl."</a></td>\n";
            $_output .= ' <td>';
            if (isset($value['redirects']) && ($value['redirects'] > 0)) {
                $_redirects = '('.$value['redirects'].')';
            } else {
                $_redirects = '';
            }
            if ($value['valid'] === true) {
                $_classname = 'urlsuccess';
                $_output .= '<td class="'.$_classname.'">'.$PMF_LANG['ad_linkcheck_checkSuccess'].$_redirects.'</td>';
                if ($value['reason'] != '') {
                    $inforeasons[] = sprintf(
                        $PMF_LANG['ad_linkcheck_openurl_infoprefix'],
                        $safeUrl
                    ).$value['reason'];
                }
            } else {
                $_classname = 'urlfail';
                $_output .= '<td class="'.$_classname.'">'.$PMF_LANG['ad_linkcheck_checkFailed'].'</td>';
                if ($value['reason'] != '') {
                    $failreasons[] = $value['reason'];
                }
            }
            $_output .= '</td>';
            $output .= ' <tr class="'.$_classname.'">'."\n".$_output."\n";
            $output .= " </tr>\n";
        }
    }
    $output .= " </table>\n";

    if (count($failreasons) > 0) {
        $output .= " <br>\n <strong>".$PMF_LANG['ad_linkcheck_failReason']."</strong>\n <ul>\n";
        foreach ($failreasons as $reason) {
            $output .= ' <li>'.$reason."</li>\n";
        }
        $output .= " </ul>\n";
    }

    if (count($inforeasons) > 0) {
        $output .= " <br>\n <strong>".$PMF_LANG['ad_linkcheck_infoReason']."</strong>\n <ul>\n";
        foreach ($inforeasons as $reason) {
            $output .= ' <li>'.$reason."</li>\n";
        }
        $output .= " </ul>\n";
    }

    return $output;
}
808
|
|
|
} |
809
|
|
|
|
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.