1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* The PMF_Linkverifier class provides methods and functions for verifying URLs |
4
|
|
|
* |
5
|
|
|
* PHP Version 5.4 |
6
|
|
|
* |
7
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public License, |
8
|
|
|
* v. 2.0. If a copy of the MPL was not distributed with this file, You can |
9
|
|
|
* obtain one at http://mozilla.org/MPL/2.0/. |
10
|
|
|
* |
11
|
|
|
* The Initial Developer of the Original Code is released for external use |
12
|
|
|
* with permission from NetJapan, Inc. IT Administration Group. |
13
|
|
|
* |
14
|
|
|
* @category phpMyFAQ |
15
|
|
|
* @package PMF_Linkverifier |
16
|
|
|
* @author Minoru TODA <[email protected]> |
17
|
|
|
* @author Matteo Scaramuccia <[email protected]> |
18
|
|
|
* @author Thorsten Rinne <[email protected]> |
19
|
|
|
* @copyright 2005-2014 NetJapan, Inc. and phpMyFAQ Team |
20
|
|
|
* @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0 |
21
|
|
|
* @link http://www.phpmyfaq.de |
22
|
|
|
* @since 2005-08-01 |
23
|
|
|
*/ |
24
|
|
|
|
25
|
|
|
// Stop immediately unless the including script has defined IS_VALID_PHPMYFAQ.
if (defined('IS_VALID_PHPMYFAQ') === false) {
    exit();
}
28
|
|
|
|
29
|
|
|
/** |
30
|
|
|
* PMF_LinkVerifier |
31
|
|
|
* |
32
|
|
|
* @category phpMyFAQ |
33
|
|
|
* @package PMF_Linkverifier |
34
|
|
|
* @author Minoru TODA <[email protected]> |
35
|
|
|
* @author Matteo Scaramuccia <[email protected]> |
36
|
|
|
* @author Thorsten Rinne <[email protected]> |
37
|
|
|
* @copyright 2005-2014 NetJapan, Inc. and phpMyFAQ Team |
38
|
|
|
* @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0 |
39
|
|
|
* @link http://www.phpmyfaq.de |
40
|
|
|
* @since 2005-08-01 |
41
|
|
|
*/ |
42
|
|
|
class PMF_Linkverifier |
43
|
|
|
{ |
44
|
|
|
/** |
45
|
|
|
* Defines number of times linkverifier follows 302 response before failing. |
46
|
|
|
*/ |
47
|
|
|
const LINKVERIFIER_MAX_REDIRECT_COUNT = 10; |
48
|
|
|
|
49
|
|
|
/** |
50
|
|
|
* Defines the number of seconds to wait for the remote server to respond |
51
|
|
|
*/ |
52
|
|
|
const LINKVERIFIER_CONNECT_TIMEOUT = 5; |
53
|
|
|
|
54
|
|
|
/** |
55
|
|
|
* Defines the number of seconds to wait for the remote server to send data |
56
|
|
|
*/ |
57
|
|
|
const LINKVERIFIER_RESPONSE_TIMEOUT = 10; |
58
|
|
|
|
59
|
|
|
/** |
60
|
|
|
* Defines the behaviour when a user clicks "Edit FAQs" in the backend. |
61
|
|
|
* Do you want an automatic links verification |
62
|
|
|
* with live update of each link's verification status? |
63
|
|
|
* |
64
|
|
|
* Suggested value is: |
65
|
|
|
* a. false, if you don't use a cron/at entry to call 'cron.verifyurls.php' during each night. |
66
|
|
|
* This will avoid browser high load (100% CPU) |
67
|
|
|
* b. true, if you use a cron/at entry to call 'cron.verifyurls.php' during each night |
68
|
|
|
*/ |
69
|
|
|
const LINKVERIFIER_AUTOMATIC_CALL_ON_EDIT_FAQ = false; |
70
|
|
|
|
71
|
|
|
/** List of protocols and URLs |
72
|
|
|
* |
73
|
|
|
* @var mixed |
74
|
|
|
*/ |
75
|
|
|
protected $urlpool = []; |
76
|
|
|
|
77
|
|
|
/** List of prohibited prefixes and messages |
78
|
|
|
* |
79
|
|
|
* @var mixed |
80
|
|
|
*/ |
81
|
|
|
protected $warnlists = []; |
82
|
|
|
|
83
|
|
|
/** List of URLs not to probe |
84
|
|
|
* |
85
|
|
|
* @var mixed |
86
|
|
|
*/ |
87
|
|
|
protected $ignorelists = []; |
88
|
|
|
|
89
|
|
|
/** List of protocols we do not want to look at |
90
|
|
|
* |
91
|
|
|
* @var mixed |
92
|
|
|
*/ |
93
|
|
|
protected $invalid_protocols = []; |
94
|
|
|
|
95
|
|
|
/** Last verify results (we might use them later) |
96
|
|
|
* |
97
|
|
|
* @var mixed |
98
|
|
|
*/ |
99
|
|
|
protected $lastResult = []; |
100
|
|
|
|
101
|
|
|
/** List of hosts that are slow to resolve. |
102
|
|
|
* |
103
|
|
|
* @var mixed |
104
|
|
|
*/ |
105
|
|
|
protected $slow_hosts = []; |
106
|
|
|
|
107
|
|
|
/** |
108
|
|
|
* User |
109
|
|
|
* |
110
|
|
|
* @var integer |
111
|
|
|
*/ |
112
|
|
|
protected $user = null; |
113
|
|
|
|
114
|
|
|
/** |
115
|
|
|
* @var PMF_Configuration |
116
|
|
|
*/ |
117
|
|
|
private $_config = null; |
118
|
|
|
|
119
|
|
|
/**
 * Constructor.
 *
 * Keeps the configuration and the user, then registers every URL prefix the
 * verifier must not probe together with its localized "unsupported" message.
 *
 * @param PMF_Configuration $config Configuration object
 * @param string            $user   User (optional)
 */
public function __construct(PMF_Configuration $config, $user = null)
{
    global $PMF_LANG;

    $this->_config = $config;
    $this->user    = $user;

    // Prefix => name shown in the message, in registration order
    $unsupported = [];

    // fsockopen needs the OpenSSL extension for https
    if (!@extension_loaded('openssl')) {
        $unsupported["https:"] = "https";
    }

    $unsupported += [
        "ftp:"        => "ftp",
        "gopher:"     => "gopher",
        "mailto:"     => "mailto",
        "telnet:"     => "telnet",
        "feed:"       => "feed",
        // Hack: the two below are not real schemes defining protocols like the ones above
        "file:"       => "file",
        "javascript:" => "javascript",
    ];

    foreach ($unsupported as $prefix => $name) {
        $this->addIgnoreProtocol(
            $prefix,
            sprintf($PMF_LANG['ad_linkcheck_protocol_unsupported'], $name)
        );
    }
}
147
|
|
|
|
148
|
|
|
|
149
|
|
|
/**
 * Returns the URL pool currently held by this verifier.
 *
 * @return array The content of $this->urlpool
 */
public function getUrlpool()
{
    $pool = $this->urlpool;

    return $pool;
}
158
|
|
|
|
159
|
|
|
/**
 * Tells whether the link verifier is ready to verify URLs.
 *
 * The verifier counts as ready as soon as the configuration value
 * 'main.referenceURL' is not empty.
 *
 * @return boolean true if ready to verify URLs, otherwise false
 */
public function isReady()
{
    $hasReferenceUrl = ($this->_config->get('main.referenceURL') != '');

    return $hasReferenceUrl;
}
172
|
|
|
|
173
|
|
|
|
174
|
|
|
|
175
|
|
|
/**
 * Empties the URL pool and the last verification results so that the
 * next batch starts from a clean state.
 *
 * @return void
 */
public function resetPool()
{
    $this->lastResult = [];
    $this->urlpool    = [];
}
184
|
|
|
|
185
|
|
|
/**
 * Adds an entry to the internal warn list.
 * Use this if you want to mark certain URLs unusable (like internal links of a dev-site).
 *
 * @param string $urlprefix URL prefix used as key of the warn list
 * @param string $message   Message stored for that prefix
 *
 * @return boolean true, if successfully added, otherwise false (empty prefix)
 */
protected function addWarnlist($urlprefix = "", $message = "")
{
    // An empty prefix cannot be registered
    if ($urlprefix == "") {
        return false;
    }

    $this->warnlists[$urlprefix] = $message;

    return true;
}
203
|
|
|
|
204
|
|
|
/**
 * Registers a protocol prefix that must not be verified.
 *
 * The prefix is stored lower-cased as key of the ignored-protocol list.
 *
 * @param string $protocol Protocol prefix, e.g. "mailto:"
 * @param string $message  Message stored for that protocol
 *
 * @return boolean true, if successfully added, otherwise false (empty protocol)
 */
protected function addIgnoreProtocol($protocol = "", $message = "")
{
    // An empty protocol cannot be registered
    if ($protocol == "") {
        return false;
    }

    $key = strtolower($protocol);
    $this->invalid_protocols[$key] = $message;

    return true;
}
219
|
|
|
|
220
|
|
|
/**
 * Converts a relative URI into an absolute URI using a reference URI.
 *
 * The path of the reference URI is used as a directory, with or without a
 * trailing slash. For example,
 *   $relativeuri  = "test/foo.html"
 *   $referenceuri = "http://example.com:8000/sample/"
 * generates "http://example.com:8000/sample/test/foo.html".
 *
 * URIs starting with an ignored protocol or with http:// or https:// are
 * returned unchanged, as is any URI when the reference URI has no usable
 * scheme and host.
 *
 * @param string $relativeuri  URI to convert
 * @param string $referenceuri Absolute URI serving as reference point
 *
 * @return string
 */
protected function makeAbsoluteURL($relativeuri = '', $referenceuri = '')
{
    // Ignored protocols (their keys are stored lower-cased) and absolute
    // http(s) URIs are not processed. The match is case-insensitive so that
    // e.g. "MAILTO:" is recognised as well.
    $passThrough = array_merge(array_keys($this->invalid_protocols), ['http://', 'https://']);
    foreach ($passThrough as $prefix) {
        if (stripos($relativeuri, (string) $prefix) === 0) {
            return $relativeuri;
        }
    }

    // Split the reference URI into parts; without scheme and host there is
    // nothing to build an absolute URI from.
    $parts = parse_url($referenceuri);
    if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
        return $relativeuri;
    }

    // Protocol-relative URI ("//host/path"): only the scheme is missing
    if (substr($relativeuri, 0, 2) === '//') {
        return $parts['scheme'] . ':' . $relativeuri;
    }

    $origin = $parts['scheme'] . '://' . $parts['host'];
    if (isset($parts['port']) && $parts['port'] != '') {
        $origin .= ':' . $parts['port'];
    }

    // Host-relative URI: replaces the whole reference path
    if (substr($relativeuri, 0, 1) === '/') {
        return $origin . $relativeuri;
    }

    // Normalise backslashes and drop trailing slashes only; the previous
    // regular expression discarded the complete path when it ended in "/".
    $directory = '';
    if (isset($parts['path'])) {
        $directory = rtrim(str_replace('\\', '/', $parts['path']), '/');
    }

    return $origin . $directory . '/' . $relativeuri;
}
273
|
|
|
|
274
|
|
|
/**
 * Parses HTML and collects the values of href, src and url attributes
 * into the URL pool, grouped by attribute name.
 *
 * The pool is emptied before parsing.
 *
 * @param string $string HTML to scan
 *
 * @return mixed false if no URL is found, otherwise the number of URLs found.
 */
public function parse_string($string = "")
{
    $urlcount = 0;
    $types    = ['href', 'src', 'url'];

    // Clean $this->urlpool
    $this->urlpool = [];

    // foreach instead of each(): each() no longer exists as of PHP 8.0
    foreach ($types as $type) {
        $matches = [];
        preg_match_all("|[^?&]$type\=(\"?'?`?)([[:alnum:]\:\#%?=;&@/\ \.\_\-\{\}]+)\\1|i", $string, $matches);

        // Nothing captured (or the match failed) for this attribute
        if (empty($matches[2])) {
            continue;
        }

        foreach ($matches[2] as $url) {
            $this->urlpool[$type][] = $url;
            $urlcount++;
        }
    }

    return ($urlcount == 0) ? false : $urlcount;
}
300
|
|
|
|
301
|
|
|
/**
 * Checks whether a URL can be opened by sending an HTTP HEAD request.
 *
 * If $redirect is specified it is resolved against $url and probed instead;
 * this is how Location: redirects are followed (recursively, up to
 * LINKVERIFIER_MAX_REDIRECT_COUNT times).
 *
 * @param string  $url           URL to probe, or the redirecting URL when $redirect is set
 * @param string  $redirect      Location to follow (optional)
 * @param integer $redirectCount Number of redirects followed so far
 *
 * @return array [0] boolean  whether the link is considered valid,
 *               [1] integer  the redirect count,
 *               [2] string   reason / additional information (may be empty)
 */
protected function openURL($url = '', $redirect = '', $redirectCount = 0)
{
    global $PMF_LANG;

    // If prerequisites fail
    if ($this->isReady() == false) {
        return [false, $redirectCount, $PMF_LANG['ad_linkcheck_openurl_notready']];
    }

    // If destination is blank, fail. Checked before the redirect test,
    // otherwise a blank URL with a blank redirect is reported as a
    // redirect problem.
    if (trim($url) == "") {
        return [false, $redirectCount, $PMF_LANG['ad_linkcheck_openurl_urlisblank']];
    }

    // Recursing too much, or redirected to itself?
    if (($redirectCount >= self::LINKVERIFIER_MAX_REDIRECT_COUNT) || ($url == $redirect)) {
        return [
            false,
            $redirectCount,
            sprintf(
                $PMF_LANG['ad_linkcheck_openurl_maxredirect'],
                self::LINKVERIFIER_MAX_REDIRECT_COUNT
            )
        ];
    }

    if ($redirect != "") {
        $url = $this->makeAbsoluteURL($redirect, $url);
    }

    // Parse URL and fill in the parts that are missing
    $urlParts = @parse_url($url);
    if (!is_array($urlParts)) {
        $urlParts = [];
    }
    $urlParts += [
        'scheme' => 'http',
        // HTTP_HOST is not set when running from the command line (cron);
        // an empty host then simply fails the DNS check below.
        'host'   => isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '',
        'path'   => '/',
        'query'  => '',
    ];

    if (!isset($urlParts['port'])) {
        $urlParts['port'] = ('https' == $urlParts['scheme']) ? '443' : '80';
    }

    // Hack: fix any unsafe space chars in the path to avoid HTTP 400 status during HEAD crawling
    $urlParts['path'] = str_replace(' ', '%20', $urlParts['path']);

    if ($urlParts['query'] != "") {
        $urlParts['query'] = "?".$urlParts['query'];
    }

    $safeHost = PMF_String::htmlspecialchars($urlParts['host']);

    // Check whether we tried the host before
    if (isset($this->slow_hosts[$urlParts['host']])) {
        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_tooslow'], $safeHost)];
    }

    // Check whether the hostname exists
    if (gethostbynamel($urlParts['host']) === false) {
        // mark this host too slow to verify
        $this->slow_hosts[$urlParts['host']] = true;
        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_nodns'], $safeHost)];
    }

    // Open socket for remote server with timeout (default: 5secs).
    // With OpenSSL support fsockopen can connect to the remote host using SSL.
    $_host = $urlParts['host'];
    if (@extension_loaded('openssl') && ('https' == $urlParts['scheme'])) {
        $_host = 'ssl://'.$_host;
    }
    $fp = @fsockopen($_host, (int) $urlParts['port'], $errno, $errstr, self::LINKVERIFIER_CONNECT_TIMEOUT);
    if (!$fp) {
        // mark this host too slow to verify
        $this->slow_hosts[$urlParts['host']] = true;
        return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_tooslow'], $safeHost)];
    }

    // Wait for data with timeout (default: 10secs)
    stream_set_timeout($fp, self::LINKVERIFIER_RESPONSE_TIMEOUT, 0);

    // The fragment is never part of the request target, so only path and query are sent
    $_url = $urlParts['path'].$urlParts['query'];
    fputs($fp, "HEAD ".$_url." HTTP/1.0\r\nHost: ".$urlParts['host']."\r\n");
    // Be polite: let our probe declare itself
    fputs($fp, "User-Agent: phpMyFAQ Link Checker\r\n");
    fputs($fp, "\r\n");

    // Read the response. After a stream timeout feof() keeps returning
    // false, so the timed_out flag must end the loop explicitly.
    $rawResponse = '';
    while (!feof($fp)) {
        $chunk = fread($fp, 4096);
        if ($chunk === false) {
            break;
        }
        $rawResponse .= $chunk;

        $meta = stream_get_meta_data($fp);
        if (!empty($meta['timed_out'])) {
            break;
        }
    }
    fclose($fp);

    // Parse response
    $code          = 0;
    $allowVerbs    = 'n/a';
    $location      = $url;
    $response      = explode("\r\n", $rawResponse);
    $httpStatusMsg = strip_tags($response[count($response) - 1]);

    foreach ($response as $line) {
        if (preg_match("/^HTTP\/[^ ]+ ([0-9]+) .*$/", $line, $matches)) {
            $code = (int) $matches[1];
        }
        // Header names are case-insensitive
        if (preg_match("/^Location: (.*)$/i", $line, $matches)) {
            $location = trim($matches[1]);
        }
        if (preg_match("/^Allow: (.*)$/i", $line, $matches)) {
            $allowVerbs = trim($matches[1]);
        }
    }

    // Process response code
    switch ($code) {
        // TODO: Add more explicit http status management
        case 200: // OK
            $_reason = ($redirectCount > 0)
                ? sprintf($PMF_LANG['ad_linkcheck_openurl_redirected'], PMF_String::htmlspecialchars($url))
                : "";
            return [true, $redirectCount, $_reason];

        case 301: // Moved Permanently
        case 302: // Found
        case 303: // See Other
        case 307: // Temporary Redirect
        case 308: // Permanent Redirect
            return $this->openURL($url, $location, $redirectCount + 1);

        case 400: // Bad Request
            // The remote text is appended after formatting so that a "%" in it
            // cannot be taken for a conversion specification.
            return [
                false,
                $redirectCount,
                sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code).'<br />'.$httpStatusMsg
            ];

        case 404: // Not found
            return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_not_found'], $safeHost)];

        case 300: // Multiple choices
        case 401: // Unauthorized (but it's there. right ?)
            return [true, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code)];

        case 405: // Method Not Allowed
            // TODO: Add a fallback to use GET method, otherwise this link should be marked as bad
            return [
                true,
                $redirectCount,
                sprintf(
                    $PMF_LANG['ad_linkcheck_openurl_not_allowed'],
                    $safeHost,
                    PMF_String::htmlspecialchars($allowVerbs)
                )
            ];

        default: // All other statuses, including "no status line received" (0)
            return [false, $redirectCount, sprintf($PMF_LANG['ad_linkcheck_openurl_ambiguous'], $code)];
    }
}
490
|
|
|
|
491
|
|
|
/**
 * Performs link validation on every URL in the URL pool.
 *
 * The results are stored in $this->lastResult, indexed by type and raw URL,
 * and returned. A URL occurring several times for the same type is probed
 * only once.
 *
 * @param string $referenceuri Reference URI used to expand relative URLs
 *
 * @return array
 */
public function VerifyURLs($referenceuri = '')
{
    $this->lastResult = [];

    foreach ($this->urlpool as $_type => $_urls) {
        foreach ($_urls as $_url) {
            // Already verified for this type: do not probe the host again.
            // (The check used to read an undefined variable and never matched.)
            if (isset($this->lastResult[$_type][$_url])) {
                continue;
            }

            // Expand uri into absolute URL.
            $_absurl = $this->makeAbsoluteURL($_url, $referenceuri);

            list($_valid, $_redirects, $_reason) = $this->openURL($_absurl);

            $this->lastResult[$_type][$_url] = [
                'type'      => $_type,
                'rawurl'    => $_url,
                'reference' => $referenceuri,
                'absurl'    => $_absurl,
                'valid'     => $_valid,
                'redirects' => $_redirects,
                'reason'    => $_reason,
            ];
        }
    }

    return $this->lastResult;
}
522
|
|
|
|
523
|
|
|
/**
 * Logs the current state of the links to the specified entry.
 *
 * Updates links_state and links_check_date (set to the request time) of the
 * faqdata row identified by id and language.
 *
 * @param integer $id      Entry id, must be 1 or greater
 * @param string  $artlang Entry language, must not be blank
 * @param string  $state   State to store (optional); when empty the result
 *                         of getLinkStateString() is used
 *
 * @return boolean true if operation successful, otherwise false
 */
public function markEntry($id = 0, $artlang = '', $state = '')
{
    if (($id < 1) || (trim($artlang) == "")) {
        return false;
    }

    if ($state == '') {
        $state = $this->getLinkStateString();
    }

    $db = $this->_config->getDb();

    // Both string values are escaped before being placed into the statement
    $query = sprintf("
        UPDATE
            %sfaqdata
        SET
            links_state = '%s', links_check_date = %d
        WHERE
            id = %d
        AND
            lang = '%s'",
        PMF_Db::getTablePrefix(),
        $db->escape($state),
        $_SERVER['REQUEST_TIME'],
        $id,
        $db->escape($artlang)
    );

    return (bool) $db->query($query);
}
563
|
|
|
|
564
|
|
|
/**
 * Returns the request time minus the validation interval.
 *
 * The interval is the configuration value 'main.urlValidateInterval';
 * when that value is empty, 86400 seconds (one day) are used.
 *
 * @return integer
 */
public function getURLValidateInterval()
{
    // By default links are rechecked once a day unless explicitly requested.
    $interval = ($this->_config->get('main.urlValidateInterval') != '')
        ? $this->_config->get('main.urlValidateInterval')
        : 86400;

    return $_SERVER['REQUEST_TIME'] - $interval;
}
579
|
|
|
|
580
|
|
|
/**
 * Retrieves the stored link state of an entry and validates its timestamp.
 *
 * @param int     $id        Entry id
 * @param string  $artlang   Entry language
 * @param boolean $checkDate When true, an entry whose last check is not newer
 *                           than getURLValidateInterval() yields true instead
 *                           of its stored state
 *
 * @return boolean|string false if the query fails or no row is found;
 *                        true if no state is stored or (with $checkDate) the
 *                        stored check is outdated; otherwise the stored state
 */
public function getEntryState($id = 0, $artlang = '', $checkDate = false)
{
    $db       = $this->_config->getDb();
    $interval = $this->getURLValidateInterval();

    $query = sprintf("
        SELECT
            links_state, links_check_date
        FROM
            %sfaqdata
        WHERE
            id = %d
        AND
            lang = '%s'",
        PMF_Db::getTablePrefix(),
        $id,
        $db->escape($artlang)
    );

    $result = $db->query($query);
    if (!$result) {
        return false;
    }

    // Only the first row is ever used
    $row = $db->fetchObject($result);
    if (!$row) {
        // No matching entry: return false rather than falling off the end with null
        return false;
    }

    $_linkState = $row->links_state;
    if (trim((string) $_linkState) == "") {
        $_linkState = true;
    }

    // The stored check is outdated and the caller asked for date validation
    if (($checkDate != false) && !($row->links_check_date > $interval)) {
        return true;
    }

    return $_linkState;
}
626
|
|
|
|
627
|
|
|
/**
 * Gets the HTML text that needs to be shown in the entry listing.
 *
 * Returns a "url-disabled" block when the verifier is not ready or no state
 * can be retrieved for the entry; otherwise a block whose CSS class carries
 * the stored state and whose link triggers onDemandVerifyURL() for the entry.
 *
 * @param int    $id      Entry id
 * @param string $artlang Entry language
 *
 * @return string
 */
public function getEntryStateHTML($id = 0, $artlang = "")
{
    global $PMF_LANG;

    // Feature disabled: do not even query the entry
    $src = ($this->isReady() == false) ? false : $this->getEntryState($id, $artlang, false);

    // No state available for the entry (we should not need this)
    if ($src === false || $src === null) {
        return '<div class="url-disabled"><span>'.$PMF_LANG['ad_linkcheck_feedback_url-disabled'].'</span></div>';
    }

    if ($src === true) {
        $src = "noscript";
    }

    // Values placed into markup are escaped
    $safeLang = PMF_String::htmlspecialchars($artlang);
    $safeSrc  = PMF_String::htmlspecialchars($src);

    // Define names for javascripting
    $spanId = "spanurl_".$safeLang."_".(int) $id;
    $divId  = "divurl_".$safeLang."_".(int) $id;

    // An unknown state has no label; show an empty one instead of raising a notice
    $labelKey = 'ad_linkcheck_feedback_url-'.$src;
    $label    = isset($PMF_LANG[$labelKey]) ? $PMF_LANG[$labelKey] : '';

    return sprintf(
        '<div id="%s" class="url-%s"><span id="%s"><a href="javascript:onDemandVerifyURL(%d,\'%s\');">%s</a></span></div>',
        $divId,
        $safeSrc,
        $spanId,
        $id,
        $safeLang,
        $label
    );
}
679
|
|
|
|
680
|
|
|
/**
 * Gets the status string summarising the last link check result.
 *
 * "nolinks" - no links were found in contents
 * "linkok"  - link(s) were found and were all ok
 * "linkbad" - link(s) were found and at least one link was broken
 *
 * @return string
 */
public function getLinkStateString()
{
    $total  = 0;
    $broken = 0;

    // Walk every verified URL of every type
    foreach ($this->lastResult as $entries) {
        foreach ($entries as $entry) {
            ++$total;
            $broken += ($entry['valid'] == false) ? 1 : 0;
        }
    }

    if ($total == 0) {
        return "nolinks";
    }

    return ($broken == 0) ? "linkok" : "linkbad";
}
713
|
|
|
|
714
|
|
|
/**
 * Verifies the links of the specified article content and updates the
 * links_state database entry.
 *
 * The content is parsed for URLs, each URL is verified against the
 * configured 'main.referenceURL' and the resulting state is stored through
 * markEntry(). Unless $cron is set, an HTML report is returned.
 *
 * @param string  $contents Article content (HTML)
 * @param integer $id       Entry id
 * @param string  $artlang  Entry language
 * @param boolean $cron     When true nothing is reported back
 *
 * @return string HTML text, if $cron is false (default), otherwise an empty string
 */
public function verifyArticleURL($contents = '', $id = 0, $artlang = '', $cron = false)
{
    global $PMF_LANG;

    // A missing or blank reference URL makes verification impossible
    $referenceUrl = $this->_config->get('main.referenceURL');
    if (trim((string) $referenceUrl) == '') {
        $output = $PMF_LANG['ad_linkcheck_noReferenceURL'];
        return ($cron ? '' : '<br /><br />'.$output);
    }

    if ($this->isReady() === false) {
        $output = $PMF_LANG['ad_linkcheck_noAllowUrlOpen'];
        return ($cron ? '' : '<br /><br />'.$output);
    }

    // Parse contents and verify URLs
    $this->parse_string($contents);
    $result = $this->VerifyURLs($referenceUrl);
    $this->markEntry($id, $artlang);

    // Cron runs only need the stored state, not the report
    if ($cron) {
        return '';
    }

    // If no URLs found
    if ($result == false) {
        return sprintf(
            '<h2>%s</h2><br />%s',
            $PMF_LANG['ad_linkcheck_checkResult'],
            $PMF_LANG['ad_linkcheck_noLinksFound']
        );
    }

    $failreasons = $inforeasons = [];
    $output  = " <h2>".$PMF_LANG['ad_linkcheck_checkResult']."</h2>\n";
    $output .= ' <table class="verifyArticleURL">'."\n";

    foreach ($result as $type => $_value) {
        $output .= " <tr><td><strong>".PMF_String::htmlspecialchars($type)."</strong></td></tr>\n";

        foreach ($_value as $value) {
            $_absurl = PMF_String::htmlspecialchars($value['absurl']);

            if (isset($value['redirects']) && ($value['redirects'] > 0)) {
                $_redirects = "(".$value['redirects'].")";
            } else {
                $_redirects = "";
            }

            if ($value['valid'] === true) {
                $_classname = "urlsuccess";
                $_status    = $PMF_LANG['ad_linkcheck_checkSuccess'].$_redirects;
                if ($value['reason'] != "") {
                    $inforeasons[] = sprintf(
                        $PMF_LANG['ad_linkcheck_openurl_infoprefix'],
                        $_absurl
                    ) . $value['reason'];
                }
            } else {
                $_classname = "urlfail";
                $_status    = $PMF_LANG['ad_linkcheck_checkFailed'];
                if ($value['reason'] != "") {
                    $failreasons[] = $value['reason'];
                }
            }

            // Four properly closed cells: spacer, link, spacer, status.
            // The URL is escaped in the href attribute as well as in the link text.
            $output .= ' <tr class="'.$_classname.'">'."\n";
            $output .= ' <td></td>';
            $output .= ' <td><a href="'.$_absurl.'" target="_blank">'.$_absurl."</a></td>\n";
            $output .= ' <td></td>';
            $output .= '<td class="'.$_classname.'">'.$_status.'</td>'."\n";
            $output .= " </tr>\n";
        }
    }
    $output .= " </table>\n";

    if (count($failreasons) > 0) {
        $output .= " <br />\n <strong>".$PMF_LANG['ad_linkcheck_failReason']."</strong>\n <ul>\n";
        foreach ($failreasons as $reason) {
            $output .= " <li>".$reason."</li>\n";
        }
        $output .= " </ul>\n";
    }

    if (count($inforeasons) > 0) {
        $output .= " <br />\n <strong>".$PMF_LANG['ad_linkcheck_infoReason']."</strong>\n <ul>\n";
        foreach ($inforeasons as $reason) {
            $output .= " <li>".$reason."</li>\n";
        }
        $output .= " </ul>\n";
    }

    return $output;
}
815
|
|
|
} |
816
|
|
|
|