1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Validate; |
4
|
|
|
|
5
|
|
|
use Validate\Traits\FakeNameTrait; |
6
|
|
|
|
7
|
|
|
class Url implements \Validate\Contracts\Validate |
8
|
|
|
{ |
9
|
|
|
use FakeNameTrait; |
10
|
|
|
|
11
|
|
|
/**
 * Converts a URL to its stored form by removing the leading scheme.
 *
 * Only a leading "http://" or "https://" (matched case-insensitively) is
 * removed. Occurrences further inside the string, for example a URL carried
 * in a query parameter, are left untouched so the stored value is not
 * corrupted.
 *
 * @param string $url URL to normalise
 * @return string the URL without its leading http(s) scheme
 */
public static function toDatabase(string $url)
{
    // Anchored with ^ so only the prefix is touched; "i" accepts "HTTP://" too.
    $stripped = preg_replace('#^https?://#i', '', $url);

    // preg_replace() yields null only on a PCRE failure; keep the input then.
    return $stripped === null ? $url : $stripped;
}
17
|
|
|
|
18
|
|
|
/**
 * Builds the user-facing form of a URL by prefixing it with "https://".
 *
 * The value is used as-is; no check is made for an existing scheme.
 *
 * @param mixed $url value to prefix
 * @return string the value prefixed with "https://"
 */
public static function toUser($url)
{
    return "https://{$url}";
}
22
|
|
|
|
23
|
|
|
/**
 * Validates a URL.
 *
 * The only check performed is that the value contains no space character.
 *
 * @param string $url value to check
 * @return bool false when a space is present, true otherwise
 */
public static function validate($url)
{
    $containsSpace = strpos($url, ' ') !== false;

    return !$containsSpace;
}
30
|
|
|
|
31
|
|
|
/**
 * Breaks a URL into its components.
 *
 * Thin wrapper that delegates to splitUrl() with its default decoding.
 *
 * @param string $url URL to split
 * @return array|false whatever splitUrl() returns for $url
 */
public static function break(string $url)
{
    $components = self::splitUrl($url);

    return $components;
}
35
|
|
|
|
36
|
|
|
/**
 * Tells whether two URLs are identical once both are put through toDatabase().
 *
 * @param string $to   first URL
 * @param string $from second URL
 * @return bool true when both normalised values are strictly equal
 */
public static function isSame(string $to, string $from)
{
    $storedTo = self::toDatabase($to);
    $storedFrom = self::toDatabase($from);

    return $storedTo === $storedFrom;
}
40
|
|
|
|
41
|
|
|
|
42
|
|
|
/**
 * Given a URL, calculates the page's directory.
 *
 * The directory is everything up to and including the last "/" in the
 * string. A value without any "/" has no directory part, so an empty string
 * is returned for it.
 *
 * @param string $url target URL
 * @return string directory including its trailing slash, or '' when $url contains no slash
 */
public function parseDir($url) {
    $slash = strrpos($url, '/');

    // strrpos() returns false when there is no slash; using that as an
    // offset would evaluate to 1 and return the first character of $url.
    if ($slash === false) {
        return '';
    }

    return substr($url, 0, $slash + 1);
}
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* Link Checking Functions |
55
|
|
|
*/ |
56
|
|
|
|
57
|
|
|
/**
 * Uniformly cleans a link to avoid duplicates.
 *
 * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
 * 2. Removes anchor tags (foo.html#bar to foo.html)
 * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
 * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com)
 *
 * Both http and https links are handled. Step 4 only applies to hosts made
 * of a single label followed by a three-letter TLD, as in the original rule.
 *
 * @param string $relativeUrl link to clean
 * @param string $baseUrl     directory of parent (linking) page
 * @return string|false cleaned link, or false when the link cannot be made absolute
 */
public function cleanLink($relativeUrl, $baseUrl) {
    // Make the link absolute, not relative.
    $link = self::urlToAbsolute($baseUrl, $relativeUrl);

    // urlToAbsolute() signals failure with false; pass that on instead of
    // turning it into a bogus "/" link further down.
    if ($link === false) {
        return false;
    }

    // Remove anchors. Strict comparison so a "#" at offset 0 is not missed.
    $hash = strpos($link, '#');
    if ($hash !== false) {
        $link = substr($link, 0, $hash);
    }

    // Append a slash unless the link already ends in one, ends in something
    // that looks like a file name (has an extension), or carries a query.
    $looksComplete = preg_match(
        '#(^https?://(.*)/$)|https?://(.*)/(.*)\.([A-Za-z0-9]+)|https?://(.*)/([^\?\#]*)(\?|\#)([^/]*)#i',
        $link
    );
    if (!$looksComplete) {
        $link .= '/';
    }

    // Add "www." to bare two-label hosts. The dot is escaped and the host
    // label may not contain "/" so the rule cannot match inside a path.
    return preg_replace(
        '#^(https?)://([^./]+)\.([a-zA-Z]{3})/#i',
        '$1://www.$2.$3/',
        $link
    );
}
84
|
|
|
|
85
|
|
|
|
86
|
|
|
/**
 * Tells whether a link points at an image, judged by its file extension.
 *
 * The link must end in .gif, .jpg, .jpeg, .png or .bmp (case-insensitive).
 *
 * @param string $link target link
 * @return bool true on image, false on anything else
 */
public static function isImage($link) {
    $matched = preg_match('%\.(gif|jpe?g|png|bmp)$%i', $link);

    return $matched ? true : false;
}
96
|
|
|
|
97
|
|
|
/**
 * Checks to see that a given link is within the domain/host whitelist.
 *
 * The host is taken from the start of the link (an optional http:// or
 * https:// prefix followed by everything up to the first "/") and compared
 * exactly against each whitelist entry.
 *
 * @param string          $link        target link
 * @param string[]|string $domainArray allowed host, or list of allowed hosts
 * @return bool true if out of domain, false if on domain whitelist
 */
public static function outOfDomain($link, $domainArray) {
    // Accept a single host as well as a list. Appending with [] to a string
    // is a fatal error, so wrap the scalar instead.
    if (!is_array($domainArray)) {
        $domainArray = [$domainArray];
    }

    // Get host name from URL. Without the "s?" an https link would yield
    // the bogus host "https:".
    if (!preg_match('#^(?:https?://)?([^/]+)#i', (string) $link, $matches)) {
        // No host could be extracted (empty link or one starting with "/"),
        // so it cannot be on the whitelist.
        return true;
    }

    return !in_array($matches[1], $domainArray, true);
}
123
|
|
|
|
124
|
|
|
/**
 * Checks to see that a given link matches a pattern in the exclude list.
 *
 * Each pattern is a complete PCRE expression (delimiters included) and is
 * tested against the URL-decoded link. On the first hit a short HTML notice
 * is echoed and true is returned.
 *
 * @param string          $link          target link
 * @param string[]|string $excludedArray exclude pattern, or list of patterns
 * @return bool true if matches exclude, false if no match
 */
public function excludeByPattern($link, $excludedArray = []) {
    // Accept a single pattern as well as a list. Appending with [] to a
    // string is a fatal error, so wrap the scalar instead.
    if (!is_array($excludedArray)) {
        $excludedArray = [$excludedArray];
    }

    // The decoded link does not change between patterns; compute it once.
    $decoded = urldecode($link);

    foreach ($excludedArray as $pattern) {
        if (preg_match($pattern, $decoded)) {
            // The link comes from crawled pages, so escape it (and the
            // pattern) before writing it into HTML output.
            echo '<p>matched exclude pattern <b>'
                . htmlspecialchars($pattern, ENT_QUOTES, 'UTF-8')
                . '</b> in '
                . htmlspecialchars($decoded, ENT_QUOTES, 'UTF-8')
                . '</p>';

            return true;
        }
    }

    return false;
}
143
|
|
|
|
144
|
|
|
/**
 * Checks to see if a given link is a mailto: link.
 *
 * The test is a case-insensitive search for "mailto:" anywhere in the link.
 *
 * @param string $link link to check
 * @return bool true when "mailto:" is found, false otherwise
 */
public static function isMailto($link) {
    $position = stripos($link, 'mailto:');

    return $position !== false;
}
154
|
|
|
|
155
|
|
|
/* Deprecated (I think)
156
|
|
|
|
157
|
|
|
public function count_slashes($url) { |
158
|
|
|
if (strlen($url)<7) return 0; |
159
|
|
|
return substr_count($url,'/',7); |
160
|
|
|
} |
161
|
|
|
|
162
|
|
|
public function get_slashes($url) { |
163
|
|
|
if (preg_match_all('#/#',$url,$matches,PREG_OFFSET_CAPTURE,7)) return $matches[0]; |
164
|
|
|
else return array(); |
165
|
|
|
} |
166
|
|
|
*/ |
167
|
|
|
|
168
|
|
|
/**
 * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar).
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Resolution order:
 *  - a relative URL that already has a scheme is returned after its path is cleaned;
 *  - one that has a host (//host/path) only inherits the base scheme;
 *  - one without a path inherits the base path (and the base query if it has none);
 *  - otherwise its path is merged with the directory of the base path.
 *
 * @param string $baseUrl     directory of linking page; must have a scheme and a host
 * @param string $relativeUrl URL to convert to absolute
 * @return string|false absolute URL, or false when either URL cannot be
 *                      parsed or the base URL is not absolute
 */
public static function urlToAbsolute( $baseUrl, $relativeUrl ) {
    // If relative URL has a scheme, clean path and return.
    $r = self::splitUrl( $relativeUrl );
    if ( $r === false ) {
        return false;
    }

    if ( !empty( $r['scheme'] ) ) {
        if ( !empty( $r['path'] ) && $r['path'][0] == '/' ) {
            $r['path'] = self::urlRemoveDotSegments( $r['path'] );
        }

        return self::joinUrl( $r );
    }

    // Make sure the base URL is absolute.
    $b = self::splitUrl( $baseUrl );
    if ( $b === false || empty( $b['scheme'] ) || empty( $b['host'] ) ) {
        return false;
    }

    $r['scheme'] = $b['scheme'];

    // If relative URL has an authority, clean path and return.
    if ( isset( $r['host'] ) ) {
        if ( !empty( $r['path'] ) ) {
            $r['path'] = self::urlRemoveDotSegments( $r['path'] );
        }

        return self::joinUrl( $r );
    }

    // Copy base authority, discarding any partial one on the relative URL.
    unset( $r['port'], $r['user'], $r['pass'] );
    $r['host'] = $b['host'];
    foreach ( ['port', 'user', 'pass'] as $key ) {
        if ( isset( $b[$key] ) ) {
            $r[$key] = $b[$key];
        }
    }

    // If relative URL has no path, use base path.
    if ( empty( $r['path'] ) ) {
        if ( !empty( $b['path'] ) ) {
            $r['path'] = $b['path'];
        }

        if ( !isset( $r['query'] ) && isset( $b['query'] ) ) {
            $r['query'] = $b['query'];
        }

        return self::joinUrl( $r );
    }

    // If relative URL path doesn't start with /, merge with base path.
    if ( $r['path'][0] != '/' ) {
        // A base such as "http://host" has no path component at all; treat
        // that as an empty directory rather than reading an undefined key.
        $basePath = isset( $b['path'] ) ? $b['path'] : '';

        // Everything before the last "/" of the base path is its directory.
        $base = mb_strrchr( $basePath, '/', true, 'UTF-8' );
        if ( $base === false ) {
            $base = '';
        }

        $r['path'] = $base . '/' . $r['path'];
    }

    $r['path'] = self::urlRemoveDotSegments( $r['path'] );

    return self::joinUrl( $r );
}
246
|
|
|
|
247
|
|
|
/**
 * Removes "." and ".." segments from a URL path.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Empty segments and "." are dropped, ".." removes the previous segment (and
 * is ignored when there is none). A leading slash is kept, and a trailing
 * slash on the input is kept on the output.
 *
 * @param string $path path to normalise
 * @return string path without dot segments; '' for an empty path
 */
public static function urlRemoveDotSegments( $path ) {
    $path = (string) $path;

    // Nothing to normalise, and the index reads below need one character.
    if ( $path === '' ) {
        return '';
    }

    // "/" is a single byte that never occurs inside a multi-byte UTF-8
    // sequence, so a plain explode() splits UTF-8 paths correctly.
    $outSegs = [];
    foreach ( explode( '/', $path ) as $seg ) {
        if ( $seg === '' || $seg === '.' ) {
            continue;
        }

        if ( $seg === '..' ) {
            array_pop( $outSegs );
        } else {
            $outSegs[] = $seg;
        }
    }

    $outPath = implode( '/', $outSegs );
    if ( $path[0] === '/' ) {
        $outPath = '/' . $outPath;
    }

    // Keep a trailing slash. The previous mb_strrpos() call passed the
    // encoding where the integer offset belongs (a TypeError on PHP 8) and
    // compared against false loosely, which added a slash to one-character
    // paths such as "a".
    if ( $outPath !== '/' && substr( $path, -1 ) === '/' ) {
        $outPath .= '/';
    }

    return $outPath;
}
282
|
|
|
|
283
|
|
|
/**
 * Splits a URL into its components.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * The returned array contains only the components present in $url, under
 * the keys 'scheme' (lower-cased), 'user', 'pass', 'host', 'port', 'path',
 * 'query' and 'fragment'. An authority without a recognisable host yields
 * 'host' => ''.
 *
 * @param string $url    URL to split; may be absolute or relative
 * @param bool   $decode when true, percent-decode user, pass, named host,
 *                       path, query and fragment
 * @return array|false component array (possibly empty), or false when $url
 *                     does not match the URL grammar
 */
public static function splitUrl( $url, $decode=TRUE )
{
    $m = [];
    // Always return an array on success, even when no component matched.
    $parts = [];

    // Character sets. "\!" is escaped because "!" is the regex delimiter.
    $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar        = $xunressub . ':@%';

    // Hyphen placed last so it is a literal, not the "+" to "." range.
    $xscheme       = '([a-zA-Z][a-zA-Z\d+.-]*)';

    $xuserinfo     = '((['  . $xunressub . '%]*)' .
                     '(:([' . $xunressub . ':%]*))?)';

    $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    $xipv6         = '(\[([a-fA-F\d.:]+)\])';

    // Host names need digits, dots and hyphens ("www.foo-1.com"). The
    // hyphen goes last: "\d-." is rejected by PCRE2 as an invalid range,
    // and leaving those characters out made absolute URLs parse as a path.
    $xhost_name    = '([a-zA-Z\d.%-]+)';

    $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport         = '(\d*)';
    $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                     '?(:' . $xport . ')?)';

    $xslash_seg    = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs     = '(/(' . $xpath_rel . ')?)';
    $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                     '|' . $xpath_rel . ')';

    $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

    $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                     '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', $url, $m ) ) {
        return false;
    }

    // The numeric indexes below are the capture-group positions in $xurl.
    if ( !empty($m[2]) ) {
        $parts['scheme'] = strtolower($m[2]);
    }

    if ( !empty($m[7]) ) {
        $parts['user'] = isset( $m[9] ) ? $m[9] : '';
    }
    if ( !empty($m[10]) ) {
        $parts['pass'] = $m[11];
    }

    // Only named hosts are percent-decoded later; IP literals are kept as-is.
    $hostIsName = false;
    if ( !empty($m[13]) ) {
        $parts['host'] = $m[13];
        $hostIsName = true;
    } elseif ( !empty($m[14]) ) {
        $parts['host'] = $m[14];
    } elseif ( !empty($m[16]) ) {
        $parts['host'] = $m[16];
    } elseif ( !empty( $m[5] ) ) {
        // "//" authority marker present but no host matched.
        $parts['host'] = '';
    }
    if ( !empty($m[17]) ) {
        $parts['port'] = $m[18];
    }

    // The path comes from whichever of the three path alternatives matched.
    if ( !empty($m[19]) ) {
        $parts['path'] = $m[19];
    } elseif ( !empty($m[21]) ) {
        $parts['path'] = $m[21];
    } elseif ( !empty($m[25]) ) {
        $parts['path'] = $m[25];
    }

    if ( !empty($m[27]) ) {
        $parts['query'] = $m[28];
    }
    if ( !empty($m[29]) ) {
        $parts['fragment'] = $m[30];
    }

    if ( !$decode ) {
        return $parts;
    }

    foreach ( ['user', 'pass', 'path'] as $key ) {
        if ( !empty($parts[$key]) ) {
            $parts[$key] = rawurldecode( $parts[$key] );
        }
    }
    if ( $hostIsName ) {
        $parts['host'] = rawurldecode( $parts['host'] );
    }
    foreach ( ['query', 'fragment'] as $key ) {
        if ( !empty($parts[$key]) ) {
            $parts[$key] = rawurldecode( $parts[$key] );
        }
    }

    return $parts;
}
368
|
|
|
|
369
|
|
|
/**
 * Builds a URL string from the component array produced by splitUrl().
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Recognised keys: 'scheme', 'user', 'pass', 'host', 'port', 'path',
 * 'query', 'fragment'. The authority ("//user:pass@host:port") is written
 * only when 'host' is set. IPv6 hosts are wrapped in brackets.
 *
 * @param array $parts  URL components
 * @param bool  $encode when true, percent-encode the components first
 * @return string the assembled URL
 */
public static function joinUrl( $parts, $encode=TRUE )
{
    if ( $encode ) {
        if ( isset( $parts['user'] ) ) {
            $parts['user'] = rawurlencode( $parts['user'] );
        }
        if ( isset( $parts['pass'] ) ) {
            $parts['pass'] = rawurlencode( $parts['pass'] );
        }

        // Leave IP literals alone: encoding would turn ":" into "%3A". The
        // whole alternation is anchored so a name that merely ends in hex
        // characters (e.g. "…​.de") is not mistaken for an address.
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'] ) ) {
            $parts['host'] = rawurlencode( $parts['host'] );
        }

        // Encode the path but keep "/" as the segment separator.
        if ( !empty( $parts['path'] ) ) {
            $parts['path'] = preg_replace( '!%2F!ui', '/',
                rawurlencode( $parts['path'] ) );
        }

        // Encode the query but keep "=" and "&": encoding them would merge
        // every parameter into a single opaque value.
        if ( isset( $parts['query'] ) ) {
            $parts['query'] = str_replace(
                ['%3D', '%26'],
                ['=', '&'],
                rawurlencode( $parts['query'] )
            );
        }

        if ( isset( $parts['fragment'] ) ) {
            $parts['fragment'] = rawurlencode( $parts['fragment'] );
        }
    }

    $url = '';
    if ( !empty( $parts['scheme'] ) ) {
        $url .= $parts['scheme'] . ':';
    }

    if ( isset( $parts['host'] ) ) {
        $url .= '//';

        if ( isset( $parts['user'] ) ) {
            $url .= $parts['user'];
            if ( isset( $parts['pass'] ) ) {
                $url .= ':' . $parts['pass'];
            }
            $url .= '@';
        }

        if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) ) {
            $url .= '[' . $parts['host'] . ']'; // IPv6
        } else {
            $url .= $parts['host'];             // IPv4 or name
        }

        if ( isset( $parts['port'] ) ) {
            $url .= ':' . $parts['port'];
        }

        // A path that follows an authority must begin with "/".
        if ( !empty( $parts['path'] ) && $parts['path'][0] != '/' ) {
            $url .= '/';
        }
    }

    if ( !empty( $parts['path'] ) ) {
        $url .= $parts['path'];
    }
    if ( isset( $parts['query'] ) ) {
        $url .= '?' . $parts['query'];
    }
    if ( isset( $parts['fragment'] ) ) {
        $url .= '#' . $parts['fragment'];
    }

    return $url;
}
434
|
|
|
|
435
|
|
|
} |
436
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths