Issues (42)

src/Validate/Url.php (2 issues)

1
<?php
2
3
namespace Validate;
4
5
class Url implements \Validate\Contracts\Validate
6
{
7
    /**
     * Normalises a URL for storage by removing its leading http:// or https:// scheme.
     *
     * Only a scheme at the very start of the string is removed, and the match is
     * case-insensitive, so a URL embedded later in the string (for example inside
     * a query parameter) is left intact.
     *
     * @param string $url URL as supplied by the user
     * @return string the URL without its leading http(s) scheme
     */
    public static function toDatabase(string $url)
    {
        // preg_replace() yields null only on a PCRE error; fall back to the input then.
        return preg_replace('#^https?://#i', '', $url) ?? $url;
    }
13
14
    /**
     * Builds the user-facing form of a URL by prefixing it with "https://".
     *
     * @param string $url URL without a scheme (presumably a value produced by toDatabase() - verify against callers)
     * @return string the URL prefixed with "https://"
     */
    public static function toUser($url)
    {
        $scheme = 'https://';

        return $scheme . $url;
    }
18
19
    /**
     * Accepts a URL only if it contains no space character.
     *
     * @param string $url URL to check
     * @return bool false when $url contains a space, true otherwise
     */
    public static function validate($url)
    {
        $containsSpace = strpos($url, ' ') !== false;

        return !$containsSpace;
    }
26
27
    /**
     * Breaks a URL into its components; an alias for splitUrl() with default decoding.
     *
     * @param string $url URL to split
     * @return array|false whatever splitUrl() returns for $url
     */
    public static function break(string $url)
    {
        $components = self::splitUrl($url);

        return $components;
    }
31
32
    /**
     * Tells whether two URLs are identical once both have been passed through toDatabase().
     *
     * @param string $to   first URL
     * @param string $from second URL
     * @return bool true when the normalised forms are strictly equal
     */
    public static function isSame(string $to, string $from)
    {
        $normalisedTo   = self::toDatabase($to);
        $normalisedFrom = self::toDatabase($from);

        return $normalisedTo === $normalisedFrom;
    }
36
37
38
    /**
     * Given a URL calculates the page's directory
     *
     * The directory is everything up to and including the last "/" of the URL.
     *
     * @param string $url target URL
     * @return string Directory, or an empty string when $url contains no "/"
     */
    public function parseDir($url)
    {
        $slash = strrpos($url, '/');

        // Without this guard, false + 1 evaluates to 1 and the first character
        // of the URL would be returned as its "directory".
        if ($slash === false) {
            return '';
        }

        return substr($url, 0, $slash + 1);
    }
49
50
    /**
51
     * Link Checking Functions
52
     */
53
54
    /**
     * Uniformly cleans a link to avoid duplicates
     *
     * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
     * 2. Removes anchor tags (foo.html#bar to foo.html)
     * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
     * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com);
     *    only hosts of the form name.tld with a three-letter TLD are rewritten
     *
     * Rules 3 and 4 apply to both http:// and https:// links.
     *
     * @param string $relativeUrl link to clean
     * @param string $baseUrl directory of parent (linking) page
     * @return string cleaned link
     */
    public function cleanLink($relativeUrl, $baseUrl)
    {
        // Make the link absolute, not relative.
        // NOTE(review): urlToAbsolute() returns false for input it cannot parse; that value
        // is treated as an empty string below and ends up as "/" - confirm callers expect this.
        $relativeUrl = self::urlToAbsolute($baseUrl, $relativeUrl);

        // Remove anchors.
        $anchorPos = strpos($relativeUrl, '#');
        if ($anchorPos !== false) {
            $relativeUrl = substr($relativeUrl, 0, $anchorPos);
        }

        // Leave the link alone when it already ends in "/", names a file with an
        // extension, or carries a query string; otherwise treat it as a directory.
        $looksComplete = '#(^https?://(.*)/$)'
            . '|https?://(.*)/(.*)\.([A-Za-z0-9]+)'
            . '|https?://(.*)/([^\?\#]*)(\?|\#)([^/]*)#i';
        if (!preg_match($looksComplete, $relativeUrl)) {
            $relativeUrl .= '/';
        }

        // Add "www." to a bare two-label host. The dot is escaped and the host label may
        // not contain "." or "/", so single-label hosts such as "localhost" are not split.
        $relativeUrl = preg_replace(
            '#^(https?)://([^./]+)\.([a-zA-Z]{3})/#i',
            '$1://www.$2.$3/',
            $relativeUrl
        );

        return $relativeUrl;
    }
81
82
83
    /**
     * Tests, by file extension, whether a link points at an image.
     *
     * The link must end in .gif, .jpg, .jpeg, .png or .bmp (case-insensitive).
     *
     * @param string $link target link
     * @return bool true on image, false on anything else
     */
    public static function isImage($link)
    {
        return preg_match('%\.(gif|jpe?g|png|bmp)$%i', $link) === 1;
    }
97
98
    /**
     * Checks to see that a given link is within the domain/host whitelist
     *
     * The host is the text between an optional leading http:// or https:// and the
     * first "/" of the link. It is compared for exact string equality with each
     * whitelist entry.
     *
     * @param string          $link        target link
     * @param string|string[] $domainArray allowed host, or list of allowed hosts
     * @return bool true if out of domain (or if no host can be extracted from $link),
     *              false if on domain whitelist
     */
    public static function outOfDomain($link, $domainArray)
    {
        // Accept a single host as a scalar. It is wrapped explicitly because
        // appending with [] to a string is a fatal error.
        if (!is_array($domainArray)) {
            $domainArray = [$domainArray];
        }

        // Get the host name from the URL, skipping an optional http(s):// prefix.
        if (!preg_match('/^(https?:\/\/)?([^\/]+)/i', (string) $link, $matches)) {
            return true;
        }
        $host = $matches[2];

        foreach ($domainArray as $domain) {
            if ((string) $domain === $host) {
                return false;
            }
        }

        return true;
    }
125
126
    /**
     * Checks to see that a given link matches a pattern in the exclude list
     *
     * The link is URL-decoded before matching. On the first match a short HTML
     * notice is echoed; the pattern and link are HTML-escaped in that notice
     * because the link may come from crawled (untrusted) pages.
     *
     * @param string          $link          target link
     * @param string|string[] $excludedArray PCRE pattern, or list of PCRE patterns (with delimiters)
     * @return bool true if matches exclude, false if no match
     */
    public function excludeByPattern($link, $excludedArray = [])
    {
        // Accept a single pattern as a scalar. It is wrapped explicitly because
        // appending with [] to a string is a fatal error.
        if (!is_array($excludedArray)) {
            $excludedArray = [$excludedArray];
        }

        $decodedLink = urldecode($link);

        foreach ($excludedArray as $pattern) {
            if (preg_match($pattern, $decodedLink)) {
                $flags = ENT_QUOTES | ENT_SUBSTITUTE;
                echo '<p>matched exclude pattern <b>'
                    . htmlspecialchars((string) $pattern, $flags, 'UTF-8')
                    . '</b> in '
                    . htmlspecialchars($decodedLink, $flags, 'UTF-8')
                    . '</p>';
                return true;
            }
        }

        return false;
    }
146
147
    /**
     * Reports whether a link contains "mailto:" (case-insensitive, anywhere in the string).
     *
     * @param string $link Link to check
     * @return bool true on mailto:, false on everything else
     */
    public static function isMailto($link)
    {
        return stripos($link, 'mailto:') !== false;
    }
161
162
    /* Deprecated (I think)
163
164
    public function count_slashes($url) {
165
        if (strlen($url)<7) return 0;
166
        return substr_count($url,'/',7);
167
    }
168
169
    public function get_slashes($url) {
170
        if (preg_match_all('#/#',$url,$matches,PREG_OFFSET_CAPTURE,7)) return $matches[0];
171
        else return array();
172
    }
173
    */
174
175
    /**
     * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar)
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * A relative URL that already has a scheme is returned as is (with dot segments
     * removed from an absolute path). Otherwise scheme, authority and, where needed,
     * path and query are taken from the base URL. An empty relative URL resolves to
     * the base URL.
     *
     * @param string $baseUrl Directory of linking page; must be absolute (scheme and host)
     * @param string $relativeUrl URL to convert to absolute
     * @return string|false Absolute URL, or false when either URL cannot be parsed
     *                      or the base URL has no scheme or host
     */
    public static function urlToAbsolute($baseUrl, $relativeUrl)
    {
        // Compare against false explicitly: splitUrl('') yields an empty array,
        // which is a valid (empty) reference and not a parse failure.
        $r = self::splitUrl($relativeUrl);
        if ($r === false) {
            return false;
        }

        // If relative URL has a scheme, clean path and return.
        if (!empty($r['scheme'])) {
            if (!empty($r['path']) && $r['path'][0] == '/') {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }

        // Make sure the base URL is absolute.
        $b = self::splitUrl($baseUrl);
        if ($b === false || empty($b['scheme']) || empty($b['host'])) {
            return false;
        }

        $r['scheme'] = $b['scheme'];

        // If relative URL has an authority, clean path and return.
        if (isset($r['host'])) {
            if (!empty($r['path'])) {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }
        unset($r['port'], $r['user'], $r['pass']);

        // Copy base authority.
        $r['host'] = $b['host'];
        foreach (['port', 'user', 'pass'] as $key) {
            if (isset($b[$key])) {
                $r[$key] = $b[$key];
            }
        }

        // If relative URL has no path, use base path
        if (empty($r['path'])) {
            if (!empty($b['path'])) {
                $r['path'] = $b['path'];
            }

            if (!isset($r['query']) && isset($b['query'])) {
                $r['query'] = $b['query'];
            }

            return self::joinUrl($r);
        }

        // If relative URL path doesn't start with /, merge with base path
        if ($r['path'][0] != '/') {
            // The base URL may have no path at all (e.g. "http://host").
            $basePath = $b['path'] ?? '';
            $base = mb_strrchr($basePath, '/', true, 'UTF-8');
            if ($base === false) {
                $base = '';
            }
            $r['path'] = $base . '/' . $r['path'];
        }
        $r['path'] = self::urlRemoveDotSegments($r['path']);

        return self::joinUrl($r);
    }
257
258
    /**
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Removes "." and ".." segments (and empty segments) from a path. A leading
     * "/" is preserved, and a trailing "/" is preserved when segments remain.
     *
     * @param string $path URL path, e.g. "/a/b/../c/"
     * @return string the path with dot segments resolved; "" for an empty path
     */
    public static function urlRemoveDotSegments($path)
    {
        $path = (string) $path;
        if ($path === '') {
            return '';
        }

        // "/" is a single ASCII byte that never occurs inside a multi-byte UTF-8
        // sequence, so a plain byte-wise split is safe here.
        $outSegs = [];
        foreach (explode('/', $path) as $seg) {
            if ($seg === '' || $seg === '.') {
                continue;
            }
            if ($seg === '..') {
                array_pop($outSegs);
            } else {
                $outSegs[] = $seg;
            }
        }

        $outPath = implode('/', $outSegs);
        if ($path[0] === '/') {
            $outPath = '/' . $outPath;
        }

        // Keep a trailing slash. The last byte is tested directly; comparing
        // a length with a failed search (false) would wrongly succeed for
        // one-character paths that contain no slash.
        if ($outPath !== '/' && substr($path, -1) === '/') {
            $outPath .= '/';
        }

        return $outPath;
    }
293
294
    /**
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Splits an absolute or relative URL into its components with one regular
     * expression. The result holds only the keys that were present in the input,
     * inserted in this order: scheme (lower-cased), user, pass, host, port, path,
     * query, fragment.
     *
     * @param string $url    URL to split
     * @param bool   $decode when true (default), user, pass, path, named host, query
     *                       and fragment are percent-decoded with rawurldecode()
     * @return array|false map of components, or false when $url does not match the pattern
     */
    public static function splitUrl(string $url, $decode=true)
    {
        $parts = [];
        $m = [];

        // Characters allowed unencoded in most components, and in path segments.
        $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
        $xpchar        = $xunressub . ':@%';

        // The hyphen is escaped so "+-." is three literals rather than a range
        // (which would also admit ",").
        $xscheme       = '([a-zA-Z][a-zA-Z\d+\-.]*)';

        $xuserinfo     = '((['  . $xunressub . '%]*)' .
                        '(:([' . $xunressub . ':%]*))?)';

        $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

        $xipv6         = '(\[([a-fA-F\d.:]+)\])';

        // Host names need letters, digits, dots and hyphens (www.foo-1.com). The
        // hyphen is escaped because an unescaped "\d-." is rejected as an invalid
        // range by PCRE2 (PHP 7.3+).
        $xhost_name    = '([a-zA-Z\d\-.%]+)';

        $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
        $xport         = '(\d*)';
        $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                        '?(:' . $xport . ')?)';

        $xslash_seg    = '(/[' . $xpchar . ']*)';
        $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
        $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
        $xpath_abs     = '(/(' . $xpath_rel . ')?)';
        $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                        '|' . $xpath_rel . ')';

        $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

        $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                        '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

        // Split the URL into components. The numeric indexes used below are the
        // capture-group positions of the pattern assembled above.
        if (!preg_match('!' . $xurl . '!', $url, $m)) {
            return false;
        }

        if (!empty($m[2])) {
            $parts['scheme'] = strtolower($m[2]);
        }

        // m[7] is "userinfo@"; m[9] the user, m[10] ":pass", m[11] the password.
        if (!empty($m[7])) {
            $parts['user'] = isset($m[9]) ? $m[9] : '';
        }
        if (!empty($m[10])) {
            $parts['pass'] = $m[11];
        }

        // m[13] host name, m[14] IPv4, m[16] IPv6 without brackets; m[5] is the
        // whole "//authority", so a non-empty m[5] alone means an empty host.
        $hostIsName = false;
        if (!empty($m[13])) {
            $parts['host'] = $m[13];
            $hostIsName = true;
        } elseif (!empty($m[14])) {
            $parts['host'] = $m[14];
        } elseif (!empty($m[16])) {
            $parts['host'] = $m[16];
        } elseif (!empty($m[5])) {
            $parts['host'] = '';
        }
        if (!empty($m[17])) {
            $parts['port'] = $m[18];
        }

        // Path after an authority (m[19]), absolute path (m[21]) or relative path (m[25]).
        if (!empty($m[19])) {
            $parts['path'] = $m[19];
        } elseif (!empty($m[21])) {
            $parts['path'] = $m[21];
        } elseif (!empty($m[25])) {
            $parts['path'] = $m[25];
        }

        if (!empty($m[27])) {
            $parts['query'] = $m[28];
        }
        if (!empty($m[29])) {
            $parts['fragment'] = $m[30];
        }

        if (!$decode) {
            return $parts;
        }

        foreach (['user', 'pass', 'path'] as $key) {
            if (!empty($parts[$key])) {
                $parts[$key] = rawurldecode($parts[$key]);
            }
        }
        // Only a named host is decoded; IP literals are left untouched.
        if ($hostIsName) {
            $parts['host'] = rawurldecode($parts['host']);
        }
        foreach (['query', 'fragment'] as $key) {
            if (!empty($parts[$key])) {
                $parts[$key] = rawurldecode($parts[$key]);
            }
        }

        return $parts;
    }
408
409
    /**
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Assembles a URL string from a component map with the keys scheme, user,
     * pass, host, port, path, query and fragment (all optional).
     *
     * With $encode on, user, pass, fragment and non-IP hosts are passed through
     * rawurlencode(); the path is encoded with "/" kept literal, and the query is
     * encoded with the delimiters !$&'()*+,;= kept literal so that key=value&...
     * pairs remain readable by the receiving server.
     *
     * @param array $parts  URL components
     * @param bool  $encode whether to percent-encode the components (default true)
     * @return string the assembled URL
     */
    public static function joinUrl($parts, $encode=true)
    {
        if ($encode) {
            foreach (['user', 'pass'] as $key) {
                if (isset($parts[$key])) {
                    $parts[$key] = rawurlencode($parts[$key]);
                }
            }

            // Encode the host unless the WHOLE value is an IP literal (optionally
            // bracketed). Both alternatives sit inside one anchored group so a name
            // that merely ends in hex characters is still encoded.
            if (isset($parts['host'])
                && !preg_match('!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'])
            ) {
                $parts['host'] = rawurlencode($parts['host']);
            }

            if (!empty($parts['path'])) {
                // rawurlencode() emits upper-case hex, so a plain replace restores "/".
                $parts['path'] = str_replace('%2F', '/', rawurlencode($parts['path']));
            }

            if (isset($parts['query'])) {
                // Restore the delimiters that structure a query string.
                $queryDelims = [
                    '%21' => '!', '%24' => '$', '%26' => '&', '%27' => "'",
                    '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+',
                    '%2C' => ',', '%3B' => ';', '%3D' => '=',
                ];
                $parts['query'] = strtr(rawurlencode($parts['query']), $queryDelims);
            }

            if (isset($parts['fragment'])) {
                $parts['fragment'] = rawurlencode($parts['fragment']);
            }
        }

        $url = '';
        if (!empty($parts['scheme'])) {
            $url .= $parts['scheme'] . ':';
        }

        if (isset($parts['host'])) {
            $url .= '//';
            if (isset($parts['user'])) {
                $url .= $parts['user'];
                if (isset($parts['pass'])) {
                    $url .= ':' . $parts['pass'];
                }
                $url .= '@';
            }

            if (preg_match('!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'])) {
                // IPv6 literals are written in brackets.
                $url .= '[' . $parts['host'] . ']';
            } else {
                // IPv4 or name
                $url .= $parts['host'];
            }

            if (isset($parts['port'])) {
                $url .= ':' . $parts['port'];
            }
            // A path that follows an authority must be separated from it by "/".
            if (!empty($parts['path']) && $parts['path'][0] != '/') {
                $url .= '/';
            }
        }

        if (!empty($parts['path'])) {
            $url .= $parts['path'];
        }
        if (isset($parts['query'])) {
            $url .= '?' . $parts['query'];
        }
        if (isset($parts['fragment'])) {
            $url .= '#' . $parts['fragment'];
        }

        return $url;
    }
483
}
484