Test Failed
Push — master ( f47101...348e9e )
by Ricardo
02:03
created

Url::isSame()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Validate;
4
5
use Validate\Traits\FakeNameTrait;
6
7
class Url implements \Validate\Contracts\Validate
8
{
9
    use FakeNameTrait;
10
11
    /**
     * Normalises a URL for storage by dropping its leading http(s) scheme.
     *
     * Only a scheme at the very start of the string is removed, so a URL
     * embedded further along (for example inside a query string) is kept
     * intact and survives a toDatabase()/toUser() round trip.
     *
     * @param string $url URL as supplied by the caller
     * @return string the URL without a leading "http://" or "https://"
     */
    public static function toDatabase(string $url)
    {
        // preg_replace() yields null only on a PCRE failure; fall back to the input.
        return preg_replace('#^https?://#i', '', $url) ?? $url;
    }
17
18
    /**
     * Builds the user-facing form of a stored URL by prefixing "https://".
     *
     * @param string $url scheme-less URL
     * @return string the URL with "https://" prepended
     */
    public static function toUser($url)
    {
        $scheme = 'https://';

        return "{$scheme}{$url}";
    }
22
23
    /**
     * Accepts a URL only when it contains no space character.
     *
     * @param string $url URL to check
     * @return bool false if a space occurs anywhere in $url, true otherwise
     */
    public static function validate($url)
    {
        $hasSpace = strpos($url, ' ') !== false;

        return !$hasSpace;
    }
30
31
    /**
     * Splits a URL into its components.
     *
     * Thin wrapper that delegates to splitUrl() using its default decoding.
     *
     * @param string $url URL to split
     * @return mixed whatever splitUrl() returns for $url
     */
    public static function break(string $url)
    {
        $components = self::splitUrl($url);

        return $components;
    }
35
36
    /**
     * Tells whether two URLs are identical once normalised with toDatabase().
     *
     * @param string $to first URL
     * @param string $from second URL
     * @return bool true when both normalised forms are strictly equal
     */
    public static function isSame(string $to, string $from)
    {
        $normalisedTo = self::toDatabase($to);
        $normalisedFrom = self::toDatabase($from);

        return $normalisedTo === $normalisedFrom;
    }
40
41
42
    /**
     * Given a URL calculates the page's directory
     *
     * The directory is everything up to and including the last "/".
     *
     * @param string $url target URL
     * @return string Directory, or an empty string when $url contains no "/"
     */
    public function parseDir($url)
    {
        $lastSlash = strrpos($url, '/');

        // Without this guard, false + 1 would make substr() return the first character.
        if ($lastSlash === false) {
            return '';
        }

        return substr($url, 0, $lastSlash + 1);
    }
52
53
    /**
54
     * Link Checking Functions
55
     */
56
57
    /**
     * Uniformly cleans a link to avoid duplicates
     *
     * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
     * 2. Removes anchor tags (foo.html#bar to foo.html)
     * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
     * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com)
     *
     * Steps 3 and 4 only recognise "http://" URLs.
     *
     * @param string $relativeUrl link to clean
     * @param string $baseUrl directory of parent (linking) page
     * @return string cleaned link
     */
    public function cleanLink($relativeUrl, $baseUrl)
    {
        // urlToAbsolute() returns false for unparsable input; cast so the
        // string functions below always receive a string.
        $link = (string) self::urlToAbsolute($baseUrl, $relativeUrl);

        // Remove anchors. Strict comparison so a '#' at offset 0 is stripped too.
        $anchorAt = strpos($link, '#');
        if ($anchorAt !== false) {
            $link = substr($link, 0, $anchorAt);
        }

        // Append a trailing slash unless the link already ends in one, names a
        // file with an extension, or carries a query/fragment part.
        $looksComplete = preg_match(
            '#(^http://(.*)/$)|http://(.*)/(.*)\.([A-Za-z0-9]+)|http://(.*)/([^\?\#]*)(\?|\#)([^/]*)#i',
            $link
        );
        if (!$looksComplete) {
            $link .= '/';
        }

        // Prefix "www." to a bare two-label host with a three-letter TLD.
        // The dot is escaped and the match is anchored to the host, so
        // single-label hosts such as "localhost" are left untouched.
        return preg_replace('#^http://([^./]+)\.([a-zA-Z]{3})/#i', 'http://www.$1.$2/', $link);
    }
84
85
86
    /**
     * Tests, via regular expression, whether a link ends in an image extension
     *
     * Recognised extensions (case-insensitive): gif, jpg, jpeg, png, bmp.
     *
     * @param string $link target link
     * @return bool true on image, false on anything else
     */
    public static function isImage($link)
    {
        $matched = preg_match('%\.(gif|jpe?g|png|bmp)$%i', $link);

        return (bool) $matched;
    }
96
97
    /**
     * Checks to see that a given link is within the domain/host whitelist
     *
     * The host is taken from the start of the link, after an optional
     * "http://" or "https://" prefix, and compared as an exact,
     * case-sensitive string against each whitelist entry.
     *
     * @param string $link target link
     * @param string|string[] $domainArray allowed host, or list of allowed hosts
     * @return bool true if out of domain, false if on domain whitelist
     */
    public static function outOfDomain($link, $domainArray)
    {
        // Accept a single host as well as a list of hosts.
        $whitelist = is_array($domainArray) ? $domainArray : [$domainArray];

        // Get the host name from the URL; a link with no host is never whitelisted.
        if (!preg_match('#^(?:https?://)?([^/]+)#i', (string) $link, $matches)) {
            return true;
        }

        return !in_array($matches[1], $whitelist, true);
    }
123
124
    /**
     * Checks to see that a given link matches a pattern in the exclude list
     *
     * The link is URL-decoded before matching. On the first match a short
     * HTML notice is echoed (pattern and link are HTML-escaped) and true
     * is returned.
     *
     * @param string $link target link
     * @param string|string[] $excludedArray PCRE pattern, or list of PCRE patterns
     * @return bool true if matches exclude, false if no match
     */
    public function excludeByPattern($link, $excludedArray = [])
    {
        // Accept a single pattern as well as a list of patterns.
        $patterns = is_array($excludedArray) ? $excludedArray : [$excludedArray];
        $decodedLink = urldecode($link);

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $decodedLink)) {
                // The link comes from crawled pages, so escape it before echoing into HTML.
                echo '<p>matched exclude pattern <b>'
                    . htmlspecialchars((string) $pattern, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                    . '</b> in '
                    . htmlspecialchars($decodedLink, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                    . '</p>';

                return true;
            }
        }

        return false;
    }
143
144
    /**
     * Tells whether a link is a mailto: link
     *
     * The check is a case-insensitive search for "mailto:" anywhere in the link.
     *
     * @param string $link Link to check
     * @return bool true on mailto:, false on everything else
     */
    public static function isMailto($link)
    {
        $position = stripos($link, 'mailto:');

        return $position !== false;
    }
154
155
    /* Deprecated (I think)
156
157
    public function count_slashes($url) {
158
        if (strlen($url)<7) return 0;
159
        return substr_count($url,'/',7);
160
    }
161
162
    public function get_slashes($url) {
163
        if (preg_match_all('#/#',$url,$matches,PREG_OFFSET_CAPTURE,7)) return $matches[0];
164
        else return array();
165
    }
166
    */
167
168
    /**
     * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar)
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * @param string $baseUrl Directory of linking page
     * @param string $relativeUrl URL to convert to absolute
     * @return string|false Absolute URL, or false when either URL cannot be
     *                      parsed or the base URL lacks a scheme or host
     */
    public static function urlToAbsolute($baseUrl, $relativeUrl)
    {
        $r = self::splitUrl($relativeUrl);
        if ($r === false) {
            return false;
        }

        // If relative URL has a scheme, clean path and return.
        if (!empty($r['scheme'])) {
            if (!empty($r['path']) && $r['path'][0] === '/') {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }

        // Make sure the base URL is absolute.
        $b = self::splitUrl($baseUrl);
        if ($b === false || empty($b['scheme']) || empty($b['host'])) {
            return false;
        }

        $r['scheme'] = $b['scheme'];

        // If relative URL has an authority, clean path and return.
        if (isset($r['host'])) {
            if (!empty($r['path'])) {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }

        // Without a host of its own, the relative URL inherits the base authority.
        unset($r['port'], $r['user'], $r['pass']);
        $r['host'] = $b['host'];
        foreach (['port', 'user', 'pass'] as $key) {
            if (isset($b[$key])) {
                $r[$key] = $b[$key];
            }
        }

        // If relative URL has no path, use base path (and base query if none given).
        if (empty($r['path'])) {
            if (!empty($b['path'])) {
                $r['path'] = $b['path'];
            }

            if (!isset($r['query']) && isset($b['query'])) {
                $r['query'] = $b['query'];
            }

            return self::joinUrl($r);
        }

        // If relative URL path doesn't start with /, merge with base path.
        if ($r['path'][0] !== '/') {
            // The base URL may have no path at all (e.g. "http://foo.com").
            $base = mb_strrchr($b['path'] ?? '', '/', true, 'UTF-8');
            if ($base === false) {
                $base = '';
            }
            $r['path'] = $base . '/' . $r['path'];
        }

        $r['path'] = self::urlRemoveDotSegments($r['path']);

        return self::joinUrl($r);
    }
246
247
    /**
     * Removes "." and ".." segments (and empty segments) from a URL path
     *
     * A leading slash is preserved, and a trailing slash is preserved unless
     * the result is just "/".
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * @param string $path URL path to normalise
     * @return string normalised path
     */
    public static function urlRemoveDotSegments($path)
    {
        $path = (string) $path;
        if ($path === '') {
            return '';
        }

        // "/" is a single byte in UTF-8 and never part of a multi-byte
        // sequence, so byte-wise splitting is safe for multi-byte paths.
        $outSegs = [];
        foreach (explode('/', $path) as $seg) {
            if ($seg === '' || $seg === '.') {
                continue;
            }
            if ($seg === '..') {
                array_pop($outSegs);
            } else {
                $outSegs[] = $seg;
            }
        }

        $outPath = implode('/', $outSegs);
        if ($path[0] === '/') {
            $outPath = '/' . $outPath;
        }

        // Keep a trailing slash. A direct suffix check avoids comparing a
        // position against false, which used to turn "a" into "a/".
        if ($outPath !== '/' && substr($path, -1) === '/') {
            $outPath .= '/';
        }

        return $outPath;
    }
282
283
    /**
     * Splits a URL into its components
     *
     * Possible keys in the result: scheme, user, pass, host, port, path,
     * query, fragment. Only components present in the URL are set.
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * @param string $url URL to split
     * @param bool $decode when true, percent-decode user, pass, path, named
     *                     host, query and fragment
     * @return array|false associative array of components (empty for an
     *                     empty URL), or false when the URL does not match
     */
    public static function splitUrl($url, $decode = true)
    {
        $m = [];

        // Character-class building blocks for the pattern.
        $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
        $xpchar        = $xunressub . ':@%';

        $xscheme       = '([a-zA-Z][a-zA-Z\d+-.]*)';

        $xuserinfo     = '((['  . $xunressub . '%]*)' .
                         '(:([' . $xunressub . ':%]*))?)';

        $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

        $xipv6         = '(\[([a-fA-F\d.:]+)\])';

        // Host names may contain digits, dots and hyphens. The hyphen is
        // escaped because PCRE2 (PHP 7.3+) rejects "\d-." as an invalid range.
        $xhost_name    = '([a-zA-Z\d\-.%]+)';

        $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
        $xport         = '(\d*)';
        $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                         '?(:' . $xport . ')?)';

        $xslash_seg    = '(/[' . $xpchar . ']*)';
        $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
        $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
        $xpath_abs     = '(/(' . $xpath_rel . ')?)';
        $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                         '|' . $xpath_rel . ')';

        $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

        $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                         '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

        // Split the URL into components.
        if (!preg_match('!' . $xurl . '!', $url, $m)) {
            return false;
        }

        // Initialised up front so an empty URL yields [] instead of an undefined variable.
        $parts = [];
        // Only a named host (not an IP literal) is percent-decoded below.
        $namedHost = false;

        if (!empty($m[2])) {
            $parts['scheme'] = strtolower($m[2]);
        }

        // Group 7 is "userinfo@"; groups 9 and 11 are the user and password.
        if (!empty($m[7])) {
            $parts['user'] = $m[9] ?? '';
        }
        if (!empty($m[10])) {
            $parts['pass'] = $m[11];
        }

        // Groups 13/14/16 are named host, IPv4, IPv6; group 5 is "//authority".
        if (!empty($m[13])) {
            $parts['host'] = $m[13];
            $namedHost = true;
        } elseif (!empty($m[14])) {
            $parts['host'] = $m[14];
        } elseif (!empty($m[16])) {
            $parts['host'] = $m[16];
        } elseif (!empty($m[5])) {
            $parts['host'] = '';
        }
        if (!empty($m[17])) {
            $parts['port'] = $m[18];
        }

        // Path after an authority, absolute path, or relative path.
        if (!empty($m[19])) {
            $parts['path'] = $m[19];
        } elseif (!empty($m[21])) {
            $parts['path'] = $m[21];
        } elseif (!empty($m[25])) {
            $parts['path'] = $m[25];
        }

        if (!empty($m[27])) {
            $parts['query'] = $m[28];
        }
        if (!empty($m[29])) {
            $parts['fragment'] = $m[30];
        }

        if (!$decode) {
            return $parts;
        }

        foreach (['user', 'pass', 'path', 'query', 'fragment'] as $key) {
            if (!empty($parts[$key])) {
                $parts[$key] = rawurldecode($parts[$key]);
            }
        }
        if ($namedHost) {
            $parts['host'] = rawurldecode($parts['host']);
        }

        return $parts;
    }
369
    /**
     * Assembles a URL string from the components produced by splitUrl()
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * @param array $parts components keyed by scheme, user, pass, host, port,
     *                     path, query, fragment (all optional)
     * @param bool $encode when true, percent-encode the components first
     * @return string the assembled URL
     */
    public static function joinUrl($parts, $encode = true)
    {
        if ($encode) {
            if (isset($parts['user'])) {
                $parts['user'] = rawurlencode($parts['user']);
            }
            if (isset($parts['pass'])) {
                $parts['pass'] = rawurlencode($parts['pass']);
            }

            // Encode named hosts only; IP literals (bracketed or bare) are kept
            // verbatim. Both alternatives are anchored at both ends so a name
            // that merely ends in hex digits is not mistaken for an IP.
            if (isset($parts['host'])
                && !preg_match('!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'])
            ) {
                $parts['host'] = rawurlencode($parts['host']);
            }

            // Encode the path but keep "/" as the segment separator.
            if (!empty($parts['path'])) {
                $parts['path'] = preg_replace('!%2F!ui', '/', rawurlencode($parts['path']));
            }

            // NOTE(review): rawurlencode() on the whole query also escapes "=" and "&" — confirm this is intended.
            if (isset($parts['query'])) {
                $parts['query'] = rawurlencode($parts['query']);
            }

            if (isset($parts['fragment'])) {
                $parts['fragment'] = rawurlencode($parts['fragment']);
            }
        }

        $url = '';
        if (!empty($parts['scheme'])) {
            $url .= $parts['scheme'] . ':';
        }

        if (isset($parts['host'])) {
            $url .= '//';
            if (isset($parts['user'])) {
                $url .= $parts['user'];
                if (isset($parts['pass'])) {
                    $url .= ':' . $parts['pass'];
                }
                $url .= '@';
            }

            if (preg_match('!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'])) {
                $url .= '[' . $parts['host'] . ']'; // IPv6
            } else {
                $url .= $parts['host'];             // IPv4 or name
            }

            if (isset($parts['port'])) {
                $url .= ':' . $parts['port'];
            }

            // An authority must be separated from a path by "/".
            if (!empty($parts['path']) && $parts['path'][0] !== '/') {
                $url .= '/';
            }
        }

        if (!empty($parts['path'])) {
            $url .= $parts['path'];
        }
        if (isset($parts['query'])) {
            $url .= '?' . $parts['query'];
        }
        if (isset($parts['fragment'])) {
            $url .= '#' . $parts['fragment'];
        }

        return $url;
    }
434
435
}
436