Test Failed
Push — master ( c6c6a6...f47101 )
by Ricardo
02:04
created

Url::excludeByPattern()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 7
nc 6
nop 2
dl 0
loc 12
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Validate;
4
5
use Validate\Traits\FakeNameTrait;
6
7
class Url implements \Validate\Contracts\Validate
8
{
9
    use FakeNameTrait;
10
11
    /**
     * Strips the leading http:// or https:// scheme from a URL before storage.
     *
     * Only a scheme at the very start of the string is removed. A scheme that
     * appears later (for example inside a query-string parameter) is kept, so
     * embedded URLs are not corrupted.
     *
     * @param string $url URL as supplied by the user
     * @return string the URL without its leading scheme
     */
    public static function toDatabase(string $url)
    {
        return preg_replace('#^https?://#', '', $url);
    }
17
18
    /**
     * Builds the user-facing form of a URL by prefixing it with https://.
     *
     * @param string $url URL without a scheme
     * @return string the URL prefixed with https://
     */
    public static function toUser($url)
    {
        return "https://{$url}";
    }
22
23
    /**
     * Checks a URL: it is considered valid when it contains no space character.
     *
     * @param string $url URL to check
     * @return bool false if the URL contains a space, true otherwise
     */
    public static function validate($url)
    {
        $containsSpace = strpos($url, ' ') !== false;

        return !$containsSpace;
    }
30
31
    /**
     * Splits a URL into its components; a thin alias of splitUrl().
     *
     * @param string $url URL to split
     * @return mixed whatever splitUrl() returns for the given URL
     */
    public static function break(string $url)
    {
        $components = self::splitUrl($url);

        return $components;
    }
35
36
37
    /**
     * Given a URL calculates the page's directory.
     *
     * The directory is everything up to and including the last slash.
     * A URL that contains no slash has no directory part, so an empty
     * string is returned for it.
     *
     * @param string $url target URL
     * @return string Directory (with trailing slash), or '' when $url has no slash
     */
    public function parseDir($url)
    {
        $slash = strrpos($url, '/');

        // strrpos() yields false when there is no slash; using it in arithmetic
        // would silently turn it into offset 1 and return the first character.
        if ($slash === false) {
            return '';
        }

        return substr($url, 0, $slash + 1);
    }
47
48
    /**
49
     * Link Checking Functions
50
     */
51
52
    /**
     * Uniformly cleans a link to avoid duplicates
     *
     * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
     * 2. Removes anchor tags (foo.html#bar to foo.html)
     * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
     * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com)
     *
     * Steps 3 and 4 are driven by patterns that only recognise http:// URLs
     * and, for step 4, three-letter top-level domains.
     *
     * @param string $relativeUrl link to clean
     * @param string $baseUrl directory of parent (linking) page
     * @return string cleaned link
     */
    public function cleanLink($relativeUrl, $baseUrl)
    {
        // Make the link absolute, not relative.
        // NOTE(review): urlToAbsolute() can return false; that case is not
        // handled here and flows through the string functions below as ''.
        $link = self::urlToAbsolute($baseUrl, $relativeUrl);

        // Remove the anchor. The strict comparison matters: a '#' at offset 0
        // is a valid position and must not be mistaken for "not found".
        $anchor = strpos($link, '#');
        if ($anchor !== false) {
            $link = substr($link, 0, $anchor);
        }

        // Append a trailing slash unless the link already ends in one, points
        // at a file with an extension, or carries a query/fragment.
        if (!preg_match('#(^http://(.*)/$)|http://(.*)/(.*)\.([A-Za-z0-9]+)|http://(.*)/([^\?\#]*)(\?|\#)([^/]*)#i', $link)) {
            $link .= '/';
        }

        // Prefix www. on bare "name.tld" hosts. The dot is escaped and the
        // letter range is A-Z (not A-z) so only a real "name.tld/" matches.
        return preg_replace('#http://([^.]+)\.([a-zA-Z]{3})/#i', 'http://www.$1.$2/', $link);
    }
79
80
81
    /**
     * Tells whether a link points at an image, judged by its file extension.
     *
     * The link must end in .gif, .jpg, .jpeg, .png or .bmp (case-insensitive).
     *
     * @param string $link target link
     * @return bool true on image, false on anything else
     */
    public static function isImage($link)
    {
        return preg_match('%\.(gif|jpe?g|png|bmp)$%i', $link) === 1;
    }
91
92
    /**
     * Checks to see that a given link is within the domain/host whitelist
     *
     * The host is taken from the start of the link (an optional http:// or
     * https:// prefix followed by everything up to the first slash) and is
     * compared for exact equality against each whitelist entry.
     *
     * @param string $link target link
     * @param string|string[] $domainArray a single allowed host or a list of allowed hosts
     * @return bool true if out of domain, false if on domain whitelist
     */
    public static function outOfDomain($link, $domainArray)
    {
        // Accept a single host as well as a list. Wrapping is required: using
        // the [] operator on a string is a fatal error.
        if (!is_array($domainArray)) {
            $domainArray = [$domainArray];
        }

        // Get the host name from the URL. A link with no extractable host
        // cannot be on the whitelist.
        if (!preg_match('/^(https?:\/\/)?([^\/]+)/i', $link, $matches)) {
            return true;
        }

        return !in_array($matches[2], $domainArray, true);
    }
118
119
    /**
     * Checks to see that a given link matches a pattern in the exclude list
     *
     * The link is URL-decoded once and tested against each pattern in turn.
     * On the first match a short HTML notice is echoed and true is returned.
     *
     * @param string $link target link
     * @param string|string[] $excludedArray one PCRE pattern or a list of PCRE patterns (with delimiters)
     * @return bool true if matches exclude, false if no match
     */
    public function excludeByPattern($link, $excludedArray = [])
    {
        // Accept a single pattern as well as a list. Wrapping is required:
        // using the [] operator on a string is a fatal error.
        if (!is_array($excludedArray)) {
            $excludedArray = [$excludedArray];
        }

        $decodedLink = urldecode($link);

        foreach ($excludedArray as $pattern) {
            if (preg_match($pattern, $decodedLink)) {
                // The link comes from crawled pages, so escape it (and the
                // pattern) before writing it into HTML output.
                echo '<p>matched exclude pattern <b>'
                    . htmlspecialchars($pattern, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                    . '</b> in '
                    . htmlspecialchars($decodedLink, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                    . '</p>';

                return true;
            }
        }

        return false;
    }
138
139
    /**
     * Tells whether a link is a mailto: link.
     *
     * The check is a case-insensitive search for "mailto:" anywhere in the link.
     *
     * @param string $link Link to check
     * @return bool true on mailto:, false on everything else
     */
    public static function isMailto($link)
    {
        return stripos($link, 'mailto:') !== false;
    }
149
150
    /* Deprecated (I think)
151
152
    public function count_slashes($url) {
153
        if (strlen($url)<7) return 0;
154
        return substr_count($url,'/',7);
155
    }
156
157
    public function get_slashes($url) {
158
        if (preg_match_all('#/#',$url,$matches,PREG_OFFSET_CAPTURE,7)) return $matches[0];
159
        else return array();
160
    }
161
    */
162
163
    /**
     * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar)
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Resolution order:
     *  - a relative URL that already has a scheme is returned as-is (path cleaned);
     *  - otherwise the base scheme is applied; if the relative URL has its own
     *    host, it is returned with its path cleaned;
     *  - otherwise the base authority (host, port, user, pass) is copied; an
     *    empty relative path inherits the base path (and the base query when
     *    the relative URL has none);
     *  - a relative path that does not start with "/" is merged with the
     *    directory of the base path, and dot segments are removed.
     *
     * @param string $baseUrl Directory of linking page
     * @param string $relativeUrl URL to convert to absolute
     * @return string|false Absolute URL, or false when either URL cannot be
     *                      parsed or the base URL lacks a scheme or host
     */
    public static function urlToAbsolute($baseUrl, $relativeUrl)
    {
        $r = self::splitUrl($relativeUrl);
        if ($r === false) {
            return false;
        }

        // If relative URL has a scheme, clean path and return.
        if (!empty($r['scheme'])) {
            if (!empty($r['path']) && $r['path'][0] === '/') {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }

        // Make sure the base URL is absolute.
        $b = self::splitUrl($baseUrl);
        if ($b === false || empty($b['scheme']) || empty($b['host'])) {
            return false;
        }

        $r['scheme'] = $b['scheme'];

        // If relative URL has an authority, clean path and return.
        if (isset($r['host'])) {
            if (!empty($r['path'])) {
                $r['path'] = self::urlRemoveDotSegments($r['path']);
            }

            return self::joinUrl($r);
        }
        unset($r['port'], $r['user'], $r['pass']);

        // Copy base authority.
        $r['host'] = $b['host'];
        foreach (['port', 'user', 'pass'] as $key) {
            if (isset($b[$key])) {
                $r[$key] = $b[$key];
            }
        }

        // If relative URL has no path, use base path.
        if (empty($r['path'])) {
            if (!empty($b['path'])) {
                $r['path'] = $b['path'];
            }

            if (!isset($r['query']) && isset($b['query'])) {
                $r['query'] = $b['query'];
            }

            return self::joinUrl($r);
        }

        // If relative URL path doesn't start with /, merge with base path.
        if ($r['path'][0] !== '/') {
            // The base URL may have no path at all (e.g. "http://host"), so
            // default to '' instead of reading a missing array key.
            $base = mb_strrchr($b['path'] ?? '', '/', true, 'UTF-8');
            if ($base === false) {
                $base = '';
            }
            $r['path'] = $base . '/' . $r['path'];
        }
        $r['path'] = self::urlRemoveDotSegments($r['path']);

        return self::joinUrl($r);
    }
241
242
    /**
     * Removes "." and ".." segments (and empty segments) from a URL path.
     *
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * A leading slash is preserved, and a trailing slash is preserved unless
     * the result is just "/". A ".." with nothing left to pop is dropped.
     *
     * @param string $path URL path to normalise
     * @return string the path without dot segments
     */
    public static function urlRemoveDotSegments($path)
    {
        $path = (string) $path;

        // "/" is a single byte that never occurs inside a multi-byte UTF-8
        // sequence, so a plain byte-wise split is safe and, unlike a /u regex
        // split, cannot fail on malformed input.
        $outSegs = [];
        foreach (explode('/', $path) as $seg) {
            if ($seg === '' || $seg === '.') {
                continue;
            }
            if ($seg === '..') {
                array_pop($outSegs);
            } else {
                $outSegs[] = $seg;
            }
        }

        $outPath = implode('/', $outSegs);
        if ($path !== '' && $path[0] === '/') {
            $outPath = '/' . $outPath;
        }

        // Keep a trailing slash when the input had one. A byte-wise check of
        // the last character replaces the mb_strrpos() call that passed the
        // encoding in the integer $offset position.
        if ($outPath !== '/' && substr($path, -1) === '/') {
            $outPath .= '/';
        }

        return $outPath;
    }
277
278
    /**
     * Splits a URL into its components using a single regular expression.
     *
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Only the components present in the URL appear in the result. Possible
     * keys are: scheme (lower-cased), user, pass, host, port, path, query,
     * fragment. An empty URL yields an empty array.
     *
     * @param string $url URL to split
     * @param bool $decode when true, user, pass, path, query, fragment and a
     *                     named host are passed through rawurldecode()
     * @return array|false map of URL components, or false if $url does not
     *                     match the URL pattern
     */
    public static function splitUrl($url, $decode = true)
    {
        $m = [];
        $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
        $xpchar        = $xunressub . ':@%';

        $xscheme       = '([a-zA-Z][a-zA-Z\d+-.]*)';

        $xuserinfo     = '((['  . $xunressub . '%]*)' .
                        '(:([' . $xunressub . ':%]*))?)';

        $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

        $xipv6         = '(\[([a-fA-F\d.:]+)\])';

        $xhost_name    = '([a-zA-Z%]+)';
        // $xhost_name    = '([a-zA-Z\d-.%]+)'; @todo changed because it was causing an error in this parser

        $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
        $xport         = '(\d*)';
        $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                        '?(:' . $xport . ')?)';

        $xslash_seg    = '(/[' . $xpchar . ']*)';
        $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
        $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
        $xpath_abs     = '(/(' . $xpath_rel . ')?)';
        $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                        '|' . $xpath_rel . ')';

        $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

        $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                        '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

        // Split the URL into components.
        if (!preg_match('!' . $xurl . '!', $url, $m)) {
            return false;
        }

        // Start from an empty map so a URL with no components (e.g. '')
        // returns [] rather than an undefined variable.
        $parts = [];
        // Set only when the host matched the host-name alternative; such a
        // host is the only kind that gets URL-decoded below.
        $hostIsName = false;

        if (!empty($m[2])) {
            $parts['scheme'] = strtolower($m[2]);
        }

        // Group 7 is "userinfo@"; group 9 is the user name within it.
        if (!empty($m[7])) {
            $parts['user'] = isset($m[9]) ? $m[9] : '';
        }
        // Group 10 is ":password"; group 11 is the password itself.
        if (!empty($m[10])) {
            $parts['pass'] = $m[11];
        }

        // Groups 13 / 14 / 16: host name, IPv4 literal, IPv6 literal (without
        // brackets). Group 5 is the "//authority" prefix: when it matched but
        // no host did, the host is present and empty.
        if (!empty($m[13])) {
            $parts['host'] = $m[13];
            $hostIsName = true;
        } elseif (!empty($m[14])) {
            $parts['host'] = $m[14];
        } elseif (!empty($m[16])) {
            $parts['host'] = $m[16];
        } elseif (!empty($m[5])) {
            $parts['host'] = '';
        }
        // Group 17 is ":port"; group 18 is the port digits.
        if (!empty($m[17])) {
            $parts['port'] = $m[18];
        }

        // Groups 19 / 21 / 25: path after an authority, absolute path,
        // relative path.
        if (!empty($m[19])) {
            $parts['path'] = $m[19];
        } elseif (!empty($m[21])) {
            $parts['path'] = $m[21];
        } elseif (!empty($m[25])) {
            $parts['path'] = $m[25];
        }

        // Groups 27/28: "?query" and query; groups 29/30: "#fragment" and fragment.
        if (!empty($m[27])) {
            $parts['query'] = $m[28];
        }
        if (!empty($m[29])) {
            $parts['fragment'] = $m[30];
        }

        if (!$decode) {
            return $parts;
        }

        foreach (['user', 'pass', 'path', 'query', 'fragment'] as $key) {
            if (!empty($parts[$key])) {
                $parts[$key] = rawurldecode($parts[$key]);
            }
        }
        if ($hostIsName) {
            $parts['host'] = rawurldecode($parts['host']);
        }

        return $parts;
    }
363
364
    /**
     * Assembles a URL string from a map of components (the inverse of splitUrl()).
     *
     * Required public function of URL to absolute
     *
     * Inspired from code available at http://nadeausoftware.com/node/79,
     * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
     *
     * Recognised keys: scheme, user, pass, host, port, path, query, fragment.
     * The authority ("//user:pass@host:port") is emitted only when 'host' is
     * set; user and pass are emitted only inside an authority.
     *
     * @param array $parts URL components keyed as above
     * @param bool $encode when true, user, pass, path, query, fragment and any
     *                     host not matched by the address pattern are passed
     *                     through rawurlencode() (slashes in the path are kept)
     * @return string the assembled URL
     */
    public static function joinUrl( $parts, $encode=TRUE )
    {
        if ($encode) {
            foreach (['user', 'pass'] as $key) {
                if (isset($parts[$key])) {
                    $parts[$key] = rawurlencode($parts[$key]);
                }
            }

            if (isset($parts['host'])) {
                $isAddress = preg_match('!^(\[[\da-f.:]+\]])|([\da-f.:]+)$!ui', $parts['host']);
                if (!$isAddress) {
                    $parts['host'] = rawurlencode($parts['host']);
                }
            }

            if (!empty($parts['path'])) {
                // Encode everything, then restore the path separators.
                $encodedPath = rawurlencode($parts['path']);
                $parts['path'] = preg_replace('!%2F!ui', '/', $encodedPath);
            }

            foreach (['query', 'fragment'] as $key) {
                if (isset($parts[$key])) {
                    $parts[$key] = rawurlencode($parts[$key]);
                }
            }
        }

        $hasPath = !empty($parts['path']);

        $scheme = !empty($parts['scheme']) ? $parts['scheme'] . ':' : '';

        $authority = '';
        if (isset($parts['host'])) {
            $credentials = '';
            if (isset($parts['user'])) {
                $credentials = $parts['user'];
                if (isset($parts['pass'])) {
                    $credentials .= ':' . $parts['pass'];
                }
                $credentials .= '@';
            }

            // An IPv6 literal is wrapped in brackets; IPv4 and names are not.
            $host = preg_match('!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'])
                ? '[' . $parts['host'] . ']'
                : $parts['host'];

            $port = isset($parts['port']) ? ':' . $parts['port'] : '';

            // A path following an authority must be separated from it by "/".
            $separator = ($hasPath && $parts['path'][0] != '/') ? '/' : '';

            $authority = '//' . $credentials . $host . $port . $separator;
        }

        $path     = $hasPath ? $parts['path'] : '';
        $query    = isset($parts['query']) ? '?' . $parts['query'] : '';
        $fragment = isset($parts['fragment']) ? '#' . $parts['fragment'] : '';

        return $scheme . $authority . $path . $query . $fragment;
    }
429
430
}
431