UriString::filterPort()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 5
c 1
b 0
f 0
nc 3
nop 1
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 3
rs 10
1
<?php
2
3
/**
4
 * League.Uri (https://uri.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Uri;
15
16
use League\Uri\Exceptions\IdnSupportMissing;
17
use League\Uri\Exceptions\SyntaxError;
18
use function array_merge;
19
use function defined;
20
use function explode;
21
use function filter_var;
22
use function function_exists;
23
use function gettype;
24
use function idn_to_ascii;
25
use function implode;
26
use function inet_pton;
27
use function is_object;
28
use function is_scalar;
29
use function method_exists;
30
use function preg_match;
31
use function rawurldecode;
32
use function sprintf;
33
use function strpos;
34
use function substr;
35
use const FILTER_FLAG_IPV6;
36
use const FILTER_VALIDATE_IP;
37
use const IDNA_ERROR_BIDI;
38
use const IDNA_ERROR_CONTEXTJ;
39
use const IDNA_ERROR_DISALLOWED;
40
use const IDNA_ERROR_DOMAIN_NAME_TOO_LONG;
41
use const IDNA_ERROR_EMPTY_LABEL;
42
use const IDNA_ERROR_HYPHEN_3_4;
43
use const IDNA_ERROR_INVALID_ACE_LABEL;
44
use const IDNA_ERROR_LABEL_HAS_DOT;
45
use const IDNA_ERROR_LABEL_TOO_LONG;
46
use const IDNA_ERROR_LEADING_COMBINING_MARK;
47
use const IDNA_ERROR_LEADING_HYPHEN;
48
use const IDNA_ERROR_PUNYCODE;
49
use const IDNA_ERROR_TRAILING_HYPHEN;
50
use const INTL_IDNA_VARIANT_UTS46;
51
52
/**
 * A class to parse a URI string according to RFC3986.
 *
 * All methods are static: `parse()` splits a URI string into its eight
 * components and `build()` assembles a URI string back from such components.
 *
 * @link    https://tools.ietf.org/html/rfc3986
 * @package League\Uri
 * @author  Ignace Nyamagana Butera
 * @since   6.0.0
 */
final class UriString
{
    /**
     * Default URI component values.
     *
     * Every component is undefined (`null`) except the path, which is
     * always defined and defaults to the empty string.
     */
    private const URI_COMPONENTS = [
        'scheme' => null, 'user' => null, 'pass' => null, 'host' => null,
        'port' => null, 'path' => '', 'query' => null, 'fragment' => null,
    ];

    /**
     * Simple URIs which do not need any parsing.
     *
     * Each entry maps a URI string to the components that differ from
     * the URI_COMPONENTS defaults.
     */
    private const URI_SHORTCUTS = [
        '' => [],
        '#' => ['fragment' => ''],
        '?' => ['query' => ''],
        '?#' => ['query' => '', 'fragment' => ''],
        '/' => ['path' => '/'],
        '//' => ['host' => ''],
    ];

    /**
     * Range of invalid characters in URI string (ASCII control characters).
     */
    private const REGEXP_INVALID_URI_CHARS = '/[\x00-\x1f\x7f]/';

    /**
     * RFC3986 regular expression URI splitter.
     *
     * @link https://tools.ietf.org/html/rfc3986#appendix-B
     */
    private const REGEXP_URI_PARTS = ',^
        (?<scheme>(?<scontent>[^:/?\#]+):)?    # URI scheme component
        (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part
        (?<path>[^?\#]*)                       # URI path component
        (?<query>\?(?<qcontent>[^\#]*))?       # URI query component
        (?<fragment>\#(?<fcontent>.*))?        # URI fragment component
    ,x';

    /**
     * URI scheme regular expression.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.1
     */
    private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d\+\.\-]*)?$/i';

    /**
     * IPvFuture regular expression.
     *
     * The `version` group captures ALL the hexadecimal digits of the version
     * (the quantifier is inside the group); a quantifier placed outside the
     * group would only keep the last digit and make `v16` look like `v6`.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     */
    private const REGEXP_IP_FUTURE = '/^
        v(?<version>[A-F0-9]+)\.
        (?:
            (?<unreserved>[a-z0-9_~\-\.])|
            (?<sub_delims>[!$&\'()*+,;=:])  # also include the : character
        )+
    $/ix';

    /**
     * General registered name regular expression.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     */
    private const REGEXP_REGISTERED_NAME = '/(?(DEFINE)
        (?<unreserved>[a-z0-9_~\-])   # . is missing as it is used to separate labels
        (?<sub_delims>[!$&\'()*+,;=])
        (?<encoded>%[A-F0-9]{2})
        (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
    )
    ^(?:(?&reg_name)\.)*(?&reg_name)\.?$/ix';

    /**
     * Invalid characters in host regular expression.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     */
    private const REGEXP_INVALID_HOST_CHARS = '/
        [:\/?#\[\]@ ]  # gen-delims characters as well as the space character
    /ix';

    /**
     * Invalid path for URI without scheme and authority regular expression.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.3
     */
    private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,';

    /**
     * Host and Port splitter regular expression.
     */
    private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,';

    /**
     * IDN Host detector regular expression (any byte outside \x20-\x7f).
     */
    private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/';

    /**
     * Only the link-local address block can have a Zone ID attached to it.
     *
     * The value is compared against the first two bytes of the packed
     * (inet_pton) representation of the address.
     */
    private const ZONE_ID_ADDRESS_BLOCK = "\xfe\x80";

    /**
     * Generate an URI string representation from its parsed representation
     * returned by UriString::parse() or PHP's parse_url.
     *
     * If you supply your own array, you are responsible for providing
     * valid components without their URI delimiters.
     *
     * A component is considered undefined when it is missing or `null`;
     * an empty string is a defined (empty) component and its delimiter
     * is still emitted.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-5.3
     * @link https://tools.ietf.org/html/rfc3986#section-7.5
     *
     * @param array{
     *  scheme:?string,
     *  user:?string,
     *  pass:?string,
     *  host:?string,
     *  port:?int,
     *  path:string,
     *  query:?string,
     *  fragment:?string
     * } $components
     */
    public static function build(array $components): string
    {
        // Everything located after the authority: path, query and fragment.
        $result = $components['path'] ?? '';
        if (isset($components['query'])) {
            $result .= '?'.$components['query'];
        }

        if (isset($components['fragment'])) {
            $result .= '#'.$components['fragment'];
        }

        $scheme = null;
        if (isset($components['scheme'])) {
            $scheme = $components['scheme'].':';
        }

        // Without a host there is no authority part at all.
        if (!isset($components['host'])) {
            return $scheme.$result;
        }

        $scheme .= '//';
        $authority = $components['host'];
        if (isset($components['port'])) {
            $authority .= ':'.$components['port'];
        }

        if (!isset($components['user'])) {
            return $scheme.$authority.$result;
        }

        // The password is only emitted when a user is defined.
        $authority = '@'.$authority;
        if (!isset($components['pass'])) {
            return $scheme.$components['user'].$authority.$result;
        }

        return $scheme.$components['user'].':'.$components['pass'].$authority.$result;
    }

    /**
     * Parse an URI string into its components.
     *
     * This method parses a URI and returns an associative array containing any
     * of the various components of the URI that are present.
     *
     * <code>
     * $components = UriString::parse('http://foo@test.example.com:42?query#');
     * var_export($components);
     * //will display
     * array(
     *   'scheme' => 'http',           // the URI scheme component
     *   'user' => 'foo',              // the URI user component
     *   'pass' => null,               // the URI pass component
     *   'host' => 'test.example.com', // the URI host component
     *   'port' => 42,                 // the URI port component
     *   'path' => '',                 // the URI path component
     *   'query' => 'query',           // the URI query component
     *   'fragment' => '',             // the URI fragment component
     * );
     * </code>
     *
     * The returned array is similar to PHP's parse_url return value with the following
     * differences:
     *
     * <ul>
     * <li>All components are always present in the returned array</li>
     * <li>Empty and undefined components are treated differently. An empty component is
     *   set to the empty string while an undefined component is set to the `null` value.</li>
     * <li>The path component is never undefined</li>
     * <li>The method parses the URI following the RFC3986 rules but you are still
     *   required to validate the returned components against its related scheme specific rules.</li>
     * </ul>
     *
     * @link https://tools.ietf.org/html/rfc3986
     *
     * @param mixed $uri any scalar or stringable object
     *
     * @throws \TypeError  if the URI is neither a scalar nor a stringable object
     * @throws SyntaxError if the URI contains invalid characters
     * @throws SyntaxError if the URI contains an invalid scheme
     * @throws SyntaxError if the URI contains an invalid path
     * @throws SyntaxError if the URI contains an invalid port or host
     *
     * @return array{
     *                scheme:?string,
     *                user:?string,
     *                pass:?string,
     *                host:?string,
     *                port:?int,
     *                path:string,
     *                query:?string,
     *                fragment:?string
     *                }
     */
    public static function parse($uri): array
    {
        if (is_object($uri) && method_exists($uri, '__toString')) {
            $uri = (string) $uri;
        }

        if (!is_scalar($uri)) {
            throw new \TypeError(sprintf('The uri must be a scalar or a stringable object `%s` given', gettype($uri)));
        }

        $uri = (string) $uri;

        if (isset(self::URI_SHORTCUTS[$uri])) {
            /** @var array{scheme:?string, user:?string, pass:?string, host:?string, port:?int, path:string, query:?string, fragment:?string} $components */
            $components = array_merge(self::URI_COMPONENTS, self::URI_SHORTCUTS[$uri]);

            return $components;
        }

        if (1 === preg_match(self::REGEXP_INVALID_URI_CHARS, $uri)) {
            throw new SyntaxError(sprintf('The uri `%s` contains invalid characters', $uri));
        }

        //if the first character is a known URI delimiter parsing can be simplified
        //the empty string is one of the shortcuts above so $uri[0] always exists here
        $first_char = $uri[0];

        //The URI is made of the fragment only
        if ('#' === $first_char) {
            [, $fragment] = explode('#', $uri, 2);
            $components = self::URI_COMPONENTS;
            $components['fragment'] = $fragment;

            return $components;
        }

        //The URI is made of the query and fragment
        if ('?' === $first_char) {
            [, $partial] = explode('?', $uri, 2);
            [$query, $fragment] = explode('#', $partial, 2) + [1 => null];
            $components = self::URI_COMPONENTS;
            $components['query'] = $query;
            $components['fragment'] = $fragment;

            return $components;
        }

        //use RFC3986 URI regexp to split the URI
        preg_match(self::REGEXP_URI_PARTS, $uri, $parts);
        //trailing unmatched groups are not reported by preg_match
        $parts += ['query' => '', 'fragment' => ''];

        if (':' === $parts['scheme'] || 1 !== preg_match(self::REGEXP_URI_SCHEME, $parts['scontent'])) {
            throw new SyntaxError(sprintf('The uri `%s` contains an invalid scheme', $uri));
        }

        if ('' === $parts['scheme'].$parts['authority'] && 1 === preg_match(self::REGEXP_INVALID_PATH, $parts['path'])) {
            throw new SyntaxError(sprintf('The uri `%s` contains an invalid path.', $uri));
        }

        /** @var array{scheme:?string, user:?string, pass:?string, host:?string, port:?int, path:string, query:?string, fragment:?string} $components */
        $components = array_merge(
            self::URI_COMPONENTS,
            '' === $parts['authority'] ? [] : self::parseAuthority($parts['acontent']),
            [
                'path' => $parts['path'],
                'scheme' => '' === $parts['scheme'] ? null : $parts['scontent'],
                'query' => '' === $parts['query'] ? null : $parts['qcontent'],
                'fragment' => '' === $parts['fragment'] ? null : $parts['fcontent'],
            ]
        );

        return $components;
    }

    /**
     * Parses the URI authority part.
     *
     * The authority is split on the first `@` into user information and
     * host/port; the user information is split on its first `:`.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2
     *
     * @throws SyntaxError If the port component is invalid
     * @throws SyntaxError If the host component is invalid
     *
     * @return array{user:?string, pass:?string, host:?string, port:?int}
     */
    private static function parseAuthority(string $authority): array
    {
        $components = ['user' => null, 'pass' => null, 'host' => '', 'port' => null];
        if ('' === $authority) {
            return $components;
        }

        $parts = explode('@', $authority, 2);
        if (isset($parts[1])) {
            [$components['user'], $components['pass']] = explode(':', $parts[0], 2) + [1 => null];
        }

        preg_match(self::REGEXP_HOST_PORT, $parts[1] ?? $parts[0], $matches);
        //the port group is not reported by preg_match when it did not match
        $matches += ['port' => ''];

        $components['port'] = self::filterPort($matches['port']);
        $components['host'] = self::filterHost($matches['host']);

        return $components;
    }

    /**
     * Filter and format the port component.
     *
     * An empty port is undefined (`null`); a port made only of digits is
     * returned as an integer.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.3
     *
     * @throws SyntaxError if the port contains anything other than digits
     */
    private static function filterPort(string $port): ?int
    {
        if ('' === $port) {
            return null;
        }

        if (1 === preg_match('/^\d*$/', $port)) {
            return (int) $port;
        }

        throw new SyntaxError(sprintf('The port `%s` is invalid', $port));
    }

    /**
     * Validates the host component and returns it unchanged.
     *
     * A host enclosed in square brackets is validated as an IP literal,
     * any other non-empty host is validated as a registered name.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws SyntaxError       if the IP literal or the registered name is invalid
     * @throws IdnSupportMissing if IDN support or ICU requirement are not available or met.
     */
    private static function filterHost(string $host): string
    {
        if ('' === $host) {
            return $host;
        }

        if ('[' !== $host[0] || ']' !== substr($host, -1)) {
            return self::filterRegisteredName($host);
        }

        if (!self::isIpHost(substr($host, 1, -1))) {
            throw new SyntaxError(sprintf('Host `%s` is invalid : the IP host is malformed', $host));
        }

        return $host;
    }

    /**
     * Validates a host which is not a bracketed IP literal and returns it unchanged.
     *
     * The percent-decoded host must either match the RFC3986 registered name
     * syntax (and, when it contains `xn--`, be convertible by idn_to_utf8) or
     * contain non-ASCII bytes and be convertible by idn_to_ascii.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws SyntaxError       if the registered name is invalid
     * @throws IdnSupportMissing if IDN support or ICU requirement are not available or met.
     */
    private static function filterRegisteredName(string $host): string
    {
        // @codeCoverageIgnoreStart
        // added because it is not possible in travis to disabled the ext/intl extension
        // see travis issue https://github.com/travis-ci/travis-ci/issues/4701
        static $idn_support = null;
        $idn_support = $idn_support ?? function_exists('idn_to_ascii') && defined('INTL_IDNA_VARIANT_UTS46');
        // @codeCoverageIgnoreEnd

        $formatted_host = rawurldecode($host);
        if (1 === preg_match(self::REGEXP_REGISTERED_NAME, $formatted_host)) {
            if (false === strpos($formatted_host, 'xn--')) {
                return $host;
            }

            // @codeCoverageIgnoreStart
            if (!$idn_support) {
                throw new IdnSupportMissing(sprintf('the host `%s` could not be processed for IDN. Verify that ext/intl is installed for IDN support and that ICU is at least version 4.6.', $host));
            }
            // @codeCoverageIgnoreEnd

            $unicode = idn_to_utf8($host, 0, INTL_IDNA_VARIANT_UTS46, $arr);

            //the conversion information is left empty when the conversion
            //fails before any IDNA check is performed; reading `errors`
            //would otherwise trigger an undefined index
            if ([] === $arr) {
                throw new SyntaxError(sprintf('Host `%s` is not a valid IDN host', $host));
            }

            if (0 !== $arr['errors']) {
                throw new SyntaxError(sprintf('The host `%s` is invalid : %s', $host, self::getIDNAErrors($arr['errors'])));
            }

            // @codeCoverageIgnoreStart
            if (false === $unicode) {
                throw new IdnSupportMissing(sprintf('The Intl extension is misconfigured for %s, please correct this issue before proceeding.', PHP_OS));
            }
            // @codeCoverageIgnoreEnd

            return $host;
        }

        //to test IDN host non-ascii characters must be present in the host
        if (1 !== preg_match(self::REGEXP_IDN_PATTERN, $formatted_host)) {
            throw new SyntaxError(sprintf('Host `%s` is invalid : the host is not a valid registered name', $host));
        }

        // @codeCoverageIgnoreStart
        if (!$idn_support) {
            throw new IdnSupportMissing(sprintf('the host `%s` could not be processed for IDN. Verify that ext/intl is installed for IDN support and that ICU is at least version 4.6.', $host));
        }
        // @codeCoverageIgnoreEnd

        $retval = idn_to_ascii($formatted_host, 0, INTL_IDNA_VARIANT_UTS46, $arr);

        if ([] === $arr) {
            throw new SyntaxError(sprintf('Host `%s` is not a valid IDN host', $host));
        }

        if (0 !== $arr['errors']) {
            throw new SyntaxError(sprintf('Host `%s` is not a valid IDN host : %s', $host, self::getIDNAErrors($arr['errors'])));
        }

        // @codeCoverageIgnoreStart
        if (false === $retval) {
            throw new IdnSupportMissing(sprintf('The Intl extension is misconfigured for %s, please correct this issue before proceeding.', PHP_OS));
        }
        // @codeCoverageIgnoreEnd

        //a percent sign in the ASCII form means the decoded host still
        //carried percent-encoded data
        if (false !== strpos($retval, '%')) {
            throw new SyntaxError(sprintf('Host `%s` is invalid : the host is not a valid registered name', $host));
        }

        return $host;
    }

    /**
     * Retrieves and format IDNA conversion error message.
     *
     * Each known IDNA error flag set in the bitmask contributes its
     * description to the comma separated message.
     *
     * @link http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html
     *
     * @param int $error_byte the `errors` bitmask reported by the idn_to_* functions
     */
    private static function getIDNAErrors(int $error_byte): string
    {
        /**
         * IDNA errors.
         */
        static $idn_errors = [
            IDNA_ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty',
            IDNA_ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes',
            IDNA_ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form',
            IDNA_ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")',
            IDNA_ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")',
            IDNA_ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions',
            IDNA_ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark',
            IDNA_ERROR_DISALLOWED => 'a label or domain name contains disallowed characters',
            IDNA_ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode',
            IDNA_ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop',
            IDNA_ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string',
            IDNA_ERROR_BIDI => 'a label does not meet the IDNA BiDi requirements (for right-to-left characters)',
            IDNA_ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements',
        ];

        $res = [];
        foreach ($idn_errors as $error => $reason) {
            if ($error === ($error_byte & $error)) {
                $res[] = $reason;
            }
        }

        return [] === $res ? 'Unknown IDNA conversion error.' : implode(', ', $res).'.';
    }

    /**
     * Validates a IPv6/IPvfuture host.
     *
     * The host is given without its enclosing square brackets. It is valid
     * when it is an IPv6 address, an IPvFuture address whose version is
     * neither 4 nor 6, or an IPv6 address starting with the Zone ID address
     * block followed by a `%` introduced zone identifier free of invalid
     * host characters.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @link https://tools.ietf.org/html/rfc6874#section-2
     * @link https://tools.ietf.org/html/rfc6874#section-4
     */
    private static function isIpHost(string $ip_host): bool
    {
        if (false !== filter_var($ip_host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return true;
        }

        if (1 === preg_match(self::REGEXP_IP_FUTURE, $ip_host, $matches)) {
            //versions 4 and 6 have their own dedicated syntax
            return !in_array($matches['version'], ['4', '6'], true);
        }

        //from here on only a scoped IPv6 address (address%zone) is acceptable
        $pos = strpos($ip_host, '%');
        if (false === $pos || 1 === preg_match(
            self::REGEXP_INVALID_HOST_CHARS,
            rawurldecode(substr($ip_host, $pos))
        )) {
            return false;
        }

        $ip_host = substr($ip_host, 0, $pos);

        return false !== filter_var($ip_host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
            && 0 === strpos((string) inet_pton($ip_host), self::ZONE_ID_ADDRESS_BLOCK);
    }
}
}
568