Completed
Push — master ( d1c62f...a04d4f )
by Markus
03:56
created

IRI.php (4 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/*
4
 * (c) Markus Lanthaler <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
10
namespace ML\IRI;
11
12
/**
13
 * IRI represents an IRI as per RFC3987.
14
 *
15
 * @author Markus Lanthaler <[email protected]>
16
 *
17
 * @link http://tools.ietf.org/html/rfc3987 RFC3987
18
 */
19
class IRI
{
    /**
     * The scheme
     *
     * @var string|null
     */
    private $scheme = null;

    /**
     * The user information
     *
     * @var string|null
     */
    private $userinfo = null;

    /**
     * The host
     *
     * @var string|null
     */
    private $host = null;

    /**
     * The port
     *
     * @var string|null
     */
    private $port = null;

    /**
     * The path
     *
     * @var string
     */
    private $path = '';

    /**
     * The query component
     *
     * @var string|null
     */
    private $query = null;

    /**
     * The fragment identifier
     *
     * @var string|null
     */
    private $fragment = null;


    /**
     * Constructor
     *
     * Passing null creates an empty IRI, a string is parsed into its
     * components, and another IRI instance is copied component by component.
     *
     * @param null|string|IRI $iri The IRI.
     *
     * @throws \InvalidArgumentException If the argument is neither null,
     *                                   a string, nor an IRI instance.
     *
     * @api
     */
    public function __construct($iri = null)
    {
        if (null === $iri) {
            return;
        } elseif (is_string($iri)) {
            $this->parse($iri);
        } elseif ($iri instanceof self) {
            $this->scheme = $iri->scheme;
            $this->userinfo = $iri->userinfo;
            $this->host = $iri->host;
            $this->port = $iri->port;
            $this->path = $iri->path;
            $this->query = $iri->query;
            $this->fragment = $iri->fragment;
        } else {
            throw new \InvalidArgumentException(
                'Expecting a string or an IRI, got ' .
                (is_object($iri) ? get_class($iri) : gettype($iri))
            );
        }
    }

    /**
     * Get the scheme
     *
     * @return string|null Returns the scheme or null if not set.
     */
    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * Get the authority
     *
     * The authority is recomposed from the user information, host and port
     * as `[userinfo@]host[:port]`.
     *
     * @return string|null Returns the authority or null if no host is set.
     */
    public function getAuthority()
    {
        $authority = null;

        if (null !== $this->host) {
            if (null !== $this->userinfo) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (null !== $this->port) {
                $authority .= ':' . $this->port;
            }
        }

        return $authority;
    }

    /**
     * Get the user information
     *
     * @return string|null Returns the user information or null if not set.
     */
    public function getUserInfo()
    {
        return $this->userinfo;
    }

    /**
     * Get the host
     *
     * @return string|null Returns the host or null if not set.
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Get the port
     *
     * @return string|null Returns the port or null if not set.
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Get the path
     *
     * @return string Returns the path which might be empty.
     */
    public function getPath()
    {
        return $this->path;
    }

    /**
     * Get the query component
     *
     * @return string|null Returns the query component or null if not set.
     */
    public function getQuery()
    {
        return $this->query;
    }

    /**
     * Get the fragment identifier
     *
     * @return string|null Returns the fragment identifier or null if not set.
     */
    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Find out whether the IRI is absolute
     *
     * An IRI is considered absolute as soon as it has a scheme.
     *
     * @return bool Returns true if the IRI is absolute, false otherwise.
     *
     * @api
     */
    public function isAbsolute()
    {
        return (null !== $this->scheme);
    }

    /**
     * Get as absolute IRI, i.e., without fragment identifier
     *
     * This instance is left untouched; a modified clone is returned.
     *
     * @return IRI The absolute IRI, i.e., without fragment identifier
     *
     * @throws \UnexpectedValueException If the IRI is a relative IRI.
     *
     * @link http://tools.ietf.org/html/rfc3987#section-2.2 RFC3987 absolute-IRI
     *
     * @api
     */
    public function getAbsoluteIri()
    {
        if (false === $this->isAbsolute()) {
            throw new \UnexpectedValueException('Cannot get the absolute IRI of a relative IRI.');
        }

        $absolute = clone $this;
        $absolute->fragment = null;

        return $absolute;
    }

    /**
     * Check whether the passed IRI is equal
     *
     * The comparison is a plain string comparison of the two serialized
     * IRIs; no normalization is applied.
     *
     * @param IRI|string $iri IRI to compare to this instance.
     *
     * @return bool Returns true if the two IRIs are equal, false otherwise.
     *
     * @api
     */
    public function equals($iri)
    {
        // Make sure both instances are strings
        return ($this->__toString() === (string)$iri);
    }

    /**
     * Resolve a (relative) IRI reference against this IRI
     *
     * @param IRI|string $reference The (relative) IRI reference that should
     *                              be resolved against this IRI.
     *
     * @return IRI The resolved IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2
     *
     * @api
     */
    public function resolve($reference)
    {
        $reference = new self($reference);

        $authority = null;
        $path = '';

        // The Transform References algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.2.2
        if ($reference->scheme) {
            $scheme = $reference->scheme;
            $authority = $reference->getAuthority();
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } else {
            if (null !== $reference->getAuthority()) {
                $authority = $reference->getAuthority();
                $path = self::removeDotSegments($reference->path);
                $query = $reference->query;
            } else {
                if (0 === strlen($reference->path)) {
                    // Empty reference path: keep the base path and fall back
                    // to the base query only if the reference has none
                    $path = $this->path;
                    if (null !== $reference->query) {
                        $query = $reference->query;
                    } else {
                        $query = $this->query;
                    }
                } else {
                    if ('/' === $reference->path[0]) {
                        $path = self::removeDotSegments($reference->path);
                    } else {
                        // T.path = merge(Base.path, R.path);
                        if ((null !== $this->getAuthority()) && ('' === $this->path)) {
                            $path = '/' . $reference->path;
                        } else {
                            // Drop everything after the last slash of the
                            // base path (the whole path if it has no slash)
                            if (false !== ($end = strrpos($this->path, '/'))) {
                                $path = substr($this->path, 0, $end + 1);
                            }
                            $path .= $reference->path;
                        }
                        $path = self::removeDotSegments($path);
                    }
                    $query = $reference->query;
                }

                $authority = $this->getAuthority();
            }
            $scheme = $this->scheme;
        }

        $fragment = $reference->fragment;


        // The Component Recomposition algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.3
        $result = '';

        if ($scheme) {
            $result = $scheme . ':';
        }

        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $path;

        if (null !== $query) {
            $result .= '?' . $query;
        }

        if (null !== $fragment) {
            $result .= '#' . $fragment;
        }

        return new self($result);
    }

    /**
     * Transform this IRI to an IRI reference relative to the passed base IRI
     *
     * If the schemes differ, an unmodified copy of this IRI is returned. If
     * only the authorities differ, the copy keeps its authority and loses
     * its scheme only when schema-relative IRIs were requested.
     *
     * @param IRI|string $base           The (relative) IRI reference that
     *                                   should be used as base IRI.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @return IRI The IRI reference relative to the passed base IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function relativeTo($base, $schemaRelative = false)
    {
        if (false === ($base instanceof self)) {
            $base = new self($base);
        }
        $relative = clone $this;

        // Compare scheme
        if ($relative->scheme !== $base->scheme) {
            return $relative;
        }

        // Compare authority
        if ($relative->getAuthority() !== $base->getAuthority()) {
            if (true === $schemaRelative) {
                $relative->scheme = null;
            }

            return $relative;
        }
        $relative->scheme = null;
        $relative->host = null;
        $relative->userinfo = null;
        $relative->port = null;

        // Compare path
        $baseSegments     = explode('/', $base->path);
        $relativeSegments = explode('/', $relative->path);
        $len = min(count($baseSegments), count($relativeSegments)) - 1;  // do not move beyond last segment

        $pos = 0;

        while (($baseSegments[$pos] === $relativeSegments[$pos]) && ($pos < $len)) {
            $pos++;
        }

        // Climb out of every base segment below the common prefix
        $relative->path = '';
        $numBaseSegments = count($baseSegments) - $pos - 1;
        if ($numBaseSegments > 0) {
            $relative->path .= str_repeat('../', $numBaseSegments);
        }

        if (($baseSegments[$pos] !== $relativeSegments[$pos]) ||
            ((null === $relative->query) && (null === $relative->fragment)) ||
            ($base->path === '')) {
            // if the two paths differ or if there's neither a query component nor a fragment
            // or there is no base path, we need to consider this IRI's path

            if (($relative->path === '') && (false !== strpos($relativeSegments[$pos], ':'))) {
                // if the first path segment contains a colon, we need to
                // prepend a ./ to distinguish it from an absolute IRI
                $relative->path .= './';
            }

            $relative->path .= implode('/', array_slice($relativeSegments, $pos));

            // .. and ensure that the resulting path isn't empty
            if (($relative->path === '')) {
                $relative->path .= './';
            }
        }

        if ($relative->query !== $base->query) {
            return $relative;
        }

        // Same query as the base: if a fragment is present the query can be
        // dropped from the relative reference
        if (null !== $relative->fragment) {
            $relative->query = null;
        }

        return $relative;
    }

    /**
     * Convert an IRI to a relative IRI reference using this IRI as base
     *
     * This method provides a more convenient interface than the
     * {@link IRI::relativeTo()} method if the base IRI stays the same while
     * the IRIs to convert to relative IRI references change.
     *
     * @param string|IRI $iri            The IRI to convert to a relative
     *                                   reference.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @see \ML\IRI\IRI::relativeTo()
     *
     * @return IRI The relative IRI reference
     */
    public function baseFor($iri, $schemaRelative = false)
    {
        if (false === ($iri instanceof self)) {
            $iri = new self($iri);
        }

        return $iri->relativeTo($this, $schemaRelative);
    }

    /**
     * Get a string representation of this IRI object
     *
     * @return string A string representation of this IRI instance.
     *
     * @api
     */
    public function __toString()
    {
        $result = '';

        if ($this->scheme) {
            $result .= $this->scheme . ':';
        }

        if (null !== ($authority = $this->getAuthority())) {
            $result .= '//' . $authority;
        }

        $result .= $this->path;

        if (null !== $this->query) {
            $result .= '?' . $this->query;
        }

        if (null !== $this->fragment) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Parse an IRI into its components
     *
     * This is done according to
     * {@link http://tools.ietf.org/html/rfc3986#section-3.1 RFC3986}.
     *
     * @param string $iri The IRI to parse.
     */
    protected function parse($iri)
    {
        // Parse IRI by using the regular expression as specified by
        // http://tools.ietf.org/html/rfc3986#appendix-B
        $regex = '|^((?P<scheme>[^:/?#]+):)?' .
                    '((?P<doubleslash>//)(?P<authority>[^/?#]*))?(?P<path>[^?#]*)' .
                    '((?P<querydef>\?)(?P<query>[^#]*))?(#(?P<fragment>.*))?|';
        preg_match($regex, $iri, $match);

        // Extract scheme
        if (false === empty($match['scheme'])) {
            $this->scheme = $match['scheme'];
        }

        // Parse authority, see RFC3986 section 3.2
        if ('//' === $match['doubleslash']) {
            if (0 === strlen($match['authority'])) {
                $this->host = '';
            } else {
                $authority = $match['authority'];

                // Split authority into userinfo and host
                // (use last @ to ignore unescaped @ symbols)
                if (false !== ($pos = strrpos($authority, '@'))) {
                    $this->userinfo = substr($authority, 0, $pos);
                    $authority = (string) substr($authority, $pos + 1);
                }

                // Split authority into host and port. A bracketed host may
                // contain colons itself, so only a colon located after the
                // closing bracket separates the port. The authority may be
                // empty at this point (e.g. "//user@/path"), thus its first
                // character must not be accessed unconditionally.
                $hostEnd = 0;
                if (('' !== $authority) &&
                    ('[' === $authority[0]) &&
                    (false !== ($pos = strpos($authority, ']')))) {
                    $hostEnd = $pos;
                }

                if ((false !== ($pos = strrpos($authority, ':'))) && ($pos > $hostEnd)) {
                    $this->host = substr($authority, 0, $pos);
                    $this->port = substr($authority, $pos + 1);
                } else {
                    $this->host = $authority;
                }
            }
        }

        // Extract path, see RFC3986 section 3.3
        // The path is always present but might be empty
        $this->path = $match['path'];

        // Extract query, see RFC3986 section 3.4
        // (the separate "querydef" group distinguishes an empty query from
        // an absent one)
        if (false === empty($match['querydef'])) {
            $this->query = $match['query'];
        }

        // Extract fragment, see RFC3986 section 3.5
        if (isset($match['fragment'])) {
            $this->fragment = $match['fragment'];
        }
    }

    /**
     * Remove dot-segments
     *
     * This method removes the special "." and ".." complete path segments
     * from an IRI.
     *
     * @param string $input The IRI from which dot segments should be removed.
     *
     * @return string The IRI with all dot-segments removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.4
     */
    private static function removeDotSegments($input)
    {
        $output = '';

        while (strlen($input) > 0) {
            if (('../' === substr($input, 0, 3)) || ('./' === substr($input, 0, 2))) {
                // Leading "../" or "./": drop the dots, keep the slash
                $input = substr($input, strpos($input, '/'));
            } elseif ('/./' === substr($input, 0, 3)) {
                $input = substr($input, 2);
            } elseif ('/.' === $input) {
                $input = '/';
            } elseif (('/../' === substr($input, 0, 4)) || ('/..' === $input)) {
                if ('/..' === $input) {
                    $input = '/';
                } else {
                    $input = substr($input, 3);
                }

                // Remove the last segment that was already moved to the output
                if (false !== ($end = strrpos($output, '/'))) {
                    $output = substr($output, 0, $end);
                } else {
                    $output = '';
                }
            } elseif (('..' === $input) || ('.' === $input)) {
                $input = '';
            } else {
                // Move the first path segment (including a leading slash,
                // if any) from the input to the output
                if (false === ($end = strpos($input, '/', 1))) {
                    $output .= $input;
                    $input = '';
                } else {
                    $output .= substr($input, 0, $end);
                    $input = substr($input, $end);
                }
            }
        }

        return $output;
    }
}
603