<?php

/*
 * (c) Markus Lanthaler <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace ML\IRI;

/**
 * IRI represents an IRI as per RFC3987.
 *
 * An instance stores the individual components (scheme, user information,
 * host, port, path, query and fragment) and offers reference resolution
 * ({@link IRI::resolve()}) as well as relativization
 * ({@link IRI::relativeTo()}, {@link IRI::baseFor()}).
 *
 * @author Markus Lanthaler <[email protected]>
 *
 * @link http://tools.ietf.org/html/rfc3987 RFC3987
 */
class IRI
{
    /**
     * The scheme
     *
     * @var string|null
     */
    private $scheme = null;

    /**
     * The user information
     *
     * @var string|null
     */
    private $userinfo = null;

    /**
     * The host
     *
     * An empty string means that an authority is present but its host is
     * empty; null means that there is no authority at all.
     *
     * @var string|null
     */
    private $host = null;

    /**
     * The port
     *
     * @var string|null
     */
    private $port = null;

    /**
     * The path
     *
     * @var string
     */
    private $path = '';

    /**
     * The query component
     *
     * @var string|null
     */
    private $query = null;

    /**
     * The fragment identifier
     *
     * @var string|null
     */
    private $fragment = null;


    /**
     * Constructor
     *
     * Passing null creates an empty IRI, passing a string parses it, and
     * passing another IRI copies all of its components.
     *
     * @param null|string|IRI $iri The IRI.
     *
     * @throws \InvalidArgumentException If the passed value is neither null,
     *                                   a string, nor an IRI, or if the
     *                                   string could not be parsed.
     *
     * @api
     */
    public function __construct($iri = null)
    {
        if (null === $iri) {
            return;
        }

        if (is_string($iri)) {
            $this->parse($iri);

            return;
        }

        if ($iri instanceof IRI) {
            $this->scheme = $iri->scheme;
            $this->userinfo = $iri->userinfo;
            $this->host = $iri->host;
            $this->port = $iri->port;
            $this->path = $iri->path;
            $this->query = $iri->query;
            $this->fragment = $iri->fragment;

            return;
        }

        throw new \InvalidArgumentException(
            'Expecting a string or an IRI, got ' .
            (is_object($iri) ? get_class($iri) : gettype($iri))
        );
    }

    /**
     * Get the scheme
     *
     * @return string|null Returns the scheme or null if not set.
     */
    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * Get the authority
     *
     * The authority is assembled from the user information, the host and
     * the port. It only exists if a host is set (an empty host counts).
     *
     * @return string|null Returns the authority or null if not set.
     */
    public function getAuthority()
    {
        if (null === $this->host) {
            return null;
        }

        $authority = '';

        if (null !== $this->userinfo) {
            $authority .= $this->userinfo . '@';
        }

        $authority .= $this->host;

        if (null !== $this->port) {
            $authority .= ':' . $this->port;
        }

        return $authority;
    }

    /**
     * Get the user information
     *
     * @return string|null Returns the user information or null if not set.
     */
    public function getUserInfo()
    {
        return $this->userinfo;
    }

    /**
     * Get the host
     *
     * @return string|null Returns the host or null if not set.
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Get the port
     *
     * @return string|null Returns the port or null if not set.
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Get the path
     *
     * @return string Returns the path which might be empty.
     */
    public function getPath()
    {
        return $this->path;
    }

    /**
     * Get the query component
     *
     * @return string|null Returns the query component or null if not set.
     */
    public function getQuery()
    {
        return $this->query;
    }

    /**
     * Get the fragment identifier
     *
     * @return string|null Returns the fragment identifier or null if not set.
     */
    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Find out whether the IRI is absolute
     *
     * An IRI is considered absolute as soon as it has a scheme.
     *
     * @return bool Returns true if the IRI is absolute, false otherwise.
     *
     * @api
     */
    public function isAbsolute()
    {
        return (null !== $this->scheme);
    }

    /**
     * Get as absolute IRI, i.e., without fragment identifier
     *
     * @return IRI The absolute IRI, i.e., without fragment identifier
     *
     * @throws \UnexpectedValueException If the IRI is a relative IRI.
     *
     * @link http://tools.ietf.org/html/rfc3987#section-2.2 RFC3987 absolute-IRI
     *
     * @api
     */
    public function getAbsoluteIri()
    {
        if (false === $this->isAbsolute()) {
            throw new \UnexpectedValueException('Cannot get the absolute IRI of a relative IRI.');
        }

        $absolute = clone $this;
        $absolute->fragment = null;

        return $absolute;
    }

    /**
     * Check whether the passed IRI is equal
     *
     * The comparison is a plain, case-sensitive comparison of the string
     * representations; no normalization is applied.
     *
     * @param IRI|string $iri IRI to compare to this instance.
     *
     * @return bool Returns true if the two IRIs are equal, false otherwise.
     *
     * @api
     */
    public function equals($iri)
    {
        // Make sure both instances are strings
        return ($this->__toString() === (string)$iri);
    }

    /**
     * Resolve a (relative) IRI reference against this IRI
     *
     * @param IRI|string $reference The (relative) IRI reference that should
     *                              be resolved against this IRI.
     *
     * @return IRI The resolved IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2
     *
     * @api
     */
    public function resolve($reference)
    {
        $reference = new IRI($reference);

        // The Transform References algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.2.2
        if (null !== $reference->scheme) {
            $scheme = $reference->scheme;
            $authority = $reference->getAuthority();
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } elseif (null !== ($referenceAuthority = $reference->getAuthority())) {
            $scheme = $this->scheme;
            $authority = $referenceAuthority;
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } else {
            $scheme = $this->scheme;
            $authority = $this->getAuthority();

            if ('' === $reference->path) {
                // Nothing but (optionally) a query and/or a fragment:
                // keep the base path and fall back to the base query
                $path = $this->path;
                $query = (null !== $reference->query) ? $reference->query : $this->query;
            } else {
                if ('/' === $reference->path[0]) {
                    $path = self::removeDotSegments($reference->path);
                } else {
                    // T.path = merge(Base.path, R.path);
                    $path = self::removeDotSegments($this->mergePath($reference->path));
                }

                $query = $reference->query;
            }
        }

        // The fragment is always taken from the reference. The result is
        // recomposed to a string and parsed again into a new instance.
        return new IRI(self::recompose($scheme, $authority, $path, $query, $reference->fragment));
    }

    /**
     * Transform this IRI to an IRI reference relative to the passed base IRI
     *
     * @param IRI|string $base           The (relative) IRI reference that
     *                                   should be used as base IRI.
     * @param bool       $schemaRelative Defines whether scheme-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @return IRI The IRI reference relative to the passed base IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function relativeTo($base, $schemaRelative = false)
    {
        if (false === ($base instanceof IRI)) {
            $base = new IRI($base);
        }
        $relative = clone $this;

        // Compare scheme
        if ($relative->scheme !== $base->scheme) {
            return $relative;
        }

        // Compare authority
        if ($relative->getAuthority() !== $base->getAuthority()) {
            if (true === $schemaRelative) {
                $relative->scheme = null;
            }

            return $relative;
        }
        $relative->scheme = null;
        $relative->host = null;
        $relative->userinfo = null;
        $relative->port = null;

        // Compare path
        $baseSegments     = explode('/', $base->path);
        $relativeSegments = explode('/', $relative->path);
        $len = min(count($baseSegments), count($relativeSegments)) - 1;  // do not move beyond last segment

        // Skip the leading segments both paths have in common
        $pos = 0;

        while (($baseSegments[$pos] === $relativeSegments[$pos]) && ($pos < $len)) {
            $pos++;
        }

        // Climb up once for every remaining base segment except the last one
        $relative->path = '';
        $numBaseSegments = count($baseSegments) - $pos - 1;
        if ($numBaseSegments > 0) {
            $relative->path .= str_repeat('../', $numBaseSegments);
        }

        if (($baseSegments[$pos] !== $relativeSegments[$pos]) ||
            ((null === $relative->query) && (null === $relative->fragment)) ||
            ($base->path === '')) {
            // if the two paths differ or if there's neither a query component nor a fragment
            // or there is no base path, we need to consider this IRI's path

            if (($relative->path === '') && (false !== strpos($relativeSegments[$pos], ':'))) {
                // if the first path segment contains a colon, we need to
                // prepend a ./ to distinguish it from an absolute IRI
                $relative->path .= './';
            }

            $relative->path .= implode('/', array_slice($relativeSegments, $pos));

            // .. and ensure that the resulting path isn't empty
            if ($relative->path === '') {
                $relative->path .= './';
            }
        }

        if ($relative->query !== $base->query) {
            return $relative;
        }

        // The queries are identical; it is only dropped if a fragment
        // remains in the relative reference
        if (null !== $relative->fragment) {
            $relative->query = null;
        }

        return $relative;
    }

    /**
     * Convert an IRI to a relative IRI reference using this IRI as base
     *
     * This method provides a more convenient interface than the
     * {@link IRI::relativeTo()} method if the base IRI stays the same while
     * the IRIs to convert to relative IRI references change.
     *
     * @param string|IRI $iri            The IRI to convert to a relative
     *                                   reference.
     * @param bool       $schemaRelative Defines whether scheme-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @see \ML\IRI\IRI::relativeTo()
     *
     * @return IRI The relative IRI reference
     */
    public function baseFor($iri, $schemaRelative = false)
    {
        if (false === ($iri instanceof IRI)) {
            $iri = new IRI($iri);
        }

        return $iri->relativeTo($this, $schemaRelative);
    }

    /**
     * Get a string representation of this IRI object
     *
     * @return string A string representation of this IRI instance.
     *
     * @api
     */
    public function __toString()
    {
        return self::recompose(
            $this->scheme,
            $this->getAuthority(),
            $this->path,
            $this->query,
            $this->fragment
        );
    }

    /**
     * Parse an IRI into its components
     *
     * This is done according to
     * {@link http://tools.ietf.org/html/rfc3986#section-3.1 RFC3986}.
     *
     * @param string $iri The IRI to parse.
     *
     * @throws \InvalidArgumentException If the regular expression engine
     *                                   fails to process the passed string.
     */
    protected function parse($iri)
    {
        // Parse IRI by using the regular expression as specified by
        // http://tools.ietf.org/html/rfc3986#appendix-B
        $regex = '|^((?P<scheme>[^:/?#]+):)?' .
                    '((?P<doubleslash>//)(?P<authority>[^/?#]*))?(?P<path>[^?#]*)' .
                    '((?P<querydef>\?)(?P<query>[^#]*))?(#(?P<fragment>.*))?|';

        // Every part of the expression is optional, so anything but exactly
        // one match indicates an error inside the regular expression engine
        if (1 !== preg_match($regex, $iri, $match)) {
            throw new \InvalidArgumentException('The passed IRI could not be parsed.');
        }

        // Extract scheme; compared against the empty string (instead of
        // using empty()) so that a scheme such as "0" is not discarded
        if ('' !== $match['scheme']) {
            $this->scheme = $match['scheme'];
        }

        // Parse authority (http://tools.ietf.org/html/rfc3986#section-3.2)
        if ('//' === $match['doubleslash']) {
            $this->parseAuthority($match['authority']);
        }

        // Extract path (http://tools.ietf.org/html/rfc3986#section-3.3)
        // The path is always present but might be empty
        $this->path = $match['path'];

        // Extract query (http://tools.ietf.org/html/rfc3986#section-3.4)
        // The "querydef" group tells an empty query ("?") from no query
        if (false === empty($match['querydef'])) {
            $this->query = $match['query'];
        }

        // Extract fragment (http://tools.ietf.org/html/rfc3986#section-3.5)
        // The key is only present if the group took part in the match
        if (isset($match['fragment'])) {
            $this->fragment = $match['fragment'];
        }
    }

    /**
     * Split an authority into user information, host, and port
     *
     * @param string $authority The authority without the leading "//".
     */
    private function parseAuthority($authority)
    {
        if ('' === $authority) {
            $this->host = '';

            return;
        }

        // Split authority into userinfo and host
        // (use last @ to ignore unescaped @ symbols)
        if (false !== ($pos = strrpos($authority, '@'))) {
            $this->userinfo = substr($authority, 0, $pos);
            $authority = (string) substr($authority, $pos + 1);
        }

        // Split authority into host and port. A colon only separates the
        // port if it is located after the closing bracket of an IP literal;
        // -1 means "no IP literal", so a colon at position 0 (empty host,
        // e.g. "//:8080") is recognized as port separator as well.
        $hostEnd = -1;
        if (('' !== $authority) &&
            ('[' === $authority[0]) &&
            (false !== ($pos = strpos($authority, ']')))) {
            $hostEnd = $pos;
        }

        if ((false !== ($pos = strrpos($authority, ':'))) && ($pos > $hostEnd)) {
            $this->host = (string) substr($authority, 0, $pos);
            $this->port = (string) substr($authority, $pos + 1);
        } else {
            $this->host = $authority;
        }
    }

    /**
     * Merge a relative-path reference with the path of this IRI
     *
     * @param string $referencePath The (non-empty, relative) path of the
     *                              reference.
     *
     * @return string The merged path; dot-segments are not removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.3
     */
    private function mergePath($referencePath)
    {
        if ((null !== $this->getAuthority()) && ('' === $this->path)) {
            return '/' . $referencePath;
        }

        // Keep everything up to and including the last slash of the base
        // path; if there is no slash, the base path is dropped entirely
        $end = strrpos($this->path, '/');
        if (false === $end) {
            return $referencePath;
        }

        return substr($this->path, 0, $end + 1) . $referencePath;
    }

    /**
     * Recompose IRI components into a string
     *
     * This is the Component Recomposition algorithm as specified by RFC3986.
     * Components that are null are omitted together with their delimiter.
     *
     * @param string|null $scheme    The scheme.
     * @param string|null $authority The authority.
     * @param string      $path      The path.
     * @param string|null $query     The query component.
     * @param string|null $fragment  The fragment identifier.
     *
     * @return string The recomposed IRI.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.3
     */
    private static function recompose($scheme, $authority, $path, $query, $fragment)
    {
        $result = '';

        if (null !== $scheme) {
            $result .= $scheme . ':';
        }

        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $path;

        if (null !== $query) {
            $result .= '?' . $query;
        }

        if (null !== $fragment) {
            $result .= '#' . $fragment;
        }

        return $result;
    }

    /**
     * Remove dot-segments
     *
     * This method removes the special "." and ".." complete path segments
     * from an IRI.
     *
     * @param string $input The IRI from which dot segments should be removed.
     *
     * @return string The IRI with all dot-segments removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.4
     */
    private static function removeDotSegments($input)
    {
        $output = '';

        while (strlen($input) > 0) {
            if (('../' === substr($input, 0, 3)) || ('./' === substr($input, 0, 2))) {
                // Drop the leading dots but keep the slash following them.
                // NOTE(review): RFC3986 step 2A removes the slash as well;
                // confirm that keeping it here is intentional.
                $input = substr($input, strpos($input, '/'));
            } elseif ('/./' === substr($input, 0, 3)) {
                $input = substr($input, 2);
            } elseif ('/.' === $input) {
                $input = '/';
            } elseif (('/../' === substr($input, 0, 4)) || ('/..' === $input)) {
                $input = ('/..' === $input) ? '/' : substr($input, 3);

                // ... and remove the last segment that has been written
                // to the output (if there is one)
                $end = strrpos($output, '/');
                $output = (false !== $end) ? substr($output, 0, $end) : '';
            } elseif (('..' === $input) || ('.' === $input)) {
                $input = '';
            } else {
                // Move the first path segment (including its leading slash,
                // if any) from the input to the output
                $end = strpos($input, '/', 1);

                if (false === $end) {
                    $output .= $input;
                    $input = '';
                } else {
                    $output .= substr($input, 0, $end);
                    $input = substr($input, $end);
                }
            }
        }

        return $output;
    }
}
