Completed
Push — master ( 6126ed...f10809 )
by Markus
13:03 queued 07:10
created

IRI.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/*
4
 * (c) Markus Lanthaler <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
10
namespace ML\IRI;
11
12
/**
13
 * IRI represents an IRI as per RFC3987.
14
 *
15
 * @author Markus Lanthaler <[email protected]>
16
 *
17
 * @link http://tools.ietf.org/html/rfc3987 RFC3987
18
 */
19
class IRI
{
    /**
     * The scheme
     *
     * @var string|null
     */
    private $scheme = null;

    /**
     * The user information
     *
     * @var string|null
     */
    private $userinfo = null;

    /**
     * The host
     *
     * @var string|null
     */
    private $host = null;

    /**
     * The port
     *
     * @var string|null
     */
    private $port = null;

    /**
     * The path
     *
     * @var string
     */
    private $path = '';

    /**
     * The query component
     *
     * @var string|null
     */
    private $query = null;

    /**
     * The fragment identifier
     *
     * @var string|null
     */
    private $fragment = null;


    /**
     * Constructor
     *
     * Passing null creates an empty IRI, a string is parsed into its
     * components, and another IRI instance is copied component by component.
     *
     * @param null|string|IRI $iri The IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function __construct($iri = null)
    {
        if (null === $iri) {
            return;
        } elseif (is_string($iri)) {
            $this->parse($iri);
        } elseif ($iri instanceof IRI) {
            $this->scheme = $iri->scheme;
            $this->userinfo = $iri->userinfo;
            $this->host = $iri->host;
            $this->port = $iri->port;
            $this->path = $iri->path;
            $this->query = $iri->query;
            $this->fragment = $iri->fragment;
        } else {
            throw new \InvalidArgumentException(
                'Expecting a string or an IRI, got ' .
                (is_object($iri) ? get_class($iri) : gettype($iri))
            );
        }
    }

    /**
     * Get the scheme
     *
     * @return string|null Returns the scheme or null if not set.
     */
    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * Get the authority
     *
     * The authority is recomposed as `[userinfo@]host[:port]`. It only
     * exists if a host is set (the host may be the empty string).
     *
     * @return string|null Returns the authority or null if not set.
     */
    public function getAuthority()
    {
        if (null === $this->host) {
            return null;
        }

        $authority = '';

        if (null !== $this->userinfo) {
            $authority .= $this->userinfo . '@';
        }

        $authority .= $this->host;

        if (null !== $this->port) {
            $authority .= ':' . $this->port;
        }

        return $authority;
    }

    /**
     * Get the user information
     *
     * @return string|null Returns the user information or null if not set.
     */
    public function getUserInfo()
    {
        return $this->userinfo;
    }

    /**
     * Get the host
     *
     * @return string|null Returns the host or null if not set.
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Get the port
     *
     * @return string|null Returns the port or null if not set.
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Get the path
     *
     * @return string Returns the path which might be empty.
     */
    public function getPath()
    {
        return $this->path;
    }

    /**
     * Get the query component
     *
     * @return string|null Returns the query component or null if not set.
     */
    public function getQuery()
    {
        return $this->query;
    }

    /**
     * Get the fragment identifier
     *
     * @return string|null Returns the fragment identifier or null if not set.
     */
    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Find out whether the IRI is absolute
     *
     * An IRI is considered absolute as soon as it has a scheme.
     *
     * @return bool Returns true if the IRI is absolute, false otherwise.
     *
     * @api
     */
    public function isAbsolute()
    {
        return (null !== $this->scheme);
    }

    /**
     * Get as absolute IRI, i.e., without fragment identifier
     *
     * @return IRI The absolute IRI, i.e., without fragment identifier
     *
     * @throws \UnexpectedValueException If the IRI is a relative IRI.
     *
     * @link http://tools.ietf.org/html/rfc3987#section-2.2 RFC3987 absolute-IRI
     *
     * @api
     */
    public function getAbsoluteIri()
    {
        if (false === $this->isAbsolute()) {
            throw new \UnexpectedValueException('Cannot get the absolute IRI of a relative IRI.');
        }

        $absolute = clone $this;
        $absolute->fragment = null;

        return $absolute;
    }

    /**
     * Check whether the passed IRI is equal
     *
     * The comparison is a plain string comparison of the serialized IRIs;
     * no normalization is performed.
     *
     * @param IRI|string $iri IRI to compare to this instance.
     *
     * @return bool Returns true if the two IRIs are equal, false otherwise.
     *
     * @api
     */
    public function equals($iri)
    {
        // Make sure both instances are strings
        return ($this->__toString() === (string) $iri);
    }

    /**
     * Resolve a (relative) IRI reference against this IRI
     *
     * @param IRI|string $reference The (relative) IRI reference that should
     *                              be resolved against this IRI.
     *
     * @return IRI The resolved IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2
     *
     * @api
     */
    public function resolve($reference)
    {
        $reference = new IRI($reference);

        $scheme = null;
        $authority = null;
        $path = '';
        $query = null;

        // The Transform References algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.2.2
        if (null !== $reference->scheme) {
            $scheme = $reference->scheme;
            $authority = $reference->getAuthority();
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } else {
            if (null !== $reference->getAuthority()) {
                $authority = $reference->getAuthority();
                $path = self::removeDotSegments($reference->path);
                $query = $reference->query;
            } else {
                if (0 === strlen($reference->path)) {
                    // Empty reference path: keep the base path and only
                    // override the query if the reference defines one
                    $path = $this->path;
                    if (null !== $reference->query) {
                        $query = $reference->query;
                    } else {
                        $query = $this->query;
                    }
                } else {
                    if ('/' === $reference->path[0]) {
                        $path = self::removeDotSegments($reference->path);
                    } else {
                        // T.path = merge(Base.path, R.path);
                        if ((null !== $this->getAuthority()) && ('' === $this->path)) {
                            $path = '/' . $reference->path;
                        } else {
                            // Replace everything after the last slash of the
                            // base path with the reference path
                            if (false !== ($end = strrpos($this->path, '/'))) {
                                $path = substr($this->path, 0, $end + 1);
                            }
                            $path .= $reference->path;
                        }
                        $path = self::removeDotSegments($path);
                    }
                    $query = $reference->query;
                }

                $authority = $this->getAuthority();
            }
            $scheme = $this->scheme;
        }

        $fragment = $reference->fragment;


        // The Component Recomposition algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.3
        $result = '';

        if (null !== $scheme) {
            $result = $scheme . ':';
        }

        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $path;

        if (null !== $query) {
            $result .= '?' . $query;
        }

        if (null !== $fragment) {
            $result .= '#' . $fragment;
        }

        return new IRI($result);
    }

    /**
     * Transform this IRI to a IRI reference relative to the passed base IRI
     *
     * If the schemes differ, an unmodified copy of this IRI is returned. If
     * only the authorities differ, the copy is returned with or without its
     * scheme depending on $schemaRelative.
     *
     * @param IRI|string $base           The (relative) IRI reference that
     *                                   should be used as base IRI.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @return IRI The IRI reference relative to the passed base IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function relativeTo($base, $schemaRelative = false)
    {
        if (false === ($base instanceof IRI)) {
            $base = new IRI($base);
        }
        $relative = clone $this;

        // Compare scheme
        if ($relative->scheme !== $base->scheme) {
            return $relative;
        }

        // Compare authority
        if ($relative->getAuthority() !== $base->getAuthority()) {
            if (true === $schemaRelative) {
                $relative->scheme = null;
            }

            return $relative;
        }
        $relative->scheme = null;
        $relative->host = null;
        $relative->userinfo = null;
        $relative->port = null;

        // Compare path
        $baseSegments     = explode('/', $base->path);
        $relativeSegments = explode('/', $relative->path);
        $len = min(count($baseSegments), count($relativeSegments)) - 1;  // do not move beyond last segment

        $pos = 0;

        while (($baseSegments[$pos] === $relativeSegments[$pos]) && ($pos < $len)) {
            $pos++;
        }

        // Climb up one level for every base segment below the common prefix
        $relative->path = '';
        $numBaseSegments = count($baseSegments) - $pos - 1;
        if ($numBaseSegments > 0) {
            $relative->path .= str_repeat('../', $numBaseSegments);
        }

        if (($baseSegments[$pos] !== $relativeSegments[$pos]) ||
            ((null === $relative->query) && (null === $relative->fragment)) ||
            ($base->path === '')) {
            // if the two paths differ or if there's neither a query component nor a fragment
            // or there is no base path, we need to consider this IRI's path

            if (($relative->path === '') && (false !== strpos($relativeSegments[$pos], ':'))) {
                // if the first path segment contains a colon, we need to
                // prepend a ./ to distinguish it from an absolute IRI
                $relative->path .= './';
            }

            $relative->path .= implode('/', array_slice($relativeSegments, $pos));

            // .. and ensure that the resulting path isn't empty
            if ($relative->path === '') {
                $relative->path .= './';
            }
        }

        if ($relative->query !== $base->query) {
            return $relative;
        }

        // Same query as the base: it can be dropped if a fragment remains
        if (null !== $relative->fragment) {
            $relative->query = null;
        }

        return $relative;
    }

    /**
     * Convert an IRI to a relative IRI reference using this IRI as base
     *
     * This method provides a more convenient interface than the
     * {@link IRI::relativeTo()} method if the base IRI stays the same while
     * the IRIs to convert to relative IRI references change.
     *
     * @param string|IRI $iri            The IRI to convert to a relative
     *                                   reference.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @see \ML\IRI\IRI::relativeTo()
     *
     * @return IRI The relative IRI reference
     */
    public function baseFor($iri, $schemaRelative = false)
    {
        if (false === ($iri instanceof IRI)) {
            $iri = new IRI($iri);
        }

        return $iri->relativeTo($this, $schemaRelative);
    }

    /**
     * Get a string representation of this IRI object
     *
     * @return string A string representation of this IRI instance.
     *
     * @api
     */
    public function __toString()
    {
        $result = '';

        if (null !== $this->scheme) {
            $result .= $this->scheme . ':';
        }

        if (null !== ($authority = $this->getAuthority())) {
            $result .= '//' . $authority;
        }

        $result .= $this->path;

        if (null !== $this->query) {
            $result .= '?' . $this->query;
        }

        if (null !== $this->fragment) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Parse an IRI into its components
     *
     * This is done according to
     * {@link http://tools.ietf.org/html/rfc3986#section-3.1 RFC3986}.
     *
     * @param string $iri The IRI to parse.
     */
    protected function parse($iri)
    {
        // Parse IRI by using the regular expression as specified by
        // http://tools.ietf.org/html/rfc3986#appendix-B
        $regex = '|^((?P<scheme>[^:/?#]+):)?' .
                    '((?P<doubleslash>//)(?P<authority>[^/?#]*))?(?P<path>[^?#]*)' .
                    '((?P<querydef>\?)(?P<query>[^#]*))?(#(?P<fragment>.*))?|';
        preg_match($regex, $iri, $match);

        // Extract scheme
        if (false === empty($match['scheme'])) {
            $this->scheme = $match['scheme'];
        }

        // Parse authority (http://tools.ietf.org/html/rfc3986#section-3.2)
        if ('//' === $match['doubleslash']) {
            if (0 === strlen($match['authority'])) {
                $this->host = '';
            } else {
                $authority = $match['authority'];

                // Split authority into userinfo and host
                // (use last @ to ignore unescaped @ symbols)
                if (false !== ($pos = strrpos($authority, '@'))) {
                    $this->userinfo = substr($authority, 0, $pos);
                    // Cast to string: substr() returns false instead of ''
                    // on older PHP versions if nothing follows the @
                    $authority = (string) substr($authority, $pos + 1);
                }

                // Split authority into host and port; a colon inside an
                // IP-literal ([...]) must not be mistaken for the port
                // separator. The authority may be empty at this point
                // (e.g. "user@"), so check before reading its first byte.
                $hostEnd = 0;
                if (('' !== $authority) &&
                    ('[' === $authority[0]) &&
                    (false !== ($pos = strpos($authority, ']')))) {
                    $hostEnd = $pos;
                }

                if ((false !== ($pos = strrpos($authority, ':'))) && ($pos > $hostEnd)) {
                    $this->host = substr($authority, 0, $pos);
                    $this->port = (string) substr($authority, $pos + 1);
                } else {
                    $this->host = $authority;
                }
            }
        }

        // Extract path (http://tools.ietf.org/html/rfc3986#section-3.3)
        // The path is always present but might be empty
        $this->path = $match['path'];

        // Extract query (http://tools.ietf.org/html/rfc3986#section-3.4)
        // (the separate querydef group distinguishes "no query" from an
        // empty query such as "?")
        if (false === empty($match['querydef'])) {
            $this->query = $match['query'];
        }

        // Extract fragment (http://tools.ietf.org/html/rfc3986#section-3.5)
        if (isset($match['fragment'])) {
            $this->fragment = $match['fragment'];
        }
    }

    /**
     * Remove dot-segments
     *
     * This method removes the special "." and ".." complete path segments
     * from an IRI.
     *
     * @param string $input The IRI from which dot segments should be removed.
     *
     * @return string The IRI with all dot-segments removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.4
     */
    private static function removeDotSegments($input)
    {
        $output = '';

        while (strlen($input) > 0) {
            if (('../' === substr($input, 0, 3)) || ('./' === substr($input, 0, 2))) {
                // Drop a leading "../" or "./" but keep the slash
                $input = substr($input, strpos($input, '/'));
            } elseif ('/./' === substr($input, 0, 3)) {
                $input = substr($input, 2);
            } elseif ('/.' === $input) {
                $input = '/';
            } elseif (('/../' === substr($input, 0, 4)) || ('/..' === $input)) {
                if ('/..' === $input) {
                    $input = '/';
                } else {
                    $input = substr($input, 3);
                }

                // Remove the last segment that was already moved to the output
                if (false !== ($end = strrpos($output, '/'))) {
                    $output = substr($output, 0, $end);
                } else {
                    $output = '';
                }
            } elseif (('..' === $input) || ('.' === $input)) {
                $input = '';
            } else {
                // Move the first path segment (including its leading slash,
                // if any) from the input to the output
                if (false === ($end = strpos($input, '/', 1))) {
                    $output .= $input;
                    $input = '';
                } else {
                    $output .= substr($input, 0, $end);
                    $input = substr($input, $end);
                }
            }
        }

        return $output;
    }
}
603