<?php

/*
 * (c) Markus Lanthaler <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace ML\IRI;

/**
 * IRI represents an IRI as per RFC3987.
 *
 * An instance stores the individual components of an IRI reference (scheme,
 * user information, host, port, path, query and fragment) and offers
 * operations to resolve references against it and to relativize it against
 * a base IRI.
 *
 * @author Markus Lanthaler <[email protected]>
 *
 * @link http://tools.ietf.org/html/rfc3987 RFC3987
 */
class IRI
{
    /**
     * The scheme
     *
     * @var string|null
     */
    private $scheme = null;

    /**
     * The user information
     *
     * @var string|null
     */
    private $userinfo = null;

    /**
     * The host
     *
     * An empty string means that an (empty) authority is present, e.g.
     * `file:///path`; null means there is no authority at all.
     *
     * @var string|null
     */
    private $host = null;

    /**
     * The port
     *
     * @var string|null
     */
    private $port = null;

    /**
     * The path
     *
     * @var string
     */
    private $path = '';

    /**
     * The query component
     *
     * @var string|null
     */
    private $query = null;

    /**
     * The fragment identifier
     *
     * @var string|null
     */
    private $fragment = null;


    /**
     * Constructor
     *
     * Passing null creates an empty IRI, passing a string parses it and
     * passing another IRI instance copies all of its components.
     *
     * @param null|string|IRI $iri The IRI.
     *
     * @throws \InvalidArgumentException If the passed value is neither
     *                                   null, a string, nor an IRI.
     *
     * @api
     */
    public function __construct($iri = null)
    {
        if (null === $iri) {
            return;
        } elseif (is_string($iri)) {
            $this->parse($iri);
        } elseif ($iri instanceof IRI) {
            $this->scheme = $iri->scheme;
            $this->userinfo = $iri->userinfo;
            $this->host = $iri->host;
            $this->port = $iri->port;
            $this->path = $iri->path;
            $this->query = $iri->query;
            $this->fragment = $iri->fragment;
        } else {
            throw new \InvalidArgumentException(
                'Expecting a string or an IRI, got ' .
                (is_object($iri) ? get_class($iri) : gettype($iri))
            );
        }
    }

    /**
     * Get the scheme
     *
     * @return string|null Returns the scheme or null if not set.
     */
    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * Get the authority
     *
     * The authority is composed as `[userinfo@]host[:port]`. User
     * information and port are only considered if a host is set.
     *
     * @return string|null Returns the authority or null if not set.
     */
    public function getAuthority()
    {
        $authority = null;

        if (null !== $this->host) {
            if (null !== $this->userinfo) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (null !== $this->port) {
                $authority .= ':' . $this->port;
            }
        }

        return $authority;
    }

    /**
     * Get the user information
     *
     * @return string|null Returns the user information or null if not set.
     */
    public function getUserInfo()
    {
        return $this->userinfo;
    }

    /**
     * Get the host
     *
     * @return string|null Returns the host or null if not set.
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Get the port
     *
     * @return string|null Returns the port or null if not set.
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Get the path
     *
     * @return string Returns the path which might be empty.
     */
    public function getPath()
    {
        return $this->path;
    }

    /**
     * Get the query component
     *
     * @return string|null Returns the query component or null if not set.
     */
    public function getQuery()
    {
        return $this->query;
    }

    /**
     * Get the fragment identifier
     *
     * @return string|null Returns the fragment identifier or null if not set.
     */
    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Find out whether the IRI is absolute
     *
     * An IRI is considered absolute as soon as it has a scheme.
     *
     * @return bool Returns true if the IRI is absolute, false otherwise.
     *
     * @api
     */
    public function isAbsolute()
    {
        return (null !== $this->scheme);
    }

    /**
     * Get as absolute IRI, i.e., without fragment identifier
     *
     * This instance is left untouched; a modified clone is returned.
     *
     * @return IRI The absolute IRI, i.e., without fragment identifier
     *
     * @throws \UnexpectedValueException If the IRI is a relative IRI.
     *
     * @link http://tools.ietf.org/html/rfc3987#section-2.2 RFC3987 absolute-IRI
     *
     * @api
     */
    public function getAbsoluteIri()
    {
        if (false === $this->isAbsolute()) {
            throw new \UnexpectedValueException('Cannot get the absolute IRI of a relative IRI.');
        }

        $absolute = clone $this;
        $absolute->fragment = null;

        return $absolute;
    }

    /**
     * Check whether the passed IRI is equal
     *
     * The comparison is a plain string comparison of the serialized IRIs;
     * no normalization is applied.
     *
     * @param IRI|string $iri IRI to compare to this instance.
     *
     * @return bool Returns true if the two IRIs are equal, false otherwise.
     *
     * @api
     */
    public function equals($iri)
    {
        // Make sure both instances are strings
        return ($this->__toString() === (string)$iri);
    }

    /**
     * Resolve a (relative) IRI reference against this IRI
     *
     * @param IRI|string $reference The (relative) IRI reference that should
     *                              be resolved against this IRI.
     *
     * @return IRI The resolved IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2
     *
     * @api
     */
    public function resolve($reference)
    {
        $reference = new IRI($reference);

        $scheme = null;
        $authority = null;
        $path = '';
        $query = null;
        $fragment = null;

        // The Transform References algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.2.2
        if (null !== $reference->scheme) {
            // The reference is absolute, only its dot-segments are removed
            $scheme = $reference->scheme;
            $authority = $reference->getAuthority();
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } else {
            if (null !== $reference->getAuthority()) {
                // Network-path reference: only the scheme is inherited
                $authority = $reference->getAuthority();
                $path = self::removeDotSegments($reference->path);
                $query = $reference->query;
            } else {
                if (0 === strlen($reference->path)) {
                    // Empty path: keep the base path and fall back to the
                    // base query only if the reference has none
                    $path = $this->path;
                    if (null !== $reference->query) {
                        $query = $reference->query;
                    } else {
                        $query = $this->query;
                    }
                } else {
                    if ('/' === $reference->path[0]) {
                        $path = self::removeDotSegments($reference->path);
                    } else {
                        // T.path = merge(Base.path, R.path);
                        if ((null !== $this->getAuthority()) && ('' === $this->path)) {
                            $path = '/' . $reference->path;
                        } else {
                            // Keep everything up to and including the last
                            // slash of the base path (nothing if there is
                            // no slash) and append the reference path
                            if (false !== ($end = strrpos($this->path, '/'))) {
                                $path = substr($this->path, 0, $end + 1);
                            }
                            $path .= $reference->path;
                        }
                        $path = self::removeDotSegments($path);
                    }
                    $query = $reference->query;
                }

                $authority = $this->getAuthority();
            }
            $scheme = $this->scheme;
        }

        $fragment = $reference->fragment;


        // The Component Recomposition algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.3
        $result = '';

        if (null !== $scheme) {
            $result = $scheme . ':';
        }

        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $path;

        if (null !== $query) {
            $result .= '?' . $query;
        }

        if (null !== $fragment) {
            $result .= '#' . $fragment;
        }

        return new IRI($result);
    }

    /**
     * Transform this IRI to an IRI reference relative to the passed base IRI
     *
     * If the schemes differ, an unmodified copy of this IRI is returned. If
     * only the authorities differ, the copy is returned with its scheme
     * removed when $schemaRelative is true.
     *
     * @param IRI|string $base           The (relative) IRI reference that
     *                                   should be used as base IRI.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @return IRI The IRI reference relative to the passed base IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function relativeTo($base, $schemaRelative = false)
    {
        if (false === ($base instanceof IRI)) {
            $base = new IRI($base);
        }
        $relative = clone $this;

        // Compare scheme
        if ($relative->scheme !== $base->scheme) {
            return $relative;
        }

        // Compare authority
        if ($relative->getAuthority() !== $base->getAuthority()) {
            if (true === $schemaRelative) {
                $relative->scheme = null;
            }

            return $relative;
        }
        $relative->scheme = null;
        $relative->host = null;
        $relative->userinfo = null;
        $relative->port = null;

        // Compare path
        $baseSegments = explode('/', $base->path);
        $relativeSegments = explode('/', $relative->path);
        $len = min(count($baseSegments), count($relativeSegments)) - 1;  // do not move beyond last segment

        $pos = 0;

        // Skip the leading segments both paths have in common
        while (($baseSegments[$pos] === $relativeSegments[$pos]) && ($pos < $len)) {
            $pos++;
        }

        // Climb up one level for every remaining directory of the base path
        $relative->path = '';
        $numBaseSegments = count($baseSegments) - $pos - 1;
        if ($numBaseSegments > 0) {
            $relative->path .= str_repeat('../', $numBaseSegments);
        }

        if (($baseSegments[$pos] !== $relativeSegments[$pos]) ||
            ((null === $relative->query) && (null === $relative->fragment)) ||
            ($base->path === '')) {
            // if the two paths differ or if there's neither a query component nor a fragment
            // or there is no base path, we need to consider this IRI's path

            if (($relative->path === '') && (false !== strpos($relativeSegments[$pos], ':'))) {
                // if the first path segment contains a colon, we need to
                // prepend a ./ to distinguish it from an absolute IRI
                $relative->path .= './';
            }

            $relative->path .= implode('/', array_slice($relativeSegments, $pos));

            // .. and ensure that the resulting path isn't empty
            if (($relative->path === '')) {
                $relative->path .= './';
            }
        }

        if ($relative->query !== $base->query) {
            return $relative;
        }

        // Same query as the base: it can be dropped as long as a fragment
        // is left to reference
        if (null !== $relative->fragment) {
            $relative->query = null;
        }

        return $relative;
    }

    /**
     * Convert an IRI to a relative IRI reference using this IRI as base
     *
     * This method provides a more convenient interface than the
     * {@link IRI::relativeTo()} method if the base IRI stays the same while
     * the IRIs to convert to relative IRI references change.
     *
     * @param string|IRI $iri            The IRI to convert to a relative
     *                                   reference.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @see \ML\IRI\IRI::relativeTo()
     *
     * @return IRI The relative IRI reference
     */
    public function baseFor($iri, $schemaRelative = false)
    {
        if (false === ($iri instanceof IRI)) {
            $iri = new IRI($iri);
        }

        return $iri->relativeTo($this, $schemaRelative);
    }

    /**
     * Get a string representation of this IRI object
     *
     * Components that are null are omitted; components that are set but
     * empty are serialized together with their delimiter.
     *
     * @return string A string representation of this IRI instance.
     *
     * @api
     */
    public function __toString()
    {
        $result = '';

        if (null !== $this->scheme) {
            $result .= $this->scheme . ':';
        }

        if (null !== ($authority = $this->getAuthority())) {
            $result .= '//' . $authority;
        }

        $result .= $this->path;

        if (null !== $this->query) {
            $result .= '?' . $this->query;
        }

        if (null !== $this->fragment) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Parse an IRI into its components
     *
     * This is done according to
     * {@link http://tools.ietf.org/html/rfc3986#section-3.1 RFC3986}.
     *
     * @param string $iri The IRI to parse.
     */
    protected function parse($iri)
    {
        // Parse IRI by using the regular expression as specified by
        // http://tools.ietf.org/html/rfc3986#appendix-B
        // The additional groups "doubleslash" and "querydef" allow to
        // distinguish an empty authority/query from a missing one.
        $regex = '|^((?P<scheme>[^:/?#]+):)?' .
            '((?P<doubleslash>//)(?P<authority>[^/?#]*))?(?P<path>[^?#]*)' .
            '((?P<querydef>\?)(?P<query>[^#]*))?(#(?P<fragment>.*))?|';
        preg_match($regex, $iri, $match);

        // Extract scheme
        // (compare against the empty string instead of using empty() so
        // that a scheme of "0" is not silently dropped)
        if (isset($match['scheme']) && ('' !== $match['scheme'])) {
            $this->scheme = $match['scheme'];
        }

        // Parse authority (http://tools.ietf.org/html/rfc3986#section-3.2)
        if ('//' === $match['doubleslash']) {
            if (0 === strlen($match['authority'])) {
                $this->host = '';
            } else {
                $authority = $match['authority'];

                // Split authority into userinfo and host
                // (use last @ to ignore unescaped @ symbols)
                if (false !== ($pos = strrpos($authority, '@'))) {
                    $this->userinfo = substr($authority, 0, $pos);
                    $authority = substr($authority, $pos + 1);
                }

                // Split authority into host and port
                // (colons inside a bracketed host such as [::1] must not
                // be mistaken for the port delimiter)
                $hostEnd = 0;
                if ((strlen($authority) > 0) &&
                    ('[' === $authority[0]) &&
                    (false !== ($pos = strpos($authority, ']')))) {
                    $hostEnd = $pos;
                }

                if ((false !== ($pos = strrpos($authority, ':'))) && ($pos > $hostEnd)) {
                    $this->host = substr($authority, 0, $pos);
                    $this->port = substr($authority, $pos + 1);
                } else {
                    $this->host = $authority;
                }
            }
        }

        // Extract path (http://tools.ietf.org/html/rfc3986#section-3.3)
        // The path is always present but might be empty
        $this->path = $match['path'];

        // Extract query (http://tools.ietf.org/html/rfc3986#section-3.4)
        if (false === empty($match['querydef'])) {
            $this->query = $match['query'];
        }

        // Extract fragment (http://tools.ietf.org/html/rfc3986#section-3.5)
        // (the fragment is the last group and is thus only set if matched)
        if (isset($match['fragment'])) {
            $this->fragment = $match['fragment'];
        }
    }

    /**
     * Remove dot-segments
     *
     * This method removes the special "." and ".." complete path segments
     * from a path.
     *
     * Note that for a path starting with "../" or "./" only the dots are
     * dropped; the slash following them is kept.
     *
     * @param string $input The path from which dot segments should be removed.
     *
     * @return string The path with all dot-segments removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.4
     */
    private static function removeDotSegments($input)
    {
        $output = '';

        while (strlen($input) > 0) {
            if (('../' === substr($input, 0, 3)) || ('./' === substr($input, 0, 2))) {
                $input = substr($input, strpos($input, '/'));
            } elseif ('/./' === substr($input, 0, 3)) {
                $input = substr($input, 2);
            } elseif ('/.' === $input) {
                $input = '/';
            } elseif (('/../' === substr($input, 0, 4)) || ('/..' === $input)) {
                if ('/..' === $input) {
                    $input = '/';
                } else {
                    $input = substr($input, 3);
                }

                // Drop the last segment that has been written to the output
                if (false !== ($end = strrpos($output, '/'))) {
                    $output = substr($output, 0, $end);
                } else {
                    $output = '';
                }
            } elseif (('..' === $input) || ('.' === $input)) {
                $input = '';
            } else {
                // Move the first segment (including a leading slash, if
                // any) from the input to the output
                if (false === ($end = strpos($input, '/', 1))) {
                    $output .= $input;
                    $input = '';
                } else {
                    $output .= substr($input, 0, $end);
                    $input = substr($input, $end);
                }
            }
        }

        return $output;
    }
}