These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | /* |
||
4 | * (c) Markus Lanthaler <[email protected]> |
||
5 | * |
||
6 | * For the full copyright and license information, please view the LICENSE |
||
7 | * file that was distributed with this source code. |
||
8 | */ |
||
9 | |||
10 | namespace ML\IRI; |
||
11 | |||
/**
 * IRI represents an IRI as per RFC3987.
 *
 * An instance stores the individual components (scheme, user information,
 * host, port, path, query and fragment) and offers reference resolution
 * ({@link IRI::resolve()}) as well as the inverse operation
 * ({@link IRI::relativeTo()}).
 *
 * @author Markus Lanthaler <[email protected]>
 *
 * @link http://tools.ietf.org/html/rfc3987 RFC3987
 */
class IRI
{
    /**
     * The scheme
     *
     * @var string|null
     */
    private $scheme = null;

    /**
     * The user information
     *
     * @var string|null
     */
    private $userinfo = null;

    /**
     * The host
     *
     * @var string|null
     */
    private $host = null;

    /**
     * The port
     *
     * @var string|null
     */
    private $port = null;

    /**
     * The path
     *
     * @var string
     */
    private $path = '';

    /**
     * The query component
     *
     * @var string|null
     */
    private $query = null;

    /**
     * The fragment identifier
     *
     * @var string|null
     */
    private $fragment = null;


    /**
     * Constructor
     *
     * Passing null creates an empty IRI, passing a string parses it and
     * passing an IRI instance copies all of its components.
     *
     * @param null|string|IRI $iri The IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function __construct($iri = null)
    {
        if (null === $iri) {
            return;
        }

        if (is_string($iri)) {
            $this->parse($iri);

            return;
        }

        if ($iri instanceof IRI) {
            $this->scheme = $iri->scheme;
            $this->userinfo = $iri->userinfo;
            $this->host = $iri->host;
            $this->port = $iri->port;
            $this->path = $iri->path;
            $this->query = $iri->query;
            $this->fragment = $iri->fragment;

            return;
        }

        throw new \InvalidArgumentException(
            'Expecting a string or an IRI, got ' .
            (is_object($iri) ? get_class($iri) : gettype($iri))
        );
    }

    /**
     * Get the scheme
     *
     * @return string|null Returns the scheme or null if not set.
     */
    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * Get the authority
     *
     * The authority is composed as `[userinfo@]host[:port]`. It only exists
     * if a host is set; an empty host yields an empty (but non-null)
     * authority.
     *
     * @return string|null Returns the authority or null if not set.
     */
    public function getAuthority()
    {
        if (null === $this->host) {
            return null;
        }

        $authority = '';

        if (null !== $this->userinfo) {
            $authority .= $this->userinfo . '@';
        }

        $authority .= $this->host;

        if (null !== $this->port) {
            $authority .= ':' . $this->port;
        }

        return $authority;
    }

    /**
     * Get the user information
     *
     * @return string|null Returns the user information or null if not set.
     */
    public function getUserInfo()
    {
        return $this->userinfo;
    }

    /**
     * Get the host
     *
     * @return string|null Returns the host or null if not set.
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Get the port
     *
     * @return string|null Returns the port or null if not set.
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Get the path
     *
     * @return string Returns the path which might be empty.
     */
    public function getPath()
    {
        return $this->path;
    }

    /**
     * Get the query component
     *
     * @return string|null Returns the query component or null if not set.
     */
    public function getQuery()
    {
        return $this->query;
    }

    /**
     * Get the fragment identifier
     *
     * @return string|null Returns the fragment identifier or null if not set.
     */
    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Find out whether the IRI is absolute
     *
     * An IRI is considered absolute as soon as it has a scheme.
     *
     * @return bool Returns true if the IRI is absolute, false otherwise.
     *
     * @api
     */
    public function isAbsolute()
    {
        return (null !== $this->scheme);
    }

    /**
     * Get as absolute IRI, i.e., without fragment identifier
     *
     * @return IRI The absolute IRI, i.e., without fragment identifier
     *
     * @throws \UnexpectedValueException If the IRI is a relative IRI.
     *
     * @link http://tools.ietf.org/html/rfc3987#section-2.2 RFC3987 absolute-IRI
     *
     * @api
     */
    public function getAbsoluteIri()
    {
        if (false === $this->isAbsolute()) {
            throw new \UnexpectedValueException('Cannot get the absolute IRI of a relative IRI.');
        }

        $absolute = clone $this;
        $absolute->fragment = null;

        return $absolute;
    }

    /**
     * Check whether the passed IRI is equal
     *
     * The comparison is a plain string comparison of the serialized IRIs;
     * no normalization is performed.
     *
     * @param IRI|string $iri IRI to compare to this instance.
     *
     * @return bool Returns true if the two IRIs are equal, false otherwise.
     *
     * @api
     */
    public function equals($iri)
    {
        // Make sure both operands are strings
        return ($this->__toString() === (string) $iri);
    }

    /**
     * Resolve a (relative) IRI reference against this IRI
     *
     * @param IRI|string $reference The (relative) IRI reference that should
     *                              be resolved against this IRI.
     *
     * @return IRI The resolved IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2
     *
     * @api
     */
    public function resolve($reference)
    {
        $reference = new IRI($reference);

        $scheme = null;
        $authority = null;
        $path = '';
        $query = null;

        // The Transform References algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.2.2
        if (null !== $reference->scheme) {
            $scheme = $reference->scheme;
            $authority = $reference->getAuthority();
            $path = self::removeDotSegments($reference->path);
            $query = $reference->query;
        } else {
            $scheme = $this->scheme;

            if (null !== $reference->getAuthority()) {
                $authority = $reference->getAuthority();
                $path = self::removeDotSegments($reference->path);
                $query = $reference->query;
            } else {
                $authority = $this->getAuthority();

                if ('' === $reference->path) {
                    // Same document: keep the base path and, unless the
                    // reference defines its own, the base query
                    $path = $this->path;
                    $query = (null !== $reference->query) ? $reference->query : $this->query;
                } else {
                    if ('/' === $reference->path[0]) {
                        $path = $reference->path;
                    } elseif ((null !== $authority) && ('' === $this->path)) {
                        // T.path = merge(Base.path, R.path) for a base with
                        // an authority but an empty path
                        $path = '/' . $reference->path;
                    } else {
                        // T.path = merge(Base.path, R.path): replace
                        // everything after the last slash of the base path
                        $end = strrpos($this->path, '/');
                        $path = (false === $end) ? '' : substr($this->path, 0, $end + 1);
                        $path .= $reference->path;
                    }

                    $path = self::removeDotSegments($path);
                    $query = $reference->query;
                }
            }
        }

        $fragment = $reference->fragment;

        // The Component Recomposition algorithm as specified by RFC3986
        // see: http://tools.ietf.org/html/rfc3986#section-5.3
        $result = '';

        if (null !== $scheme) {
            $result = $scheme . ':';
        }

        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $path;

        if (null !== $query) {
            $result .= '?' . $query;
        }

        if (null !== $fragment) {
            $result .= '#' . $fragment;
        }

        return new IRI($result);
    }

    /**
     * Transform this IRI to a IRI reference relative to the passed base IRI
     *
     * The returned reference is built such that resolving it against the
     * base IRI leads back to this IRI. If scheme or authority differ, no
     * path-relative reference can be created and (a copy of) this IRI is
     * returned instead.
     *
     * @param IRI|string $base           The (relative) IRI reference that
     *                                   should be used as base IRI.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @return IRI The IRI reference relative to the passed base IRI.
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @api
     */
    public function relativeTo($base, $schemaRelative = false)
    {
        if (false === ($base instanceof IRI)) {
            $base = new IRI($base);
        }
        $relative = clone $this;

        // Compare scheme
        if ($relative->scheme !== $base->scheme) {
            return $relative;
        }

        // Compare authority
        $authority = $relative->getAuthority();
        if ($authority !== $base->getAuthority()) {
            // A schema-relative reference has to start with "//", dropping
            // the scheme of an IRI without authority would turn it into a
            // path-relative reference that resolves to something else
            if ((true === $schemaRelative) && (null !== $authority)) {
                $relative->scheme = null;
            }

            return $relative;
        }
        $relative->scheme = null;
        $relative->host = null;
        $relative->userinfo = null;
        $relative->port = null;

        // Compare path
        $targetPath = $relative->path;
        $baseSegments = explode('/', $base->path);
        $relativeSegments = explode('/', $targetPath);
        $len = min(count($baseSegments), count($relativeSegments)) - 1;  // do not move beyond last segment

        $pos = 0;

        while (($pos < $len) && ($baseSegments[$pos] === $relativeSegments[$pos])) {
            $pos++;
        }

        // Step out of every base directory that isn't shared
        $relative->path = '';
        $numBaseSegments = count($baseSegments) - $pos - 1;
        if ($numBaseSegments > 0) {
            $relative->path .= str_repeat('../', $numBaseSegments);
        }

        // The path may only be omitted if both paths are identical and the
        // reference still consists of something (query or fragment). It also
        // has to be kept if the base has a query that this IRI lacks, as a
        // reference without path would inherit the base's query.
        $needsPath = ($base->path !== $targetPath) ||
            ((null === $relative->query) && (null === $relative->fragment)) ||
            ('' === $base->path) ||
            ((null === $relative->query) && (null !== $base->query));

        if ($needsPath) {
            if (('' === $relative->path) && (false !== strpos($relativeSegments[$pos], ':'))) {
                // if the first path segment contains a colon, we need to
                // prepend a ./ to distinguish it from an absolute IRI
                $relative->path .= './';
            }

            $relative->path .= implode('/', array_slice($relativeSegments, $pos));

            // .. and ensure that the resulting path isn't empty
            if ('' === $relative->path) {
                $relative->path .= './';
            }
        }

        if ($relative->query !== $base->query) {
            return $relative;
        }

        // An identical query is only inherited from the base if the
        // reference has no path, i.e., if it is a pure fragment reference
        if ((null !== $relative->fragment) && ('' === $relative->path)) {
            $relative->query = null;
        }

        return $relative;
    }

    /**
     * Convert an IRI to a relative IRI reference using this IRI as base
     *
     * This method provides a more convenient interface than the
     * {@link IRI::relativeTo()} method if the base IRI stays the same while
     * the IRIs to convert to relative IRI references change.
     *
     * @param string|IRI $iri            The IRI to convert to a relative
     *                                   reference.
     * @param bool       $schemaRelative Defines whether schema-relative IRIs
     *                                   such as `//example.com` should be
     *                                   created (`true`) or not (`false`).
     *
     * @throws \InvalidArgumentException If an invalid IRI is passed.
     *
     * @see \ML\IRI\IRI::relativeTo()
     *
     * @return IRI The relative IRI reference
     */
    public function baseFor($iri, $schemaRelative = false)
    {
        if (false === ($iri instanceof IRI)) {
            $iri = new IRI($iri);
        }

        return $iri->relativeTo($this, $schemaRelative);
    }

    /**
     * Get a string representation of this IRI object
     *
     * The components are recomposed as specified by
     * {@link http://tools.ietf.org/html/rfc3986#section-5.3 RFC3986}.
     *
     * @return string A string representation of this IRI instance.
     *
     * @api
     */
    public function __toString()
    {
        $result = '';

        if (null !== $this->scheme) {
            $result .= $this->scheme . ':';
        }

        $authority = $this->getAuthority();
        if (null !== $authority) {
            $result .= '//' . $authority;
        }

        $result .= $this->path;

        if (null !== $this->query) {
            $result .= '?' . $this->query;
        }

        if (null !== $this->fragment) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Parse an IRI into its components
     *
     * This is done according to
     * {@link http://tools.ietf.org/html/rfc3986#section-3.1 RFC3986}.
     *
     * @param string $iri The IRI to parse.
     */
    protected function parse($iri)
    {
        // Parse IRI by using the regular expression as specified by
        // http://tools.ietf.org/html/rfc3986#appendix-B
        $regex = '|^((?P<scheme>[^:/?#]+):)?' .
                 '((?P<doubleslash>//)(?P<authority>[^/?#]*))?(?P<path>[^?#]*)' .
                 '((?P<querydef>\?)(?P<query>[^#]*))?(#(?P<fragment>.*))?|';
        preg_match($regex, $iri, $match);

        // Extract scheme; do not use empty() here as it would drop "0"
        if (isset($match['scheme']) && ('' !== $match['scheme'])) {
            $this->scheme = $match['scheme'];
        }

        // Parse authority (http://tools.ietf.org/html/rfc3986#section-3.2)
        if (isset($match['doubleslash']) && ('//' === $match['doubleslash'])) {
            $authority = $match['authority'];

            // Split authority into userinfo and host
            // (use last @ to ignore unescaped @ symbols)
            $pos = strrpos($authority, '@');
            if (false !== $pos) {
                $this->userinfo = (string) substr($authority, 0, $pos);
                $authority = (string) substr($authority, $pos + 1);
            }

            // Split authority into host and port. Colons inside an IP
            // literal ("[...]") belong to the host; the remaining authority
            // may be empty at this point, so check before reading offset 0.
            $hostEnd = -1;
            if (('' !== $authority) && ('[' === $authority[0])) {
                $pos = strpos($authority, ']');
                if (false !== $pos) {
                    $hostEnd = $pos;
                }
            }

            $pos = strrpos($authority, ':');
            if ((false !== $pos) && ($pos > $hostEnd)) {
                $this->host = (string) substr($authority, 0, $pos);
                $this->port = (string) substr($authority, $pos + 1);
            } else {
                $this->host = $authority;
            }
        }

        // Extract path (http://tools.ietf.org/html/rfc3986#section-3.3)
        // The path is always present but might be empty
        $this->path = $match['path'];

        // Extract query (http://tools.ietf.org/html/rfc3986#section-3.4)
        // The query is set, even if empty, as soon as there is a "?"
        if (isset($match['querydef']) && ('?' === $match['querydef'])) {
            $this->query = $match['query'];
        }

        // Extract fragment (http://tools.ietf.org/html/rfc3986#section-3.5)
        // The group is only part of the match if there is a "#"
        if (isset($match['fragment'])) {
            $this->fragment = $match['fragment'];
        }
    }

    /**
     * Remove dot-segments
     *
     * This method removes the special "." and ".." complete path segments
     * from a path.
     *
     * @param string $input The path from which dot segments should be removed.
     *
     * @return string The path with all dot-segments removed.
     *
     * @link http://tools.ietf.org/html/rfc3986#section-5.2.4
     */
    private static function removeDotSegments($input)
    {
        $output = '';

        while (strlen($input) > 0) {
            if ('../' === substr($input, 0, 3)) {
                // 2A: remove a leading "../" completely (incl. the slash)
                $input = (string) substr($input, 3);
            } elseif ('./' === substr($input, 0, 2)) {
                // 2A: remove a leading "./" completely (incl. the slash)
                $input = (string) substr($input, 2);
            } elseif ('/./' === substr($input, 0, 3)) {
                // 2B: replace a leading "/./" with "/"
                $input = substr($input, 2);
            } elseif ('/.' === $input) {
                // 2B: replace a complete "/." with "/"
                $input = '/';
            } elseif (('/../' === substr($input, 0, 4)) || ('/..' === $input)) {
                // 2C: replace "/../" or a complete "/.." with "/" and drop
                // the last segment that has already been written
                $input = ('/..' === $input) ? '/' : substr($input, 3);

                $end = strrpos($output, '/');
                $output = (false === $end) ? '' : substr($output, 0, $end);
            } elseif (('..' === $input) || ('.' === $input)) {
                // 2D: a complete "." or ".." is simply removed
                $input = '';
            } else {
                // 2E: move the first segment (including a leading slash,
                // excluding the next slash) to the output
                $end = strpos($input, '/', 1);

                if (false === $end) {
                    $output .= $input;
                    $input = '';
                } else {
                    $output .= substr($input, 0, $end);
                    $input = substr($input, $end);
                }
            }
        }

        return $output;
    }
}
||
603 |