Passed
Push — master ( e86001...4d22d7 )
by Ismayil
23:23
created

Parser::getOEmbedData()   C

Complexity

Conditions 8
Paths 11

Size

Total Lines 45
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 34
CRAP Score 8.0014

Importance

Changes 0
Metric Value
dl 0
loc 45
ccs 34
cts 35
cp 0.9714
rs 5.3846
c 0
b 0
f 0
cc 8
eloc 32
nc 11
nop 1
crap 8.0014
1
<?php
2
3
namespace hypeJunction;
4
5
use DOMDocument;
6
use Exception;
7
use GuzzleHttp\ClientInterface;
8
use GuzzleHttp\Psr7\Response;
9
10
/**
11
 * Parses an HTTP resource into a serializable array of metatags
12
 */
13
class Parser {
14
15
	/**
16
	 * @var ClientInterface
17
	 */
18
	private $client;
19
20
	/**
21
	 * @var array
22
	 */
23
	private static $cache;
24
25
	/**
	 * Creates a parser bound to an HTTP client
	 *
	 * @param ClientInterface $client HTTP client kept on the instance and used to issue requests
	 */
	public function __construct(ClientInterface $client) {
		$this->client = $client;
	}
32
33
	/**
	 * Builds an array of metatags describing the resource at a URL
	 *
	 * The URL is tried as an image first, then as an oEmbed document and
	 * finally as an HTML page. Every oEmbed endpoint advertised by an HTML
	 * page is parsed recursively and merged into the page data.
	 *
	 * @param string $url URL to parse
	 * @return array
	 */
	public function parse($url = '') {

		$data = $this->getImageData($url) ?: $this->getOEmbedData($url);

		if (!$data) {
			$data = $this->getDOMData($url);

			$has_endpoints = is_array($data) && !empty($data['oembed_url']);
			$endpoints = $has_endpoints ? $data['oembed_url'] : array();

			foreach ($endpoints as $endpoint) {
				$embed = $this->parse($endpoint);
				if (empty($embed) || !is_array($embed)) {
					continue;
				}
				// The page keeps its own 'url'; the endpoint URL is exposed as 'oembed_url'
				$embed['oembed_url'] = $embed['url'];
				unset($embed['url']);
				$data = array_merge($data, $embed);
			}
		}

		if (!is_array($data)) {
			$data = array();
		}

		// Fall back to the first collected thumbnail when none was given explicitly
		$has_thumbnail = !empty($data['thumbnail_url']);
		if (!$has_thumbnail && !empty($data['thumbnails'])) {
			$data['thumbnail_url'] = $data['thumbnails'][0];
		}

		return $data;
	}
69
70
	/**
	 * Describes a URL that points directly at an image
	 *
	 * @param string $url URL of the image
	 * @return array|false Photo metadata, or false if the URL is not served as an image
	 */
	public function getImageData($url = '') {
		if ($this->isImage($url)) {
			return array(
				'type' => 'photo',
				'url' => $url,
				'thumbnails' => array($url),
			);
		}

		return false;
	}
87
88
	/**
	 * Parses OEmbed data
	 *
	 * The resource is only treated as an oEmbed document if its content type
	 * identifies it as JSON or XML. When the body can not be read or decoded
	 * into an object, only the 'url' key is returned.
	 *
	 * @param  string $url URL of the oEmbed resource
	 * @return array|false Metadata array, or false if the resource is neither JSON nor XML
	 */
	public function getOEmbedData($url = '') {

		$is_json = $this->isJSON($url);
		if (!$is_json && !$this->isXML($url)) {
			return false;
		}

		$meta = array(
			'url' => $url,
		);

		$content = $this->read($url);
		if (!$content) {
			return $meta;
		}

		if ($is_json) {
			$data = json_decode($content);
		} else {
			// Keep malformed remote XML from emitting warnings, then restore
			// whatever error handling mode the caller had configured
			$previous = libxml_use_internal_errors(true);
			$data = simplexml_load_string($content);
			libxml_clear_errors();
			libxml_use_internal_errors($previous);
		}

		// json_decode() may yield null, a scalar or an array, and
		// simplexml_load_string() yields false on failure
		if (!is_object($data)) {
			return $meta;
		}

		$props = array(
			'type',
			'version',
			'title',
			'author_name',
			'author_url',
			'provider_name',
			'provider_url',
			'cache_age',
			'thumbnail_url',
			'thumbnail_width',
			'thumbnail_height',
			'width',
			'height',
			'html',
		);

		foreach ($props as $key) {
			if (empty($data->$key)) {
				continue;
			}
			$value = $data->$key;
			// Nested JSON objects/arrays can not be cast to string (fatal for
			// stdClass), so only scalars and XML nodes are accepted
			if (is_scalar($value) || $value instanceof \SimpleXMLElement) {
				$meta[$key] = (string) $value;
			}
		}

		return $meta;
	}
139
140
	/**
	 * Collects metatags from the DOM of an HTML page
	 *
	 * Data parsed from <link>, <meta> and <img> tags is merged recursively on
	 * top of the page URL. The document title is used when no title metatag
	 * was found.
	 *
	 * @param  string $url URL
	 * @return array|false Metadata array, or false if the resource is not HTML or can not be loaded
	 */
	public function getDOMData($url = '') {

		$doc = $this->isHTML($url) ? $this->getDOM($url) : false;
		if (!$doc) {
			return false;
		}

		$meta = array_merge_recursive(
			array('url' => $url),
			$this->parseLinkTags($doc),
			$this->parseMetaTags($doc),
			$this->parseImgTags($doc)
		);

		if (empty($meta['title'])) {
			$meta['title'] = $this->parseTitle($doc);
		}

		return $meta;
	}
174
175
	/**
	 * Checks whether a URL is reachable, i.e. requesting it yields a 200 status code
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function exists($url = '') {
		$response = $this->request($url);

		return ($response instanceof Response) && $response->getStatusCode() == 200;
	}
188
189
	/**
	 * Validate URL
	 *
	 * Unlike a bare filter_var() call, URLs containing multibyte characters
	 * are accepted as long as they are valid once those characters are
	 * replaced by an ASCII placeholder.
	 *
	 * @param string $url URL to validate
	 * @return string|false The URL if it is valid, false otherwise
	 */
	public function isValidUrl($url = '') {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		// adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
		$res = filter_var($url, FILTER_VALIDATE_URL);
		if ($res) {
			return $res;
		}

		// Equal byte and character counts mean there are no multibyte
		// characters, so the rejection above is final
		$l = mb_strlen($url, 'UTF-8');
		if (strlen($url) == $l) {
			return false;
		}

		// Replace wide chars by "X"
		$s = '';
		for ($i = 0; $i < $l; ++$i) {
			$ch = mb_substr($url, $i, 1, 'UTF-8');
			$s .= (strlen($ch) > 1) ? 'X' : $ch;
		}

		// Re-check now
		return filter_var($s, FILTER_VALIDATE_URL) ? $url : false;
	}
216
217
	/**
	 * Issues a GET request for the resource and returns the response
	 *
	 * Spaces in the URL are percent-encoded before validation. The outcome of
	 * each request (including a failure) is kept in a static cache keyed by
	 * URL, so a URL is requested at most once.
	 *
	 * @param string $url URL of the resource
	 * @return Response|false Response, or false if the URL is invalid or the request failed
	 */
	public function request($url = '') {
		$url = str_replace(' ', '%20', $url);
		if (!$this->isValidUrl($url)) {
			return false;
		}

		if (!isset(self::$cache[$url])) {
			try {
				$response = $this->client->request('GET', $url);
			} catch (Exception $e) {
				$response = false;
				// The request above is a GET, so the log entry must say so
				error_log("Parser Error for GET request ($url): {$e->getMessage()}");
			}
			self::$cache[$url] = $response;
		}

		return self::$cache[$url];
	}
240
241
	/**
	 * Returns the response body of a reachable resource
	 *
	 * @param string $url URL of the resource
	 * @return string Body contents, or an empty string if the resource is not reachable
	 */
	public function read($url = '') {
		if ($this->exists($url)) {
			return (string) $this->request($url)->getBody();
		}

		return '';
	}
257
258
	/**
	 * Tells whether the content type of the resource contains "text/html"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isHTML($url = '') {
		return false !== strpos($this->getContentType($url), 'text/html');
	}
268
269
	/**
	 * Tells whether the content type of the resource contains "json"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isJSON($url = '') {
		$position = strpos($this->getContentType($url), 'json');

		return $position !== false;
	}
279
280
	/**
	 * Tells whether the content type of the resource contains "xml"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isXML($url = '') {
		$content_type = $this->getContentType($url);
		$found = strpos($content_type, 'xml');

		return !($found === false);
	}
290
291
	/**
	 * Tells whether the top-level media type of the resource is "image"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isImage($url = '') {
		$mime = $this->getContentType($url);
		if (!$mime) {
			return false;
		}

		// Only the part before the slash matters, e.g. "image" in "image/png"
		$segments = explode('/', $mime);

		return $segments[0] == 'image';
	}
306
307
	/**
	 * Reads the mime type from the Content-Type header of the resource
	 *
	 * @param string $url URL of the resource
	 * @return string Mime type without parameters, or an empty string if it can not be determined
	 */
	public function getContentType($url = '') {
		$response = $this->request($url);
		if (!($response instanceof Response)) {
			return '';
		}

		$values = $response->getHeader('Content-Type');
		if (!is_array($values) || empty($values)) {
			return '';
		}

		// Strip parameters that follow the mime type, e.g. "; charset=utf-8"
		$pieces = explode(';', $values[0]);

		return trim($pieces[0]);
	}
324
325
	/**
	 * Fetches the markup of an HTML page
	 *
	 * @param string $url URL of the resource
	 * @return string Page contents, or an empty string if the resource is not HTML
	 */
	public function getHTML($url = '') {
		return $this->isHTML($url) ? $this->read($url) : '';
	}
337
338
	/**
	 * Returns HTML contents of the page as a DOMDocument
	 *
	 * Parse errors raised by libxml for malformed markup are discarded. The
	 * document URI is set to the requested URL when the loader did not set one.
	 *
	 * @param string $url URL of the resource
	 * @return DOMDocument|false Document, or false if the page has no HTML contents
	 */
	public function getDOM($url = '') {
		$html = $this->getHTML($url);
		if (empty($html)) {
			return false;
		}

		$doc = new DOMDocument();

		// Silence libxml warnings for this load only; the previous mode is
		// restored below so the process-wide setting is not leaked
		$previous = libxml_use_internal_errors(true);

		if (is_callable('mb_encode_numericentity')) {
			// loadHTML() does not assume UTF-8, so non-ASCII characters are
			// passed as numeric entities. This replaces the deprecated
			// 'HTML-ENTITIES' conversion of mb_convert_encoding()
			$html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
		}
		$doc->loadHTML($html);

		if (!$doc->documentURI) {
			$doc->documentURI = $url;
		}

		libxml_clear_errors();
		libxml_use_internal_errors($previous);

		return $doc;
	}
366
367
	/**
	 * Parses document title
	 *
	 * @param DOMDocument $doc Document
	 * @return string Value of the first <title> element, or an empty string if there is none
	 */
	public function parseTitle(DOMDocument $doc) {
		$nodes = $doc->getElementsByTagName('title');
		// item(0) is null for documents without a <title>; bail out before
		// reading a property from it
		if (!$nodes->length) {
			return '';
		}

		$title = $nodes->item(0)->nodeValue;

		return ($title) ?: '';
	}
378
379
	/**
	 * Parses <link> tags
	 *
	 * Collects icons (only those that are served as images), the canonical
	 * URL and advertised oEmbed endpoints. The rel attribute is treated as a
	 * case-insensitive list of whitespace separated tokens, so values such as
	 * "shortcut icon" are recognized. Links whose href can not be resolved
	 * are skipped.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseLinkTags(DOMDocument $doc) {

		$meta = array(
			'icons' => [],
			'thumbnails' => [],
		);

		$oembed_types = array(
			'application/json+oembed',
			'text/json+oembed',
			'application/xml+oembed',
			'text/xml+oembed',
		);

		foreach ($doc->getElementsByTagName('link') as $node) {
			$rel = strtolower(trim($node->getAttribute('rel')));
			$rels = preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY);
			if (empty($rels)) {
				continue;
			}

			$is_icon = in_array('icon', $rels, true);
			$is_canonical = in_array('canonical', $rels, true);
			$is_alternate = in_array('alternate', $rels, true);
			if (!$is_icon && !$is_canonical && !$is_alternate) {
				continue;
			}

			// getAbsoluteURL() returns false for hrefs it can not resolve
			$link_url = $this->getAbsoluteURL($doc, $node->getAttribute('href'));
			if (!$link_url) {
				continue;
			}

			if ($is_icon) {
				if ($this->isImage($link_url)) {
					$meta['icons'][] = $link_url;
				}
			} else if ($is_canonical) {
				$meta['canonical'] = $link_url;
			} else {
				$type = strtolower(trim($node->getAttribute('type')));
				if (in_array($type, $oembed_types, true)) {
					$meta['oembed_url'][] = $link_url;
				}
			}
		}

		return $meta;
	}
426
427
	/**
	 * Parses <meta> tags
	 *
	 * Every tag with a name (or property) attribute is recorded under
	 * 'metatags'; repeated names are collected into an array. In addition,
	 * well-known tags populate the summary keys 'title', 'type',
	 * 'description', 'provider_name', 'tags' and 'thumbnails'.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseMetaTags(DOMDocument $doc) {

		$meta = array();

		// Tag names mapped to the summary key they populate; the first
		// non-empty value wins
		$aliases = array(
			'title' => 'title',
			'og:title' => 'title',
			'twitter:title' => 'title',
			'og:type' => 'type',
			'description' => 'description',
			'og:description' => 'description',
			'twitter:description' => 'description',
			'og:site_name' => 'provider_name',
			'twitter:site' => 'provider_name',
		);

		foreach ($doc->getElementsByTagName('meta') as $node) {
			$name = $node->getAttribute('name') ?: $node->getAttribute('property');
			if (!$name) {
				continue;
			}

			$name = strtolower($name);
			if ($name === 'og:image:url') {
				$name = 'og:image';
			}

			$content = $node->getAttribute('content');

			if (!isset($meta['metatags'][$name])) {
				$meta['metatags'][$name] = $content;
			} else {
				if (!is_array($meta['metatags'][$name])) {
					$meta['metatags'][$name] = array($meta['metatags'][$name]);
				}
				$meta['metatags'][$name][] = $content;
			}

			if (isset($aliases[$name])) {
				$field = $aliases[$name];
				if (empty($meta[$field])) {
					$meta[$field] = $content;
				}
				continue;
			}

			switch ($name) {

				case 'keywords':
					// Drop blank entries produced by empty content or stray commas
					$keywords = array_map('trim', explode(',', $content));
					$meta['tags'] = array_values(array_filter($keywords, 'strlen'));
					break;

				case 'og:image':
				case 'twitter:image':
					// getAbsoluteURL() returns false for URLs it can not resolve
					$image_url = $this->getAbsoluteURL($doc, $content);
					if (!$image_url) {
						break;
					}
					// og:image and twitter:image commonly point at the same file
					if (!empty($meta['thumbnails']) && in_array($image_url, $meta['thumbnails'], true)) {
						break;
					}
					if ($this->isImage($image_url)) {
						$meta['thumbnails'][] = $image_url;
					}
					break;
			}
		}

		return $meta;
	}
516
517
	/**
	 * Parses <img> tags
	 *
	 * Collects the absolute URLs of images that are served with an image
	 * content type. Images without a src, with a src that can not be
	 * resolved, or already collected are skipped before any request is made.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseImgTags(DOMDocument $doc) {

		$meta = array(
			'thumbnails' => [],
		);

		foreach ($doc->getElementsByTagName('img') as $node) {
			$src = trim($node->getAttribute('src'));
			if ($src === '') {
				// An empty src would resolve to the page itself
				continue;
			}

			// getAbsoluteURL() returns false for URLs it can not resolve
			$image_url = $this->getAbsoluteURL($doc, $src);
			if (!$image_url || in_array($image_url, $meta['thumbnails'], true)) {
				continue;
			}

			if ($this->isImage($image_url)) {
				$meta['thumbnails'][] = $image_url;
			}
		}

		return $meta;
	}
540
541
	/**
	 * Normalizes relative URLs
	 *
	 * The document URI is used as the base. Protocol-relative URLs inherit
	 * the scheme of the document, and the port of the document is preserved.
	 * Page-relative URLs are resolved against the directory of the document
	 * when its path ends in a slash or in a file name containing a dot;
	 * otherwise the whole document path is treated as a directory.
	 *
	 * @param DOMDocument $doc  Document
	 * @param string      $href URL to normalize
	 * @return string|false Absolute URL, or false for data URIs
	 */
	public function getAbsoluteURL(DOMDocument $doc, $href = '') {

		$href = trim((string) $href);

		if (preg_match("/^data:/i", $href)) {
			// data URIs can not be resolved
			return false;
		}

		$base = $doc->documentURI ?: '';
		$scheme = parse_url($base, PHP_URL_SCHEME);

		if (substr($href, 0, 2) === '//') {
			// Protocol-relative URL: borrow the scheme of the document so the
			// result passes URL validation
			return $scheme ? "{$scheme}:{$href}" : $href;
		}

		// Check if $href is absolute
		if (parse_url($href, PHP_URL_HOST)) {
			return $href;
		}

		$host = parse_url($base, PHP_URL_HOST);
		$port = parse_url($base, PHP_URL_PORT);
		$authority = $port ? "{$host}:{$port}" : $host;

		if (substr($href, 0, 1) === "/") {
			// URL is relative to site root
			return "{$scheme}://{$authority}{$href}";
		}

		// URL is relative to page
		$path = (string) parse_url($base, PHP_URL_PATH);
		if (substr($path, -1) !== '/') {
			$slash = strrpos($path, '/');
			$last = ($slash === false) ? $path : substr($path, $slash + 1);
			if (strpos($last, '.') !== false) {
				// Last segment looks like a file name (e.g. post.html):
				// resolve against its directory
				$path = ($slash === false) ? '' : substr($path, 0, $slash);
			}
		}
		$path = rtrim($path, '/');

		return "{$scheme}://{$authority}{$path}/{$href}";
	}
575
576
}
577