Parser::isJSON()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
crap 1
1
<?php
2
3
namespace hypeJunction;
4
5
use DOMDocument;
6
use Exception;
7
use GuzzleHttp\ClientInterface;
8
use GuzzleHttp\Psr7\Response;
9
10
/**
11
 * Parses an HTTP resource into a serializable array of metatags
12
 */
13
class Parser {
14
15
	/**
16
	 * @var ClientInterface
17
	 */
18
	private $client;
19
20
	/**
21
	 * @var array
22
	 */
23
	private static $cache;
24
25
	/**
26
	 * @var array
27
	 */
28
	private $urls = [];
0 ignored issues
show
Unused Code introduced by
The property $urls is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
29
30
	/**
	 * Creates a parser bound to an HTTP client
	 *
	 * @param ClientInterface $client HTTP client stored on the instance and used by request()
	 */
	public function __construct(ClientInterface $client) {
		$this->client = $client;
	}
37
38
	/**
	 * Parses a URL into an array of metatags
	 *
	 * Resolution order: direct image, then OEmbed document, then HTML page.
	 * When the HTML page advertises OEmbed endpoints, each endpoint is read
	 * and its values are merged over the values scraped from the page.
	 *
	 * @param string $url URL to parse
	 * @return array Metatags; an empty array when nothing could be extracted
	 */
	public function parse($url = '') {
		// ?: short-circuits, so the OEmbed lookup only runs when the image lookup found nothing
		$meta = $this->getImageData($url) ?: $this->getOEmbedData($url);

		if (!$meta) {
			$meta = $this->getDOMData($url);

			$has_endpoints = is_array($meta) && !empty($meta['oembed_url']);
			$endpoints = $has_endpoints ? $meta['oembed_url'] : array();

			foreach ($endpoints as $endpoint) {
				$embed = $this->getOEmbedData($endpoint);
				if (empty($embed) || !is_array($embed)) {
					continue;
				}

				// Keep the page URL under 'url'; the endpoint URL moves to 'oembed_url'
				$embed['oembed_url'] = $embed['url'];
				unset($embed['url']);

				$meta = array_merge($meta, $embed);
			}
		}

		if (!is_array($meta)) {
			$meta = array();
		}

		// Fall back to the first collected thumbnail when no explicit one is set
		$has_thumbnail = !empty($meta['thumbnail_url']);
		if (!$has_thumbnail && !empty($meta['thumbnails'])) {
			$meta['thumbnail_url'] = $meta['thumbnails'][0];
		}

		return $meta;
	}
73
74
	/**
	 * Builds metatags for a URL that points directly at an image
	 *
	 * @param string $url URL of the image
	 * @return array|false Photo metatags, or false when isImage() rejects the URL
	 */
	public function getImageData($url = '') {
		if ($this->isImage($url)) {
			return array(
				'type' => 'photo',
				'url' => $url,
				'thumbnails' => array($url),
			);
		}

		return false;
	}
91
92
	/**
	 * Parses OEmbed data
	 *
	 * Reads a JSON or XML OEmbed document and copies the known OEmbed
	 * properties into a flat array of strings.
	 *
	 * @param  string $url URL of the OEmbed document
	 * @return array|false False when the resource is neither JSON nor XML;
	 *                     otherwise an array that always contains 'url' and
	 *                     any OEmbed properties that could be read
	 */
	public function getOEmbedData($url = '') {

		$is_json = $this->isJSON($url);
		if (!$is_json && !$this->isXML($url)) {
			return false;
		}

		$meta = array(
			'url' => $url,
		);

		$content = $this->read($url);
		if (!$content) {
			return $meta;
		}

		if ($is_json) {
			$data = json_decode($content);
		} else {
			// Collect libxml errors internally so malformed remote XML does not
			// emit PHP warnings, then restore the caller's setting
			$previous = libxml_use_internal_errors(true);
			$data = simplexml_load_string($content);
			libxml_clear_errors();
			libxml_use_internal_errors($previous);
		}

		// json_decode() yields null/scalar/array and simplexml_load_string() yields
		// false for payloads that are not a usable document
		if (!is_object($data)) {
			return $meta;
		}

		$props = array(
			'type',
			'version',
			'title',
			'author_name',
			'author_url',
			'provider_name',
			'provider_url',
			'cache_age',
			'thumbnail_url',
			'thumbnail_width',
			'thumbnail_height',
			'width',
			'height',
			'html',
		);
		foreach ($props as $key) {
			if (empty($data->$key)) {
				continue;
			}
			$value = $data->$key;
			// A nested JSON object or array cannot be cast to string; casting an
			// object would throw, so such values are skipped
			if (is_scalar($value) || $value instanceof \SimpleXMLElement) {
				$meta[$key] = (string) $value;
			}
		}
		return $meta;
	}
143
144
	/**
	 * Parses metatags from DOM
	 *
	 * Combines the results of the <link>, <meta> and <img> parsers and falls
	 * back to the document <title> when no title metatag was found.
	 *
	 * @param  string $url URL
	 * @return array|false False when the resource is not HTML or can not be loaded
	 */
	public function getDOMData($url = '') {

		$doc = $this->isHTML($url) ? $this->getDOM($url) : false;
		if (!$doc) {
			return false;
		}

		$meta = array_merge_recursive(
			array('url' => $url),
			$this->parseLinkTags($doc),
			$this->parseMetaTags($doc),
			$this->parseImgTags($doc)
		);

		if (empty($meta['title'])) {
			$meta['title'] = $this->parseTitle($doc);
		}

		return $meta;
	}
178
179
	/**
	 * Checks that the URL is reachable, i.e. that request() yields a response with status 200
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function exists($url = '') {
		$response = $this->request($url);

		return ($response instanceof Response) && $response->getStatusCode() == 200;
	}
192
193
	/**
	 * Validate URL
	 *
	 * Accepts URLs containing multibyte characters, which FILTER_VALIDATE_URL
	 * rejects on its own, by re-validating a copy in which every multibyte
	 * character is replaced with an ASCII placeholder.
	 *
	 * @param string $url URL to validate
	 * @return string|false The URL when it is valid, false otherwise
	 */
	public function isValidUrl($url = '') {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		// adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
		$res = filter_var($url, FILTER_VALIDATE_URL);
		if ($res) {
			return $res;
		}
		// Equal byte and character lengths mean there are no multibyte
		// characters, so the failed validation stands
		$l = mb_strlen($url);
		if (strlen($url) == $l) {
			return $res;
		}
		// Replace wide chars by "X". mb_substr() is used directly so the class
		// does not depend on a framework helper that this file does not define
		$s = '';
		for ($i = 0; $i < $l; ++$i) {
			$ch = mb_substr($url, $i, 1);
			$s .= (strlen($ch) > 1) ? 'X' : $ch;
		}
		// Re-check now.
		return filter_var($s, FILTER_VALIDATE_URL) ? $url : false;
	}
220
221
	/**
	 * Requests the resource with GET and returns the response
	 *
	 * Spaces in the URL are encoded before validation. The outcome for each
	 * URL, including a failure, is kept in a static cache so later calls for
	 * the same URL do not hit the client again.
	 *
	 * @param string $url URL of the resource
	 * @return Response|false False when the URL is invalid or the client threw
	 */
	public function request($url = '') {
		$url = str_replace(' ', '%20', $url);
		if (!$this->isValidUrl($url)) {
			return false;
		}
		if (!isset(self::$cache[$url])) {
			try {
				$response = $this->client->request('GET', $url);
			} catch (Exception $e) {
				$response = false;
				// The message names the HTTP method that is actually used above
				error_log("Parser Error for GET request ($url): {$e->getMessage()}");
			}
			self::$cache[$url] = $response;
		}

		return self::$cache[$url];
	}
244
245
	/**
	 * Returns the response body of the resource
	 *
	 * @param string $url URL of the resource
	 * @return string Body, or an empty string when exists() reports the URL as unreachable
	 */
	public function read($url = '') {
		if ($this->exists($url)) {
			return (string) $this->request($url)->getBody();
		}

		return '';
	}
261
262
	/**
	 * Tells whether the content type of the resource contains "text/html"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isHTML($url = '') {
		return false !== strpos($this->getContentType($url), 'text/html');
	}
272
273
	/**
	 * Tells whether the content type of the resource contains "json"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isJSON($url = '') {
		return false !== strpos($this->getContentType($url), 'json');
	}
283
284
	/**
	 * Tells whether the content type of the resource contains "xml"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isXML($url = '') {
		return false !== strpos($this->getContentType($url), 'xml');
	}
294
295
	/**
	 * Tells whether the top-level media type of the resource is "image"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isImage($url = '') {
		$mime = $this->getContentType($url);
		if (!$mime) {
			return false;
		}

		// Only the part before the first slash (e.g. "image" in "image/png") matters
		$segments = explode('/', $mime);

		return $segments[0] == 'image';
	}
310
311
	/**
	 * Returns the media type from the Content-Type header of the resource
	 *
	 * Header parameters following a semicolon are dropped.
	 *
	 * @param string $url URL of the resource
	 * @return string Empty string when there is no response or no Content-Type header
	 */
	public function getContentType($url = '') {
		$response = $this->request($url);
		if (!($response instanceof Response)) {
			return '';
		}

		$header = $response->getHeader('Content-Type');
		if (!is_array($header) || empty($header)) {
			return '';
		}

		$parts = explode(';', $header[0]);

		return trim($parts[0]);
	}
328
329
	/**
	 * Returns the body of the resource when it is an HTML page
	 *
	 * @param string $url URL of the resource
	 * @return string Empty string when the resource is not HTML
	 */
	public function getHTML($url = '') {
		return $this->isHTML($url) ? $this->read($url) : '';
	}
341
342
	/**
	 * Returns HTML contents of the page as a DOMDocument
	 *
	 * libxml errors raised by malformed markup are collected internally and
	 * discarded. The libxml error-handling mode that was active before the
	 * call is restored afterwards.
	 *
	 * @param string $url URL of the resource
	 * @return DOMDocument|false False when no HTML could be read from the URL
	 */
	public function getDOM($url = '') {
		$html = $this->getHTML($url);
		if (empty($html)) {
			return false;
		}
		$doc = new DOMDocument();

		// Remember the caller's setting so this method does not change global libxml state
		$previous = libxml_use_internal_errors(true);

		if (is_callable('mb_convert_encoding')) {
			// NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated as of
			// PHP 8.2 - confirm supported PHP versions before replacing it
			$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
		} else {
			$doc->loadHTML($html);
		}
		if (!$doc->documentURI) {
			// getAbsoluteURL() reads documentURI to resolve relative links
			$doc->documentURI = $url;
		}

		libxml_clear_errors();
		libxml_use_internal_errors($previous);

		return $doc;
	}
370
371
	/**
	 * Parses document title
	 *
	 * @param DOMDocument $doc Document
	 * @return string Text of the first <title> element, or an empty string
	 *                when the document has no <title> or its text is empty
	 */
	public function parseTitle(DOMDocument $doc) {
		// item(0) is null for a document without a <title> element
		$node = $doc->getElementsByTagName('title')->item(0);
		if (!$node) {
			return '';
		}

		return $node->nodeValue ?: '';
	}
382
383
	/**
	 * Parses <link> tags
	 *
	 * The rel attribute is treated as a space-separated, case-insensitive
	 * list of link types, so values such as "shortcut icon" are recognized.
	 * Links whose href can not be resolved to a URL are ignored.
	 *
	 * @param DOMDocument $doc Document
	 * @return array Always contains 'icons' and 'thumbnails'; may contain
	 *               'canonical' (string) and 'oembed_url' (list of strings)
	 */
	public function parseLinkTags(DOMDocument $doc) {

		$meta = array(
			'icons' => [],
			'thumbnails' => [],
		);

		$oembed_types = array(
			'application/json+oembed',
			'text/json+oembed',
			'application/xml+oembed',
			'text/xml+oembed',
		);

		$nodes = $doc->getElementsByTagName('link');
		foreach ($nodes as $node) {
			$rel = strtolower(trim($node->getAttribute('rel')));
			$rels = array_unique(preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY));
			if (empty($rels)) {
				continue;
			}

			// getAbsoluteURL() returns false for hrefs it can not resolve (data: URIs)
			$href = $this->getAbsoluteURL($doc, $node->getAttribute('href'));
			if (!$href) {
				continue;
			}

			foreach ($rels as $link_type) {
				switch ($link_type) {

					case 'icon':
						if ($this->isImage($href)) {
							$meta['icons'][] = $href;
						}
						break;

					case 'canonical':
						$meta['canonical'] = $href;
						break;

					case 'alternate':
						// Only alternates that advertise an OEmbed media type are endpoints
						if (in_array($node->getAttribute('type'), $oembed_types, true)) {
							$meta['oembed_url'][] = $href;
						}
						break;
				}
			}
		}

		return $meta;
	}
430
431
	/**
	 * Parses <meta> tags
	 *
	 * Every named tag is recorded under 'metatags' (repeated names become a
	 * list). Well-known title, type, description, keyword, site name and
	 * image tags are additionally mapped to top-level keys.
	 *
	 * @param DOMDocument $doc Document
	 * @return array May contain 'metatags', 'title', 'type', 'description',
	 *               'tags', 'provider_name' and 'thumbnails'
	 */
	public function parseMetaTags(DOMDocument $doc) {

		$meta = array();

		$nodes = $doc->getElementsByTagName('meta');
		foreach ($nodes as $node) {
			// Open Graph tags use the "property" attribute instead of "name"
			$name = $node->getAttribute('name');
			if (!$name) {
				$name = $node->getAttribute('property');
			}
			if (!$name) {
				continue;
			}

			$name = strtolower($name);

			// Treat the structured image properties as aliases of og:image
			if ($name == 'og:image:url' || $name == 'og:image:secure_url') {
				$name = 'og:image';
			}

			$content = $node->getAttribute('content');
			if (isset($meta['metatags'][$name])) {
				// Second occurrence of a name: promote the stored string to a list
				if (!is_array($meta['metatags'][$name])) {
					$meta['metatags'][$name] = array($meta['metatags'][$name]);
				}
				$meta['metatags'][$name][] = $content;
			} else {
				$meta['metatags'][$name] = $content;
			}

			switch ($name) {

				case 'title':
				case 'og:title':
				case 'twitter:title':
					// First non-empty value wins
					if (empty($meta['title'])) {
						$meta['title'] = $content;
					}
					break;

				case 'og:type':
					if (empty($meta['type'])) {
						$meta['type'] = $content;
					}
					break;

				case 'description':
				case 'og:description':
				case 'twitter:description':
					if (empty($meta['description'])) {
						$meta['description'] = $content;
					}
					break;

				case 'keywords':
					// getAttribute() always returns a string, so it can be split directly
					$meta['tags'] = array_map('trim', explode(',', $content));
					break;

				case 'og:site_name':
				case 'twitter:site':
					if (empty($meta['provider_name'])) {
						$meta['provider_name'] = $content;
					}
					break;

				case 'og:image':
				case 'twitter:image':
					// getAbsoluteURL() returns false for unresolvable URLs (data: URIs);
					// those are skipped instead of being handed to isImage()
					$image_url = $this->getAbsoluteURL($doc, $content);
					if ($image_url && $this->isImage($image_url)) {
						$meta['thumbnails'][] = $image_url;
					}
					break;
			}
		}

		return $meta;
	}
520
521
	/**
	 * Parses <img> tags
	 *
	 * @param DOMDocument $doc Document
	 * @return array Contains 'thumbnails': absolute URLs of the images that
	 *               isImage() confirms
	 */
	public function parseImgTags(DOMDocument $doc) {

		$meta = array(
			'thumbnails' => [],
		);

		foreach ($doc->getElementsByTagName('img') as $node) {
			$src = $node->getAttribute('src');
			if ($src === '') {
				// Nothing to resolve; avoids a pointless lookup of the page URL itself
				continue;
			}

			// getAbsoluteURL() returns false for unresolvable URLs (data: URIs)
			$image_url = $this->getAbsoluteURL($doc, $src);
			if ($image_url && $this->isImage($image_url)) {
				$meta['thumbnails'][] = $image_url;
			}
		}

		return $meta;
	}
544
545
	/**
	 * Normalizes relative URLs
	 *
	 * Resolves $href against the document URI. Absolute URLs are returned
	 * unchanged, protocol-relative URLs ("//host/path") receive the scheme of
	 * the document, and the port of the document URI is kept when rebuilding
	 * root-relative and page-relative URLs.
	 *
	 * @param DOMDocument $doc  Document
	 * @param string      $href URL to normalize
	 * @return string|false False for data: URIs, which can not be resolved
	 */
	public function getAbsoluteURL(DOMDocument $doc, $href = '') {

		if (preg_match("/^data:/i", $href)) {
			// data URIs can not be resolved
			return false;
		}

		$uri = trim($doc->documentURI ?: '', '/');
		$scheme = parse_url($uri, PHP_URL_SCHEME);

		// Check if $href already names a host
		if (parse_url($href, PHP_URL_HOST)) {
			if (substr($href, 0, 2) === '//' && $scheme) {
				// Protocol-relative URL: borrow the scheme of the document
				return "$scheme:$href";
			}
			return $href;
		}

		$host = parse_url($uri, PHP_URL_HOST);
		$port = parse_url($uri, PHP_URL_PORT);
		$authority = $port ? "$host:$port" : $host;

		if (substr($href, 0, 1) === "/") {
			// URL is relative to site root
			return "$scheme://$authority$href";
		}

		// URL is relative to page
		// NOTE(review): the whole document path is used as the base, including a
		// trailing file name such as "page.html" - confirm this is intended
		$path = parse_url($uri, PHP_URL_PATH);

		return "$scheme://$authority$path/$href";
	}
579
580
}