1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace hypeJunction; |
4
|
|
|
|
5
|
|
|
use DOMDocument; |
6
|
|
|
use Exception; |
7
|
|
|
use GuzzleHttp\ClientInterface; |
8
|
|
|
use GuzzleHttp\Psr7\Response; |
9
|
|
|
|
10
|
|
|
/**
 * Parses an HTTP resource into a serializable array of metatags
 *
 * A URL is resolved in this order: direct image, oEmbed (JSON/XML) document,
 * HTML page (link/meta/img tags, then any oEmbed endpoints the page advertises).
 */
class Parser {

    /**
     * @var ClientInterface HTTP client used to fetch resources
     */
    private $client;

    /**
     * @var array Responses keyed by requested URL (false for a failed request);
     *            static, so it is shared by all Parser instances
     */
    private static $cache = array();

    /**
     * Constructor
     *
     * @param ClientInterface $client HTTP Client
     */
    public function __construct(ClientInterface $client) {
        $this->client = $client;
    }

    /**
     * Parses a URL into an array of metatags
     *
     * Tries image data first, then oEmbed data, then DOM data. When the DOM
     * advertises oEmbed endpoints, each endpoint is parsed recursively and its
     * data is merged over the page data (the endpoint's own 'url' is stored
     * under 'oembed_url' so that it does not overwrite the page URL).
     *
     * @param string $url URL to parse
     * @return array Empty array if nothing could be parsed
     */
    public function parse($url = '') {

        $data = $this->getImageData($url);
        if (!$data) {
            $data = $this->getOEmbedData($url);
        }
        if (!$data) {
            $data = $this->getDOMData($url);
            if (is_array($data) && !empty($data['oembed_url'])) {
                foreach ($data['oembed_url'] as $oembed_url) {
                    $oembed_data = $this->parse($oembed_url);
                    if (!empty($oembed_data) && is_array($oembed_data)) {
                        $oembed_data['oembed_url'] = $oembed_data['url'];
                        unset($oembed_data['url']);
                        $data = array_merge($data, $oembed_data);
                    }
                }
            }
        }

        if (!is_array($data)) {
            $data = array();
        }

        // Fall back to the first discovered thumbnail
        if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
            $data['thumbnail_url'] = $data['thumbnails'][0];
        }

        return $data;
    }

    /**
     * Parses image metatags
     *
     * @param string $url URL of the image
     * @return array|false False if the resource is not served as an image
     */
    public function getImageData($url = '') {
        if (!$this->isImage($url)) {
            return false;
        }

        return array(
            'type' => 'photo',
            'url' => $url,
            'thumbnails' => array($url),
        );
    }

    /**
     * Parses OEmbed data
     *
     * @param string $url URL of the oEmbed (JSON or XML) document
     * @return array|false False if the resource is neither JSON nor XML;
     *                     otherwise an array that always contains 'url'
     */
    public function getOEmbedData($url = '') {

        $is_json = $this->isJSON($url);
        if (!$is_json && !$this->isXML($url)) {
            return false;
        }

        $meta = array(
            'url' => $url,
        );

        $content = $this->read($url);
        if (!$content) {
            return $meta;
        }

        if ($is_json) {
            $data = json_decode($content);
        } else {
            // Malformed XML must not emit warnings; restore the libxml setting afterwards
            $previous = libxml_use_internal_errors(true);
            $data = simplexml_load_string($content);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        if (!is_object($data)) {
            // Decoding failed, or the payload is not an object/element
            return $meta;
        }

        $props = array(
            'type',
            'version',
            'title',
            'author_name',
            'author_url',
            'provider_name',
            'provider_url',
            'cache_age',
            'thumbnail_url',
            'thumbnail_width',
            'thumbnail_height',
            'width',
            'height',
            'html',
        );
        foreach ($props as $key) {
            if (empty($data->$key)) {
                continue;
            }
            $value = $data->$key;
            // Remote payload is untrusted: nested JSON objects/arrays can not be cast to string
            if (is_scalar($value) || $value instanceof \SimpleXMLElement) {
                $meta[$key] = (string) $value;
            }
        }

        return $meta;
    }

    /**
     * Parses metatags from DOM
     *
     * @param string $url URL
     * @return array|false False if the resource is not HTML or could not be loaded
     */
    public function getDOMData($url = '') {

        if (!$this->isHTML($url)) {
            return false;
        }

        $doc = $this->getDOM($url);
        if (!$doc) {
            return false;
        }

        $defaults = array(
            'url' => $url,
        );

        $link_tags = $this->parseLinkTags($doc);
        $meta_tags = $this->parseMetaTags($doc);
        $img_tags = $this->parseImgTags($doc);

        // Recursive merge so that 'thumbnails' from all sources are concatenated
        $meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);

        if (empty($meta['title'])) {
            $meta['title'] = $this->parseTitle($doc);
        }

        return $meta;
    }

    /**
     * Check if URL exists and is reachable by making an HTTP request
     *
     * @param string $url URL of the resource
     * @return boolean True only for a 200 response
     */
    public function exists($url = '') {
        $response = $this->request($url);
        if ($response instanceof Response) {
            return (int) $response->getStatusCode() === 200;
        }
        return false;
    }

    /**
     * Validate URL
     *
     * URLs containing multibyte characters are rejected by filter_var(), so
     * they are re-validated with every multibyte character replaced by "X".
     *
     * @param string $url URL to validate
     * @return string|false The URL if valid, false otherwise
     */
    public function isValidUrl($url = '') {
        // based on http://php.net/manual/en/function.filter-var.php#104160
        // adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
        $res = filter_var($url, FILTER_VALIDATE_URL);
        if ($res) {
            return $res;
        }

        // Check if it has unicode chars.
        $l = mb_strlen($url, 'UTF-8');
        if (strlen($url) == $l) {
            return $res;
        }

        // Replace wide chars by "X".
        $s = '';
        for ($i = 0; $i < $l; ++$i) {
            $ch = mb_substr($url, $i, 1, 'UTF-8');
            $s .= (strlen($ch) > 1) ? 'X' : $ch;
        }

        // Re-check now.
        return filter_var($s, FILTER_VALIDATE_URL) ? $url : false;
    }

    /**
     * Requests the resource (GET) and returns the response
     *
     * Spaces in the URL are encoded as %20. The outcome of each request,
     * including failures, is cached per URL, so a URL is requested only once.
     *
     * @param string $url URL of the resource
     * @return Response|false False if the URL is invalid or the request failed
     */
    public function request($url = '') {
        $url = str_replace(' ', '%20', $url);
        if (!$this->isValidUrl($url)) {
            return false;
        }

        if (!isset(self::$cache[$url])) {
            try {
                $response = $this->client->request('GET', $url);
            } catch (Exception $e) {
                $response = false;
                error_log("Parser Error for GET request ($url): {$e->getMessage()}");
            }
            self::$cache[$url] = $response;
        }

        return self::$cache[$url];
    }

    /**
     * Get contents of the page
     *
     * @param string $url URL of the resource
     * @return string Response body, or an empty string if the resource does not exist
     */
    public function read($url = '') {
        if (!$this->exists($url)) {
            return '';
        }

        // exists() has already populated the cache with a Response for this URL
        $response = $this->request($url);
        return (string) $response->getBody();
    }

    /**
     * Checks if resource is an html page
     *
     * @param string $url URL of the resource
     * @return boolean
     */
    public function isHTML($url = '') {
        $mime = $this->getContentType($url);
        return strpos($mime, 'text/html') !== false;
    }

    /**
     * Checks if resource is JSON
     *
     * @param string $url URL of the resource
     * @return boolean
     */
    public function isJSON($url = '') {
        $mime = $this->getContentType($url);
        return strpos($mime, 'json') !== false;
    }

    /**
     * Checks if resource is XML
     *
     * @param string $url URL of the resource
     * @return boolean
     */
    public function isXML($url = '') {
        $mime = $this->getContentType($url);
        return strpos($mime, 'xml') !== false;
    }

    /**
     * Checks if resource is an image
     *
     * @param string $url URL of the resource
     * @return boolean
     */
    public function isImage($url = '') {
        $mime = $this->getContentType($url);
        if ($mime) {
            list($simple, ) = explode('/', $mime);
            return ($simple == 'image');
        }

        return false;
    }

    /**
     * Get mime type of the URL content
     *
     * @param string $url URL of the resource
     * @return string Lower-cased media type without parameters (e.g. "text/html"),
     *                or an empty string if it can not be determined
     */
    public function getContentType($url = '') {
        $response = $this->request($url);
        if ($response instanceof Response) {
            $header = $response->getHeader('Content-Type');
            if (is_array($header) && !empty($header)) {
                // Drop parameters such as "; charset=utf-8"
                $parts = explode(';', $header[0]);
                // Media types are case-insensitive, normalize for comparison
                return strtolower(trim($parts[0]));
            }
        }
        return '';
    }

    /**
     * Returns HTML contents of the page
     *
     * @param string $url URL of the resource
     * @return string Empty string if the resource is not HTML
     */
    public function getHTML($url = '') {
        if (!$this->isHTML($url)) {
            return '';
        }
        return $this->read($url);
    }

    /**
     * Returns HTML contents of the page as a DOMDocument
     *
     * @param string $url URL of the resource
     * @return DOMDocument|false False if there is no HTML to load
     */
    public function getDOM($url = '') {
        $html = $this->getHTML($url);
        if (empty($html)) {
            return false;
        }
        $doc = new DOMDocument();

        // Collect markup errors internally instead of emitting warnings,
        // and put the previous setting back once the document is loaded
        $previous = libxml_use_internal_errors(true);

        if (is_callable('mb_convert_encoding')) {
            // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2;
            // kept as is to preserve the current decoding behavior
            $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        } else {
            $doc->loadHTML($html);
        }
        if (!$doc->documentURI) {
            // Needed later to resolve relative URLs
            $doc->documentURI = $url;
        }

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $doc;
    }

    /**
     * Parses document title
     *
     * @param DOMDocument $doc Document
     * @return string Empty string if the document has no <title>
     */
    public function parseTitle(DOMDocument $doc) {
        $node = $doc->getElementsByTagName('title')->item(0);
        if (!$node) {
            return '';
        }
        return $node->nodeValue ?: '';
    }

    /**
     * Parses <link> tags
     *
     * The rel attribute is treated as a case-insensitive, space-separated list
     * of tokens, so that values like "shortcut icon" are recognized.
     *
     * @param DOMDocument $doc Document
     * @return array Contains 'icons' and 'thumbnails', and optionally
     *               'canonical' and 'oembed_url'
     */
    public function parseLinkTags(DOMDocument $doc) {

        $meta = array(
            'icons' => [],
            'thumbnails' => [],
        );

        $oembed_types = array(
            'application/json+oembed',
            'text/json+oembed',
            'application/xml+oembed',
            'text/xml+oembed',
        );

        foreach ($doc->getElementsByTagName('link') as $node) {
            $rel = strtolower(trim($node->getAttribute('rel')));
            $rels = preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY) ?: array();
            $href = $node->getAttribute('href');

            if (in_array('icon', $rels, true)) {
                $image_url = $this->getAbsoluteURL($doc, $href);
                if ($this->isImage($image_url)) {
                    $meta['icons'][] = $image_url;
                }
            }

            if (in_array('canonical', $rels, true)) {
                $meta['canonical'] = $this->getAbsoluteURL($doc, $href);
            }

            if (in_array('alternate', $rels, true)) {
                $type = strtolower(trim($node->getAttribute('type')));
                if (in_array($type, $oembed_types, true)) {
                    $meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
                }
            }
        }

        return $meta;
    }

    /**
     * Parses <meta> tags
     *
     * All named tags are collected under 'metatags' (repeated names become
     * arrays). Well-known names are additionally mapped to 'title', 'type',
     * 'description', 'tags', 'provider_name' and 'thumbnails'.
     *
     * @param DOMDocument $doc Document
     * @return array
     */
    public function parseMetaTags(DOMDocument $doc) {

        $meta = array();

        foreach ($doc->getElementsByTagName('meta') as $node) {
            $name = $node->getAttribute('name');
            if (!$name) {
                $name = $node->getAttribute('property');
            }
            if (!$name) {
                continue;
            }

            $name = strtolower($name);

            if ($name == 'og:image:url' || $name == 'og:image:secure_url') {
                $name = 'og:image';
            }

            $content = $node->getAttribute('content');
            if (isset($meta['metatags'][$name])) {
                // Repeated tag: promote the stored value to a list
                if (!is_array($meta['metatags'][$name])) {
                    $meta['metatags'][$name] = array($meta['metatags'][$name]);
                }
                $meta['metatags'][$name][] = $content;
            } else {
                $meta['metatags'][$name] = $content;
            }

            switch ($name) {

                case 'title' :
                case 'og:title' :
                case 'twitter:title' :
                    if (empty($meta['title'])) {
                        $meta['title'] = $content;
                    }
                    break;

                case 'og:type' :
                    if (empty($meta['type'])) {
                        $meta['type'] = $content;
                    }
                    break;

                case 'description' :
                case 'og:description' :
                case 'twitter:description' :
                    if (empty($meta['description'])) {
                        $meta['description'] = $content;
                    }
                    break;

                case 'keywords' :
                    $meta['tags'] = array_map('trim', explode(',', $content));
                    break;

                case 'og:site_name' :
                case 'twitter:site' :
                    if (empty($meta['provider_name'])) {
                        $meta['provider_name'] = $content;
                    }
                    break;

                case 'og:image' :
                case 'twitter:image' :
                    $image_url = $this->getAbsoluteURL($doc, $content);
                    if ($this->isImage($image_url)) {
                        $meta['thumbnails'][] = $image_url;
                    }
                    break;
            }
        }

        return $meta;
    }

    /**
     * Parses <img> tags
     *
     * @param DOMDocument $doc Document
     * @return array Contains 'thumbnails' with URLs that are served as images
     */
    public function parseImgTags(DOMDocument $doc) {

        $meta = array(
            'thumbnails' => [],
        );

        foreach ($doc->getElementsByTagName('img') as $node) {
            $src = $node->getAttribute('src');
            if (trim($src) === '') {
                // Nothing to resolve, avoid a pointless HTTP request
                continue;
            }
            $image_url = $this->getAbsoluteURL($doc, $src);
            if ($this->isImage($image_url)) {
                $meta['thumbnails'][] = $image_url;
            }
        }

        return $meta;
    }

    /**
     * Normalizes relative URLs
     *
     * Resolves $href against the document URI:
     *  - data: URIs can not be resolved (false)
     *  - URLs with a host are returned as is; protocol-relative URLs
     *    ("//host/path") get the scheme of the document
     *  - root-relative URLs ("/path") are appended to scheme, host and port
     *  - other URLs are resolved against the directory of the document path
     *
     * Dot segments ("./", "../") are not collapsed.
     *
     * @param DOMDocument $doc  Document
     * @param string      $href URL to normalize
     * @return string|false
     */
    public function getAbsoluteURL(DOMDocument $doc, $href = '') {

        $href = trim((string) $href);

        if (preg_match("/^data:/i", $href)) {
            // data URIs can not be resolved
            return false;
        }

        $base = parse_url((string) $doc->documentURI);
        if (!is_array($base)) {
            $base = array();
        }
        $scheme = isset($base['scheme']) ? $base['scheme'] : '';

        // Check if $href is absolute
        if (parse_url($href, PHP_URL_HOST)) {
            if ($scheme && substr($href, 0, 2) === '//') {
                // Protocol-relative URL inherits the scheme of the document
                return "$scheme:$href";
            }
            return $href;
        }

        $host = isset($base['host']) ? $base['host'] : '';
        $port = isset($base['port']) ? ':' . $base['port'] : '';
        $root = "$scheme://$host$port";

        if (substr($href, 0, 1) === "/") {
            // URL is relative to site root
            return $root . $href;
        }

        // URL is relative to page: replace the last segment of the document path
        $path = isset($base['path']) ? $base['path'] : '';
        $slash = strrpos($path, '/');
        $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

        return $root . $dir . $href;
    }

}
577
|
|
|
|