1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace hypeJunction; |
4
|
|
|
|
5
|
|
|
use DOMDocument; |
6
|
|
|
use Exception; |
7
|
|
|
use GuzzleHttp\ClientInterface; |
8
|
|
|
use GuzzleHttp\Psr7\Response; |
9
|
|
|
|
10
|
|
|
/**
 * Parses an HTTP resource into a serializable array of metatags.
 *
 * A URL is inspected in three stages (see parse()): as a plain image,
 * as an oEmbed JSON/XML document, and finally as an HTML page whose
 * <link>, <meta> and <img> tags are scraped.
 */
class Parser {

	/**
	 * @var ClientInterface HTTP client used for every request
	 */
	private $client;

	/**
	 * @var array Responses (or false for failed requests) keyed by URL.
	 *            Static, so it is shared by all Parser instances.
	 */
	private static $cache = [];

	/**
	 * Constructor
	 *
	 * @param ClientInterface $client HTTP Client
	 */
	public function __construct(ClientInterface $client) {
		$this->client = $client;
	}

	/**
	 * Parses a URL into an array of metatags
	 *
	 * Tries image data first, then oEmbed data, then the HTML DOM. When the
	 * DOM advertises oEmbed endpoints, their data is merged on top of the
	 * DOM data.
	 *
	 * @param string $url URL to parse
	 * @return array Always an array; empty when nothing could be parsed
	 */
	public function parse($url = '') {
		$data = $this->getImageData($url);
		if (!$data) {
			$data = $this->getOEmbedData($url);
		}
		if (!$data) {
			$data = $this->getDOMData($url);
			if (is_array($data) && !empty($data['oembed_url'])) {
				// foreach iterates over a copy, so overwriting
				// $data['oembed_url'] below does not affect the loop
				foreach ((array) $data['oembed_url'] as $oembed_url) {
					$oembed_data = $this->getOEmbedData($oembed_url);
					if (!empty($oembed_data) && is_array($oembed_data)) {
						// keep the page URL in 'url'; expose the endpoint as 'oembed_url'
						$oembed_data['oembed_url'] = $oembed_data['url'];
						unset($oembed_data['url']);
						$data = array_merge($data, $oembed_data);
					}
				}
			}
		}

		if (!is_array($data)) {
			$data = [];
		}

		if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
			$data['thumbnail_url'] = $data['thumbnails'][0];
		}

		return $data;
	}

	/**
	 * Parses image metatags
	 *
	 * @param string $url URL of the image
	 * @return array|false False if the resource is not served as an image
	 */
	public function getImageData($url = '') {
		if (!$this->isImage($url)) {
			return false;
		}

		return [
			'type' => 'photo',
			'url' => $url,
			'thumbnails' => [$url],
		];
	}

	/**
	 * Parses OEmbed data
	 *
	 * @param string $url URL of the oEmbed (JSON or XML) resource
	 * @return array|false False if the resource is neither JSON nor XML;
	 *                     otherwise an array that always contains 'url'
	 */
	public function getOEmbedData($url = '') {
		$is_json = $this->isJSON($url);
		if (!$is_json && !$this->isXML($url)) {
			return false;
		}

		$meta = [
			'url' => $url,
		];

		$content = $this->read($url);
		if (!$content) {
			return $meta;
		}

		if ($is_json) {
			$data = json_decode($content);
		} else {
			// Remote XML may be malformed: collect libxml errors instead of
			// emitting warnings, then restore the caller's libxml setting
			$previous = libxml_use_internal_errors(true);
			$data = simplexml_load_string($content);
			libxml_clear_errors();
			libxml_use_internal_errors($previous);
		}

		if (!is_object($data)) {
			// undecodable payload, or JSON that is not an object
			return $meta;
		}

		$props = [
			'type',
			'version',
			'title',
			'author_name',
			'author_url',
			'provider_name',
			'provider_url',
			'cache_age',
			'thumbnail_url',
			'thumbnail_width',
			'thumbnail_height',
			'width',
			'height',
			'html',
		];

		foreach ($props as $key) {
			if (empty($data->$key)) {
				continue;
			}
			$value = $data->$key;
			// Nested JSON arrays/objects can not be cast to a string
			if (is_scalar($value) || $value instanceof \SimpleXMLElement) {
				$meta[$key] = (string) $value;
			}
		}

		return $meta;
	}

	/**
	 * Parses metatags from DOM
	 *
	 * @param string $url URL
	 * @return array|false False if the resource is not HTML or has no body
	 */
	public function getDOMData($url = '') {
		if (!$this->isHTML($url)) {
			return false;
		}

		$doc = $this->getDOM($url);
		if (!$doc) {
			return false;
		}

		$defaults = [
			'url' => $url,
		];

		$link_tags = $this->parseLinkTags($doc);
		$meta_tags = $this->parseMetaTags($doc);
		$img_tags = $this->parseImgTags($doc);

		// Recursive merge so that 'thumbnails' lists from all sources are combined
		$meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);

		if (empty($meta['title'])) {
			$meta['title'] = $this->parseTitle($doc);
		}

		return $meta;
	}

	/**
	 * Check if URL exists and is reachable by making an HTTP request
	 *
	 * @param string $url URL of the resource
	 * @return boolean True only for a 200 response
	 */
	public function exists($url = '') {
		$response = $this->request($url);
		if ($response instanceof Response) {
			return $response->getStatusCode() === 200;
		}
		return false;
	}

	/**
	 * Validate URL
	 *
	 * FILTER_VALIDATE_URL rejects URLs containing multibyte characters, so
	 * such URLs are re-validated with every multibyte character replaced by
	 * an ASCII placeholder.
	 *
	 * Based on http://php.net/manual/en/function.filter-var.php#104160,
	 * adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
	 *
	 * @param string $url URL to validate
	 * @return string|false The URL if valid, false otherwise
	 */
	public function isValidUrl($url = '') {
		$res = filter_var($url, FILTER_VALIDATE_URL);
		if ($res) {
			return $res;
		}

		// No multibyte characters: the URL is simply invalid
		$length = mb_strlen($url, 'UTF-8');
		if (strlen($url) === $length) {
			return $res;
		}

		// Replace wide chars by "X"
		$ascii = '';
		for ($i = 0; $i < $length; ++$i) {
			$char = mb_substr($url, $i, 1, 'UTF-8');
			$ascii .= (strlen($char) > 1) ? 'X' : $char;
		}

		// Re-check now
		return filter_var($ascii, FILTER_VALIDATE_URL) ? $url : false;
	}

	/**
	 * Requests the resource with GET and caches the response
	 *
	 * Both successful and failed requests are cached per URL, so each URL is
	 * requested at most once per process.
	 *
	 * @param string $url URL of the resource
	 * @return Response|false False if the URL is invalid or the request failed
	 */
	public function request($url = '') {
		$url = str_replace(' ', '%20', $url);
		if (!$this->isValidUrl($url)) {
			return false;
		}

		if (!isset(self::$cache[$url])) {
			try {
				$response = $this->client->request('GET', $url);
			} catch (Exception $e) {
				$response = false;
				error_log("Parser Error for GET request ($url): {$e->getMessage()}");
			}
			self::$cache[$url] = $response;
		}

		return self::$cache[$url];
	}

	/**
	 * Get contents of the page
	 *
	 * @param string $url URL of the resource
	 * @return string Response body, or an empty string if the resource is unreachable
	 */
	public function read($url = '') {
		if (!$this->exists($url)) {
			return '';
		}

		// Served from the response cache populated by exists()
		$response = $this->request($url);
		return (string) $response->getBody();
	}

	/**
	 * Checks if resource is an html page
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isHTML($url = '') {
		$mime = $this->getContentType($url);
		return strpos($mime, 'text/html') !== false;
	}

	/**
	 * Checks if resource is JSON
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isJSON($url = '') {
		$mime = $this->getContentType($url);
		return strpos($mime, 'json') !== false;
	}

	/**
	 * Checks if resource is XML
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isXML($url = '') {
		$mime = $this->getContentType($url);
		return strpos($mime, 'xml') !== false;
	}

	/**
	 * Checks if resource is an image
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isImage($url = '') {
		$mime = $this->getContentType($url);
		if ($mime) {
			list($simple, ) = explode('/', $mime);
			return ($simple === 'image');
		}

		return false;
	}

	/**
	 * Get mime type of the URL content
	 *
	 * @param string $url URL of the resource
	 * @return string Lower-cased media type without parameters, or an empty string
	 */
	public function getContentType($url = '') {
		$response = $this->request($url);
		if ($response instanceof Response) {
			$header = $response->getHeader('Content-Type');
			if (is_array($header) && !empty($header)) {
				// strip parameters such as "; charset=utf-8"
				$parts = explode(';', $header[0]);
				// media types are case-insensitive
				return strtolower(trim($parts[0]));
			}
		}
		return '';
	}

	/**
	 * Returns HTML contents of the page
	 *
	 * @param string $url URL of the resource
	 * @return string Empty string if the resource is not HTML
	 */
	public function getHTML($url = '') {
		if (!$this->isHTML($url)) {
			return '';
		}
		return $this->read($url);
	}

	/**
	 * Returns HTML contents of the page as a DOMDocument
	 *
	 * @param string $url URL of the resource
	 * @return DOMDocument|false False if there is no HTML to load
	 */
	public function getDOM($url = '') {
		$html = $this->getHTML($url);
		if (empty($html)) {
			return false;
		}

		$doc = new DOMDocument();

		// Real-world markup is rarely valid: collect libxml errors silently,
		// then restore whatever setting the caller had
		$previous = libxml_use_internal_errors(true);

		if (is_callable('mb_convert_encoding')) {
			// NOTE(review): the 'HTML-ENTITIES' encoding is deprecated as of PHP 8.2
			$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
		} else {
			$doc->loadHTML($html);
		}

		libxml_clear_errors();
		libxml_use_internal_errors($previous);

		// Remember the page URL so relative links can be resolved later
		if (!$doc->documentURI) {
			$doc->documentURI = $url;
		}

		return $doc;
	}

	/**
	 * Parses document title
	 *
	 * @param DOMDocument $doc Document
	 * @return string Contents of the first <title> element, or an empty string
	 */
	public function parseTitle(DOMDocument $doc) {
		$node = $doc->getElementsByTagName('title')->item(0);
		if (!$node) {
			return '';
		}
		return $node->nodeValue ?: '';
	}

	/**
	 * Parses <link> tags
	 *
	 * Collects icons, the canonical URL and oEmbed endpoints. The rel
	 * attribute is treated as a case-insensitive, space-separated token list.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseLinkTags(DOMDocument $doc) {

		$meta = [
			'icons' => [],
			'thumbnails' => [],
		];

		$oembed_types = [
			'application/json+oembed',
			'text/json+oembed',
			'application/xml+oembed',
			'text/xml+oembed',
		];

		foreach ($doc->getElementsByTagName('link') as $node) {
			$href = trim($node->getAttribute('href'));
			if ($href === '') {
				// nothing to resolve
				continue;
			}

			$rel = strtolower(trim($node->getAttribute('rel')));
			$tokens = preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY);

			foreach (array_unique($tokens) as $token) {
				switch ($token) {

					case 'icon' :
						$image_url = $this->getAbsoluteURL($doc, $href);
						if ($this->isImage($image_url)) {
							$meta['icons'][] = $image_url;
						}
						break;

					case 'canonical' :
						$meta['canonical'] = $this->getAbsoluteURL($doc, $href);
						break;

					case 'alternate' :
						$type = strtolower(trim($node->getAttribute('type')));
						if (in_array($type, $oembed_types, true)) {
							$meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
						}
						break;
				}
			}
		}

		return $meta;
	}

	/**
	 * Parses <meta> tags
	 *
	 * Every named tag is stored under 'metatags' (repeated names become
	 * lists); well-known names are additionally mapped to 'title', 'type',
	 * 'description', 'tags', 'provider_name' and 'thumbnails'.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseMetaTags(DOMDocument $doc) {

		$meta = [];

		foreach ($doc->getElementsByTagName('meta') as $node) {
			$name = $node->getAttribute('name');
			if (!$name) {
				$name = $node->getAttribute('property');
			}
			if (!$name) {
				continue;
			}

			$name = strtolower($name);

			// Treat all Open Graph image URL variants as og:image
			if ($name === 'og:image:url' || $name === 'og:image:secure_url') {
				$name = 'og:image';
			}

			$content = $node->getAttribute('content');

			if (isset($meta['metatags'][$name])) {
				// second occurrence: promote the stored value to a list
				if (!is_array($meta['metatags'][$name])) {
					$meta['metatags'][$name] = [$meta['metatags'][$name]];
				}
				$meta['metatags'][$name][] = $content;
			} else {
				$meta['metatags'][$name] = $content;
			}

			switch ($name) {

				case 'title' :
				case 'og:title' :
				case 'twitter:title' :
					if (empty($meta['title'])) {
						$meta['title'] = $content;
					}
					break;

				case 'og:type' :
					if (empty($meta['type'])) {
						$meta['type'] = $content;
					}
					break;

				case 'description' :
				case 'og:description' :
				case 'twitter:description' :
					if (empty($meta['description'])) {
						$meta['description'] = $content;
					}
					break;

				case 'keywords' :
					$meta['tags'] = array_map('trim', explode(',', $content));
					break;

				case 'og:site_name' :
				case 'twitter:site' :
					if (empty($meta['provider_name'])) {
						$meta['provider_name'] = $content;
					}
					break;

				case 'og:image' :
				case 'twitter:image' :
					$image_url = $this->getAbsoluteURL($doc, $content);
					if ($this->isImage($image_url)) {
						$meta['thumbnails'][] = $image_url;
					}
					break;
			}
		}

		return $meta;
	}

	/**
	 * Parses <img> tags
	 *
	 * @param DOMDocument $doc Document
	 * @return array Array with a 'thumbnails' list of image URLs
	 */
	public function parseImgTags(DOMDocument $doc) {

		$meta = [
			'thumbnails' => [],
		];

		foreach ($doc->getElementsByTagName('img') as $node) {
			$src = trim($node->getAttribute('src'));
			if ($src === '') {
				// an empty src would resolve to the page itself
				continue;
			}

			$image_url = $this->getAbsoluteURL($doc, $src);
			if ($this->isImage($image_url)) {
				$meta['thumbnails'][] = $image_url;
			}
		}

		return $meta;
	}

	/**
	 * Normalizes relative URLs
	 *
	 * Resolves $href against the document URI: absolute URLs are returned
	 * unchanged, protocol-relative URLs inherit the document scheme,
	 * root-relative URLs are appended to the document origin, and other
	 * relative URLs are resolved against the directory of the document path.
	 * Dot segments ("./", "../") are not collapsed.
	 *
	 * @param DOMDocument $doc  Document
	 * @param string      $href URL to normalize
	 * @return string|false False for data URIs, or when a relative URL can
	 *                      not be resolved because the document has no
	 *                      absolute URI
	 */
	public function getAbsoluteURL(DOMDocument $doc, $href = '') {

		$href = trim((string) $href);

		if (preg_match('/^data:/i', $href)) {
			// data URIs can not be resolved
			return false;
		}

		$uri = (string) $doc->documentURI;
		$scheme = parse_url($uri, PHP_URL_SCHEME);
		$host = parse_url($uri, PHP_URL_HOST);

		// Check if $href is absolute
		if (parse_url($href, PHP_URL_HOST)) {
			if (substr($href, 0, 2) === '//' && $scheme) {
				// protocol-relative URL: inherit the document scheme
				return "$scheme:$href";
			}
			return $href;
		}

		if (!$scheme || !$host) {
			// no usable base to resolve against
			return false;
		}

		$port = parse_url($uri, PHP_URL_PORT);
		$root = "$scheme://$host" . ($port ? ":$port" : '');

		if (substr($href, 0, 1) === '/') {
			// URL is relative to site root
			return $root . $href;
		}

		$path = parse_url($uri, PHP_URL_PATH) ?: '/';

		if ($href === '' || $href[0] === '#') {
			// same document, optionally with a fragment
			$query = parse_url($uri, PHP_URL_QUERY);
			return $root . $path . ($query ? "?$query" : '') . $href;
		}

		if ($href[0] === '?') {
			// same path, different query
			return $root . $path . $href;
		}

		// URL is relative to the directory of the page, not to the page itself
		$directory = substr($path, 0, strrpos($path, '/') + 1);

		return $root . $directory . $href;
	}

}
This check marks private properties in classes that are never used. Those properties can be removed.