Test Setup Failed
Pull Request — master (#4)
by Matt
02:25
created
src/hypeJunction/Parser.php 1 patch
Indentation   +560 added lines, -560 removed lines patch added patch discarded remove patch
@@ -12,570 +12,570 @@
 block discarded – undo
12 12
  */
13 13
 class Parser {
14 14
 
15
-	/**
16
-	 * @var ClientInterface
17
-	 */
18
-	private $client;
19
-
20
-	/**
21
-	 * @var array
22
-	 */
23
-	private static $cache;
24
-	private $urls = [];
25
-
26
-	/**
27
-	 * Constructor
28
-	 * @param ClientInterface $client HTTP Client
29
-	 */
30
-	public function __construct(ClientInterface $client) {
31
-		$this->client = $client;
32
-	}
33
-
34
-	/**
35
-	 * Parses a URL into an array of metatags
36
-	 *
37
-	 * @param string $url URL to parse
38
-	 * @return array
39
-	 */
40
-	public function parse($url = '') {
41
-
42
-		$data = $this->getImageData($url);
43
-		if (!$data) {
44
-			$data = $this->getOEmbedData($url);
45
-		}
46
-		if (!$data) {
47
-			$data = $this->getDOMData($url);
48
-			if (is_array($data) && !empty($data['oembed_url'])) {
49
-				foreach ($data['oembed_url'] as $oembed_url) {
50
-					if (in_array($oembed_url, $this->urls)) {
51
-						continue;
52
-					}
53
-					$this->urls[] = $oembed_url;
54
-					$oembed_data = $this->parse($oembed_url);
55
-					if (!empty($oembed_data) && is_array($oembed_data)) {
56
-						$oembed_data['oembed_url'] = $oembed_data['url'];
57
-						unset($oembed_data['url']);
58
-						$data = array_merge($data, $oembed_data);
59
-					}
60
-				}
61
-			}
62
-		}
63
-
64
-		if (!is_array($data)) {
65
-			$data = array();
66
-		}
67
-
68
-		if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
69
-			$data['thumbnail_url'] = $data['thumbnails'][0];
70
-		}
71
-
72
-		return $data;
73
-	}
74
-
75
-	/**
76
-	 * Parses image metatags
77
-	 *
78
-	 * @param string $url URL of the image
79
-	 * @return array|false
80
-	 */
81
-	public function getImageData($url = '') {
82
-		if (!$this->isImage($url)) {
83
-			return false;
84
-		}
85
-
86
-		return array(
87
-			'type' => 'photo',
88
-			'url' => $url,
89
-			'thumbnails' => array($url),
90
-		);
91
-	}
92
-
93
-	/**
94
-	 * Parses OEmbed data
95
-	 *
96
-	 * @param  string $url URL of the OEmbed (JSON or XML) resource
97
-	 * @return array|false
98
-	 */
99
-	public function getOEmbedData($url = '') {
100
-
101
-		if (!$this->isJSON($url) && !$this->isXML($url)) {
102
-			return false;
103
-		}
104
-
105
-		$meta = array(
106
-			'url' => $url,
107
-		);
108
-
109
-		$content = $this->read($url);
110
-		if (!$content) {
111
-			return $meta;
112
-		}
113
-
114
-		$data = new \stdClass();
115
-		if ($this->isJSON($url)) {
116
-			$data = json_decode($content);
117
-		} else if ($this->isXML($url)) {
118
-			$data = simplexml_load_string($content);
119
-		}
120
-
121
-		$props = array(
122
-			'type',
123
-			'version',
124
-			'title',
125
-			'author_name',
126
-			'author_url',
127
-			'provider_name',
128
-			'provider_url',
129
-			'cache_age',
130
-			'thumbnail_url',
131
-			'thumbnail_width',
132
-			'thumbnail_height',
133
-			'width',
134
-			'height',
135
-			'html',
136
-		);
137
-		foreach ($props as $key) {
138
-			if (!empty($data->$key)) {
139
-				$meta[$key] = (string) $data->$key;
140
-			}
141
-		}
142
-		return $meta;
143
-	}
144
-
145
-	/**
146
-	 * Parses metatags from DOM
147
-	 *
148
-	 * @param  string $url URL
149
-	 * @return array|false
150
-	 */
151
-	public function getDOMData($url = '') {
152
-
153
-		if (!$this->isHTML($url)) {
154
-			return false;
155
-		}
156
-
157
-		$doc = $this->getDOM($url);
158
-		if (!$doc) {
159
-			return false;
160
-		}
161
-
162
-		$defaults = array(
163
-			'url' => $url,
164
-		);
165
-
166
-		$link_tags = $this->parseLinkTags($doc);
167
-		$meta_tags = $this->parseMetaTags($doc);
168
-		$img_tags = $this->parseImgTags($doc);
169
-
170
-		$meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
171
-
172
-		if (empty($meta['title'])) {
173
-			$meta['title'] = $this->parseTitle($doc);
174
-		}
175
-
176
-
177
-		return $meta;
178
-	}
179
-
180
-	/**
181
-	 * Check if URL exists and is reachable by making an HTTP request to retrieve header information
182
-	 *
183
-	 * @param string $url URL of the resource
184
-	 * @return boolean
185
-	 */
186
-	public function exists($url = '') {
187
-		$response = $this->request($url);
188
-		if ($response instanceof Response) {
189
-			return $response->getStatusCode() == 200;
190
-		}
191
-		return false;
192
-	}
193
-
194
-	/**
195
-	 * Validate URL
196
-	 * 
197
-	 * @param string $url URL to validate
198
-	 * @return bool
199
-	 */
200
-	public function isValidUrl($url = '') {
201
-		// based on http://php.net/manual/en/function.filter-var.php#104160
202
-		// adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
203
-		$res = filter_var($url, FILTER_VALIDATE_URL);
204
-		if ($res) {
205
-			return $res;
206
-		}
207
-		// Check if it has unicode chars.
208
-		$l = mb_strlen($url);
209
-		if (strlen($url) == $l) {
210
-			return $res;
211
-		}
212
-		// Replace wide chars by “X”.
213
-		$s = '';
214
-		for ($i = 0; $i < $l; ++$i) {
215
-			$ch = elgg_substr($url, $i, 1);
216
-			$s .= (strlen($ch) > 1) ? 'X' : $ch;
217
-		}
218
-		// Re-check now.
219
-		return filter_var($s, FILTER_VALIDATE_URL) ? $url : false;
220
-	}
221
-
222
-	/**
223
-	 * Requests the resource with GET and returns the response, cached per URL
224
-	 *
225
-	 * @param string $url URL of the resource
226
-	 * @return Response|false
227
-	 */
228
-	public function request($url = '') {
229
-		$url = str_replace(' ', '%20', $url);
230
-		if (!$this->isValidUrl($url)) {
231
-			return false;
232
-		}
233
-		if (!isset(self::$cache[$url])) {
234
-			try {
235
-				$response = $this->client->request('GET', $url);
236
-			} catch (Exception $e) {
237
-				$response = false;
238
-				error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
239
-			}
240
-			self::$cache[$url] = $response;
241
-		}
242
-
243
-		return self::$cache[$url];
244
-	}
245
-
246
-	/**
247
-	 * Get contents of the page
248
-	 *
249
-	 * @param string $url URL of the resource
250
-	 * @return string
251
-	 */
252
-	public function read($url = '') {
253
-		$body = '';
254
-		if (!$this->exists($url)) {
255
-			return $body;
256
-		}
257
-
258
-		$response = $this->request($url);
259
-		$body = (string) $response->getBody();
260
-		return $body;
261
-	}
262
-
263
-	/**
264
-	 * Checks if resource is an html page
265
-	 *
266
-	 * @param string $url URL of the resource
267
-	 * @return boolean
268
-	 */
269
-	public function isHTML($url = '') {
270
-		$mime = $this->getContentType($url);
271
-		return strpos($mime, 'text/html') !== false;
272
-	}
273
-
274
-	/**
275
-	 * Checks if resource is JSON
276
-	 *
277
-	 * @param string $url URL of the resource
278
-	 * @return boolean
279
-	 */
280
-	public function isJSON($url = '') {
281
-		$mime = $this->getContentType($url);
282
-		return strpos($mime, 'json') !== false;
283
-	}
284
-
285
-	/**
286
-	 * Checks if resource is XML
287
-	 *
288
-	 * @param string $url URL of the resource
289
-	 * @return boolean
290
-	 */
291
-	public function isXML($url = '') {
292
-		$mime = $this->getContentType($url);
293
-		return strpos($mime, 'xml') !== false;
294
-	}
295
-
296
-	/**
297
-	 * Checks if resource is an image
298
-	 *
299
-	 * @param string $url URL of the resource
300
-	 * @return boolean
301
-	 */
302
-	public function isImage($url = '') {
303
-		$mime = $this->getContentType($url);
304
-		if ($mime) {
305
-			list($simple, ) = explode('/', $mime);
306
-			return ($simple == 'image');
307
-		}
308
-
309
-		return false;
310
-	}
311
-
312
-	/**
313
-	 * Get mime type of the URL content
314
-	 *
315
-	 * @param string $url URL of the resource
316
-	 * @return string
317
-	 */
318
-	public function getContentType($url = '') {
319
-		$response = $this->request($url);
320
-		if ($response instanceof Response) {
321
-			$header = $response->getHeader('Content-Type');
322
-			if (is_array($header) && !empty($header)) {
323
-				$parts = explode(';', $header[0]);
324
-				return trim($parts[0]);
325
-			}
326
-		}
327
-		return '';
328
-	}
329
-
330
-	/**
331
-	 * Returns HTML contents of the page
332
-	 *
333
-	 * @param string $url URL of the resource
334
-	 * @return string
335
-	 */
336
-	public function getHTML($url = '') {
337
-		if (!$this->isHTML($url)) {
338
-			return '';
339
-		}
340
-		return $this->read($url);
341
-	}
342
-
343
-	/**
344
-	 * Returns HTML contents of the page as a DOMDocument
345
-	 *
346
-	 * @param string $url URL of the resource
347
-	 * @return DOMDocument|false
348
-	 */
349
-	public function getDOM($url = '') {
350
-		$html = $this->getHTML($url);
351
-		if (empty($html)) {
352
-			return false;
353
-		}
354
-		$doc = new DOMDocument();
15
+    /**
16
+     * @var ClientInterface
17
+     */
18
+    private $client;
19
+
20
+    /**
21
+     * @var array
22
+     */
23
+    private static $cache;
24
+    private $urls = [];
25
+
26
+    /**
27
+     * Constructor
28
+     * @param ClientInterface $client HTTP Client
29
+     */
30
+    public function __construct(ClientInterface $client) {
31
+        $this->client = $client;
32
+    }
33
+
34
+    /**
35
+     * Parses a URL into an array of metatags
36
+     *
37
+     * @param string $url URL to parse
38
+     * @return array
39
+     */
40
+    public function parse($url = '') {
41
+
42
+        $data = $this->getImageData($url);
43
+        if (!$data) {
44
+            $data = $this->getOEmbedData($url);
45
+        }
46
+        if (!$data) {
47
+            $data = $this->getDOMData($url);
48
+            if (is_array($data) && !empty($data['oembed_url'])) {
49
+                foreach ($data['oembed_url'] as $oembed_url) {
50
+                    if (in_array($oembed_url, $this->urls)) {
51
+                        continue;
52
+                    }
53
+                    $this->urls[] = $oembed_url;
54
+                    $oembed_data = $this->parse($oembed_url);
55
+                    if (!empty($oembed_data) && is_array($oembed_data)) {
56
+                        $oembed_data['oembed_url'] = $oembed_data['url'];
57
+                        unset($oembed_data['url']);
58
+                        $data = array_merge($data, $oembed_data);
59
+                    }
60
+                }
61
+            }
62
+        }
63
+
64
+        if (!is_array($data)) {
65
+            $data = array();
66
+        }
67
+
68
+        if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
69
+            $data['thumbnail_url'] = $data['thumbnails'][0];
70
+        }
71
+
72
+        return $data;
73
+    }
74
+
75
+    /**
76
+     * Parses image metatags
77
+     *
78
+     * @param string $url URL of the image
79
+     * @return array|false
80
+     */
81
+    public function getImageData($url = '') {
82
+        if (!$this->isImage($url)) {
83
+            return false;
84
+        }
85
+
86
+        return array(
87
+            'type' => 'photo',
88
+            'url' => $url,
89
+            'thumbnails' => array($url),
90
+        );
91
+    }
92
+
93
+    /**
94
+     * Parses OEmbed data
95
+     *
96
+     * @param  string $url URL of the OEmbed (JSON or XML) resource
97
+     * @return array|false
98
+     */
99
+    public function getOEmbedData($url = '') {
100
+
101
+        if (!$this->isJSON($url) && !$this->isXML($url)) {
102
+            return false;
103
+        }
104
+
105
+        $meta = array(
106
+            'url' => $url,
107
+        );
108
+
109
+        $content = $this->read($url);
110
+        if (!$content) {
111
+            return $meta;
112
+        }
113
+
114
+        $data = new \stdClass();
115
+        if ($this->isJSON($url)) {
116
+            $data = json_decode($content);
117
+        } else if ($this->isXML($url)) {
118
+            $data = simplexml_load_string($content);
119
+        }
120
+
121
+        $props = array(
122
+            'type',
123
+            'version',
124
+            'title',
125
+            'author_name',
126
+            'author_url',
127
+            'provider_name',
128
+            'provider_url',
129
+            'cache_age',
130
+            'thumbnail_url',
131
+            'thumbnail_width',
132
+            'thumbnail_height',
133
+            'width',
134
+            'height',
135
+            'html',
136
+        );
137
+        foreach ($props as $key) {
138
+            if (!empty($data->$key)) {
139
+                $meta[$key] = (string) $data->$key;
140
+            }
141
+        }
142
+        return $meta;
143
+    }
144
+
145
+    /**
146
+     * Parses metatags from DOM
147
+     *
148
+     * @param  string $url URL
149
+     * @return array|false
150
+     */
151
+    public function getDOMData($url = '') {
152
+
153
+        if (!$this->isHTML($url)) {
154
+            return false;
155
+        }
156
+
157
+        $doc = $this->getDOM($url);
158
+        if (!$doc) {
159
+            return false;
160
+        }
161
+
162
+        $defaults = array(
163
+            'url' => $url,
164
+        );
165
+
166
+        $link_tags = $this->parseLinkTags($doc);
167
+        $meta_tags = $this->parseMetaTags($doc);
168
+        $img_tags = $this->parseImgTags($doc);
169
+
170
+        $meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
171
+
172
+        if (empty($meta['title'])) {
173
+            $meta['title'] = $this->parseTitle($doc);
174
+        }
175
+
176
+
177
+        return $meta;
178
+    }
179
+
180
+    /**
181
+     * Check if URL exists and is reachable by making an HTTP request to retrieve header information
182
+     *
183
+     * @param string $url URL of the resource
184
+     * @return boolean
185
+     */
186
+    public function exists($url = '') {
187
+        $response = $this->request($url);
188
+        if ($response instanceof Response) {
189
+            return $response->getStatusCode() == 200;
190
+        }
191
+        return false;
192
+    }
193
+
194
+    /**
195
+     * Validate URL
196
+     * 
197
+     * @param string $url URL to validate
198
+     * @return bool
199
+     */
200
+    public function isValidUrl($url = '') {
201
+        // based on http://php.net/manual/en/function.filter-var.php#104160
202
+        // adapted by @mrclay in https://github.com/mrclay/Elgg-leaf/blob/62bf31c0ccdaab549a7e585a4412443e09821db3/engine/lib/output.php
203
+        $res = filter_var($url, FILTER_VALIDATE_URL);
204
+        if ($res) {
205
+            return $res;
206
+        }
207
+        // Check if it has unicode chars.
208
+        $l = mb_strlen($url);
209
+        if (strlen($url) == $l) {
210
+            return $res;
211
+        }
212
+        // Replace wide chars by “X”.
213
+        $s = '';
214
+        for ($i = 0; $i < $l; ++$i) {
215
+            $ch = elgg_substr($url, $i, 1);
216
+            $s .= (strlen($ch) > 1) ? 'X' : $ch;
217
+        }
218
+        // Re-check now.
219
+        return filter_var($s, FILTER_VALIDATE_URL) ? $url : false;
220
+    }
221
+
222
+    /**
223
+     * Requests the resource with GET and returns the response, cached per URL
224
+     *
225
+     * @param string $url URL of the resource
226
+     * @return Response|false
227
+     */
228
+    public function request($url = '') {
229
+        $url = str_replace(' ', '%20', $url);
230
+        if (!$this->isValidUrl($url)) {
231
+            return false;
232
+        }
233
+        if (!isset(self::$cache[$url])) {
234
+            try {
235
+                $response = $this->client->request('GET', $url);
236
+            } catch (Exception $e) {
237
+                $response = false;
238
+                error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
239
+            }
240
+            self::$cache[$url] = $response;
241
+        }
242
+
243
+        return self::$cache[$url];
244
+    }
245
+
246
+    /**
247
+     * Get contents of the page
248
+     *
249
+     * @param string $url URL of the resource
250
+     * @return string
251
+     */
252
+    public function read($url = '') {
253
+        $body = '';
254
+        if (!$this->exists($url)) {
255
+            return $body;
256
+        }
257
+
258
+        $response = $this->request($url);
259
+        $body = (string) $response->getBody();
260
+        return $body;
261
+    }
262
+
263
+    /**
264
+     * Checks if resource is an html page
265
+     *
266
+     * @param string $url URL of the resource
267
+     * @return boolean
268
+     */
269
+    public function isHTML($url = '') {
270
+        $mime = $this->getContentType($url);
271
+        return strpos($mime, 'text/html') !== false;
272
+    }
273
+
274
+    /**
275
+     * Checks if resource is JSON
276
+     *
277
+     * @param string $url URL of the resource
278
+     * @return boolean
279
+     */
280
+    public function isJSON($url = '') {
281
+        $mime = $this->getContentType($url);
282
+        return strpos($mime, 'json') !== false;
283
+    }
284
+
285
+    /**
286
+     * Checks if resource is XML
287
+     *
288
+     * @param string $url URL of the resource
289
+     * @return boolean
290
+     */
291
+    public function isXML($url = '') {
292
+        $mime = $this->getContentType($url);
293
+        return strpos($mime, 'xml') !== false;
294
+    }
295
+
296
+    /**
297
+     * Checks if resource is an image
298
+     *
299
+     * @param string $url URL of the resource
300
+     * @return boolean
301
+     */
302
+    public function isImage($url = '') {
303
+        $mime = $this->getContentType($url);
304
+        if ($mime) {
305
+            list($simple, ) = explode('/', $mime);
306
+            return ($simple == 'image');
307
+        }
308
+
309
+        return false;
310
+    }
311
+
312
+    /**
313
+     * Get mime type of the URL content
314
+     *
315
+     * @param string $url URL of the resource
316
+     * @return string
317
+     */
318
+    public function getContentType($url = '') {
319
+        $response = $this->request($url);
320
+        if ($response instanceof Response) {
321
+            $header = $response->getHeader('Content-Type');
322
+            if (is_array($header) && !empty($header)) {
323
+                $parts = explode(';', $header[0]);
324
+                return trim($parts[0]);
325
+            }
326
+        }
327
+        return '';
328
+    }
329
+
330
+    /**
331
+     * Returns HTML contents of the page
332
+     *
333
+     * @param string $url URL of the resource
334
+     * @return string
335
+     */
336
+    public function getHTML($url = '') {
337
+        if (!$this->isHTML($url)) {
338
+            return '';
339
+        }
340
+        return $this->read($url);
341
+    }
342
+
343
+    /**
344
+     * Returns HTML contents of the page as a DOMDocument
345
+     *
346
+     * @param string $url URL of the resource
347
+     * @return DOMDocument|false
348
+     */
349
+    public function getDOM($url = '') {
350
+        $html = $this->getHTML($url);
351
+        if (empty($html)) {
352
+            return false;
353
+        }
354
+        $doc = new DOMDocument();
355 355
 		
356
-		libxml_use_internal_errors(true);
356
+        libxml_use_internal_errors(true);
357 357
 		
358
-		if (is_callable('mb_convert_encoding')) {
359
-			$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
360
-		} else {
361
-			$doc->loadHTML($html);
362
-		}
363
-		if (!$doc->documentURI) {
364
-			$doc->documentURI = $url;
365
-		}
358
+        if (is_callable('mb_convert_encoding')) {
359
+            $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
360
+        } else {
361
+            $doc->loadHTML($html);
362
+        }
363
+        if (!$doc->documentURI) {
364
+            $doc->documentURI = $url;
365
+        }
366 366
 		
367
-		libxml_clear_errors();
367
+        libxml_clear_errors();
368 368
 		
369
-		return $doc;
370
-	}
371
-
372
-	/**
373
-	 * Parses document title
374
-	 *
375
-	 * @param DOMDocument $doc Document
376
-	 * @return string
377
-	 */
378
-	public function parseTitle(DOMDocument $doc) {
379
-		$node = $doc->getElementsByTagName('title');
380
-		$title = $node->item(0)->nodeValue;
381
-		return ($title) ?: '';
382
-	}
383
-
384
-	/**
385
-	 * Parses <link> tags
386
-	 *
387
-	 * @param DOMDocument $doc Document
388
-	 * @return array
389
-	 */
390
-	public function parseLinkTags(DOMDocument $doc) {
391
-
392
-		$meta = array(
393
-			'icons' => [],
394
-			'thumbnails' => [],
395
-		);
396
-
397
-		$nodes = $doc->getElementsByTagName('link');
398
-		foreach ($nodes as $node) {
399
-			$rel = $node->getAttribute('rel');
400
-			$href = $node->getAttribute('href');
401
-
402
-			switch ($rel) {
403
-
404
-				case 'icon' :
405
-					$image_url = $this->getAbsoluteURL($doc, $href);
406
-					if ($this->isImage($image_url)) {
407
-						$meta['icons'][] = $image_url;
408
-					}
409
-					break;
410
-
411
-				case 'canonical' :
412
-					$meta['canonical'] = $this->getAbsoluteURL($doc, $href);
413
-					break;
414
-
415
-				case 'alternate' :
416
-					$type = $node->getAttribute('type');
417
-					if (in_array($type, array(
418
-								'application/json+oembed',
419
-								'text/json+oembed',
420
-								'application/xml+oembed',
421
-								'text/xml+oembed'
422
-							))) {
423
-						$meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
424
-					}
425
-					break;
426
-			}
427
-		}
428
-
429
-		return $meta;
430
-	}
431
-
432
-	/**
433
-	 * Parses <meta> tags
434
-	 *
435
-	 * @param DOMDocument $doc Document
436
-	 * @return array
437
-	 */
438
-	public function parseMetaTags(DOMDocument $doc) {
439
-
440
-		$meta = array();
441
-
442
-		$nodes = $doc->getElementsByTagName('meta');
443
-		if (!empty($nodes)) {
444
-			foreach ($nodes as $node) {
445
-				$name = $node->getAttribute('name');
446
-				if (!$name) {
447
-					$name = $node->getAttribute('property');
448
-				}
449
-				if (!$name) {
450
-					continue;
451
-				}
452
-
453
-				$name = strtolower($name);
369
+        return $doc;
370
+    }
371
+
372
+    /**
373
+     * Parses document title
374
+     *
375
+     * @param DOMDocument $doc Document
376
+     * @return string
377
+     */
378
+    public function parseTitle(DOMDocument $doc) {
379
+        $node = $doc->getElementsByTagName('title');
380
+        $title = $node->item(0)->nodeValue;
381
+        return ($title) ?: '';
382
+    }
383
+
384
+    /**
385
+     * Parses <link> tags
386
+     *
387
+     * @param DOMDocument $doc Document
388
+     * @return array
389
+     */
390
+    public function parseLinkTags(DOMDocument $doc) {
391
+
392
+        $meta = array(
393
+            'icons' => [],
394
+            'thumbnails' => [],
395
+        );
396
+
397
+        $nodes = $doc->getElementsByTagName('link');
398
+        foreach ($nodes as $node) {
399
+            $rel = $node->getAttribute('rel');
400
+            $href = $node->getAttribute('href');
401
+
402
+            switch ($rel) {
403
+
404
+                case 'icon' :
405
+                    $image_url = $this->getAbsoluteURL($doc, $href);
406
+                    if ($this->isImage($image_url)) {
407
+                        $meta['icons'][] = $image_url;
408
+                    }
409
+                    break;
410
+
411
+                case 'canonical' :
412
+                    $meta['canonical'] = $this->getAbsoluteURL($doc, $href);
413
+                    break;
414
+
415
+                case 'alternate' :
416
+                    $type = $node->getAttribute('type');
417
+                    if (in_array($type, array(
418
+                                'application/json+oembed',
419
+                                'text/json+oembed',
420
+                                'application/xml+oembed',
421
+                                'text/xml+oembed'
422
+                            ))) {
423
+                        $meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
424
+                    }
425
+                    break;
426
+            }
427
+        }
428
+
429
+        return $meta;
430
+    }
431
+
432
+    /**
433
+     * Parses <meta> tags
434
+     *
435
+     * @param DOMDocument $doc Document
436
+     * @return array
437
+     */
438
+    public function parseMetaTags(DOMDocument $doc) {
439
+
440
+        $meta = array();
441
+
442
+        $nodes = $doc->getElementsByTagName('meta');
443
+        if (!empty($nodes)) {
444
+            foreach ($nodes as $node) {
445
+                $name = $node->getAttribute('name');
446
+                if (!$name) {
447
+                    $name = $node->getAttribute('property');
448
+                }
449
+                if (!$name) {
450
+                    continue;
451
+                }
452
+
453
+                $name = strtolower($name);
454 454
 				
455
-				if ($name == 'og:image:url' || $name == 'og:image:secure_url') {
456
-					$name = 'og:image';
457
-				}
458
-
459
-				$content = $node->getAttribute('content');
460
-				if (isset($meta['metatags'][$name])) {
461
-					if (!is_array($meta['metatags'][$name])) {
462
-						$meta['metatags'][$name] = array($meta['metatags'][$name]);
463
-					}
464
-					$meta['metatags'][$name][] = $content;
465
-				} else {
466
-					$meta['metatags'][$name] = $content;
467
-				}
468
-
469
-				switch ($name) {
470
-
471
-					case 'title' :
472
-					case 'og:title' :
473
-					case 'twitter:title' :
474
-						if (empty($meta['title'])) {
475
-							$meta['title'] = $content;
476
-						}
477
-						break;
478
-
479
-					case 'og:type' :
480
-						if (empty($meta['type'])) {
481
-							$meta['type'] = $content;
482
-						}
483
-						break;
484
-
485
-					case 'description' :
486
-					case 'og:description' :
487
-					case 'twitter:description' :
488
-						if (empty($meta['description'])) {
489
-							$meta['description'] = $content;
490
-						}
491
-						break;
492
-
493
-					case 'keywords' :
494
-						if (is_string($content)) {
495
-							$content = explode(',', $content);
496
-							$content = array_map('trim', $content);
497
-						}
498
-						$meta['tags'] = $content;
499
-						break;
500
-
501
-					case 'og:site_name' :
502
-					case 'twitter:site' :
503
-						if (empty($meta['provider_name'])) {
504
-							$meta['provider_name'] = $content;
505
-						}
506
-						break;
507
-
508
-					case 'og:image' :
509
-					case 'twitter:image' :
510
-						$image_url = $this->getAbsoluteURL($doc, $content);
511
-						if ($this->isImage($image_url)) {
512
-							$meta['thumbnails'][] = $image_url;
513
-						}
514
-						break;
515
-				}
516
-			}
517
-		}
518
-
519
-		return $meta;
520
-	}
521
-
522
-	/**
523
-	 * Parses <img> tags
524
-	 *
525
-	 * @param DOMDocument $doc Document
526
-	 * @return array
527
-	 */
528
-	public function parseImgTags(DOMDocument $doc) {
529
-
530
-		$meta = array(
531
-			'thumbnails' => [],
532
-		);
533
-
534
-		$nodes = $doc->getElementsByTagName('img');
535
-		foreach ($nodes as $node) {
536
-			$src = $node->getAttribute('src');
537
-			$image_url = $this->getAbsoluteURL($doc, $src);
538
-			if ($this->isImage($image_url)) {
539
-				$meta['thumbnails'][] = $image_url;
540
-			}
541
-		}
542
-
543
-		return $meta;
544
-	}
545
-
546
-	/**
547
-	 * Normalizes relative URLs
548
-	 *
549
-	 * @param DOMDocument $doc  Document
550
-	 * @param string      $href URL to normalize
551
-	 * @return string|false
552
-	 */
553
-	public function getAbsoluteURL(DOMDocument $doc, $href = '') {
554
-
555
-		if (preg_match("/^data:/i", $href)) {
556
-			// data URIs can not be resolved
557
-			return false;
558
-		}
559
-
560
-		// Check if $href is absolute
561
-		if (parse_url($href, PHP_URL_HOST)) {
562
-			return $href;
563
-		}
564
-
565
-		$uri = trim($doc->documentURI ?: '', '/');
566
-
567
-		$scheme = parse_url($uri, PHP_URL_SCHEME);
568
-		$host = parse_url($uri, PHP_URL_HOST);
569
-
570
-		if (substr($href, 0, 1) === "/") {
571
-			// URL is relative to site root
572
-			return "$scheme://$host$href";
573
-		}
574
-
575
-		// URL is relative to page
576
-		$path = parse_url($uri, PHP_URL_PATH);
577
-
578
-		return "$scheme://$host$path/$href";
579
-	}
455
+                if ($name == 'og:image:url' || $name == 'og:image:secure_url') {
456
+                    $name = 'og:image';
457
+                }
458
+
459
+                $content = $node->getAttribute('content');
460
+                if (isset($meta['metatags'][$name])) {
461
+                    if (!is_array($meta['metatags'][$name])) {
462
+                        $meta['metatags'][$name] = array($meta['metatags'][$name]);
463
+                    }
464
+                    $meta['metatags'][$name][] = $content;
465
+                } else {
466
+                    $meta['metatags'][$name] = $content;
467
+                }
468
+
469
+                switch ($name) {
470
+
471
+                    case 'title' :
472
+                    case 'og:title' :
473
+                    case 'twitter:title' :
474
+                        if (empty($meta['title'])) {
475
+                            $meta['title'] = $content;
476
+                        }
477
+                        break;
478
+
479
+                    case 'og:type' :
480
+                        if (empty($meta['type'])) {
481
+                            $meta['type'] = $content;
482
+                        }
483
+                        break;
484
+
485
+                    case 'description' :
486
+                    case 'og:description' :
487
+                    case 'twitter:description' :
488
+                        if (empty($meta['description'])) {
489
+                            $meta['description'] = $content;
490
+                        }
491
+                        break;
492
+
493
+                    case 'keywords' :
494
+                        if (is_string($content)) {
495
+                            $content = explode(',', $content);
496
+                            $content = array_map('trim', $content);
497
+                        }
498
+                        $meta['tags'] = $content;
499
+                        break;
500
+
501
+                    case 'og:site_name' :
502
+                    case 'twitter:site' :
503
+                        if (empty($meta['provider_name'])) {
504
+                            $meta['provider_name'] = $content;
505
+                        }
506
+                        break;
507
+
508
+                    case 'og:image' :
509
+                    case 'twitter:image' :
510
+                        $image_url = $this->getAbsoluteURL($doc, $content);
511
+                        if ($this->isImage($image_url)) {
512
+                            $meta['thumbnails'][] = $image_url;
513
+                        }
514
+                        break;
515
+                }
516
+            }
517
+        }
518
+
519
+        return $meta;
520
+    }
521
+
522
+    /**
523
+     * Parses <img> tags
524
+     *
525
+     * @param DOMDocument $doc Document
526
+     * @return array
527
+     */
528
+    public function parseImgTags(DOMDocument $doc) {
529
+
530
+        $meta = array(
531
+            'thumbnails' => [],
532
+        );
533
+
534
+        $nodes = $doc->getElementsByTagName('img');
535
+        foreach ($nodes as $node) {
536
+            $src = $node->getAttribute('src');
537
+            $image_url = $this->getAbsoluteURL($doc, $src);
538
+            if ($this->isImage($image_url)) {
539
+                $meta['thumbnails'][] = $image_url;
540
+            }
541
+        }
542
+
543
+        return $meta;
544
+    }
545
+
546
+    /**
547
+     * Normalizes relative URLs
548
+     *
549
+     * @param DOMDocument $doc  Document
550
+     * @param string      $href URL to normalize
551
+     * @return string|false
552
+     */
553
+    public function getAbsoluteURL(DOMDocument $doc, $href = '') {
554
+
555
+        if (preg_match("/^data:/i", $href)) {
556
+            // data URIs can not be resolved
557
+            return false;
558
+        }
559
+
560
+        // Check if $href is absolute
561
+        if (parse_url($href, PHP_URL_HOST)) {
562
+            return $href;
563
+        }
564
+
565
+        $uri = trim($doc->documentURI ?: '', '/');
566
+
567
+        $scheme = parse_url($uri, PHP_URL_SCHEME);
568
+        $host = parse_url($uri, PHP_URL_HOST);
569
+
570
+        if (substr($href, 0, 1) === "/") {
571
+            // URL is relative to site root
572
+            return "$scheme://$host$href";
573
+        }
574
+
575
+        // URL is relative to page
576
+        $path = parse_url($uri, PHP_URL_PATH);
577
+
578
+        return "$scheme://$host$path/$href";
579
+    }
580 580
 
581 581
 }
Please login to merge, or discard this patch.