Test Failed
Push — master ( 9c6ff7...d19c43 )
by Ismayil
03:41 queued 01:17
created
src/hypeJunction/Parser.php 1 patch
Indentation   +486 added lines, -486 removed lines patch added patch discarded remove patch
@@ -12,493 +12,493 @@
 block discarded – undo
12 12
  */
13 13
 class Parser {
14 14
 
15
-	/**
16
-	 * @var ClientInterface
17
-	 */
18
-	private $client;
19
-
20
-	/**
21
-	 * @var array
22
-	 */
23
-	private static $cache;
24
-
25
-	/**
26
-	 * Constructor
27
-	 * @param ClientInterface $client HTTP Client
28
-	 */
29
-	public function __construct(ClientInterface $client) {
30
-		$this->client = $client;
31
-	}
32
-
33
-	/**
34
-	 * Parses a URL into a an array of metatags
35
-	 *
36
-	 * @param string $url URL to parse
37
-	 * @return array
38
-	 */
39
-	public function parse($url = '') {
40
-
41
-		$data = $this->getImageData($url);
42
-		if (!$data) {
43
-			$data = $this->getOEmbedData($url);
44
-		}
45
-		if (!$data) {
46
-			$data = $this->getDOMData($url);
47
-			if (is_array($data) && !empty($data['oembed_url'])) {
48
-				foreach ($data['oembed_url'] as $oembed_url) {
49
-					$oembed_data = $this->parse($oembed_url);
50
-					if (!empty($oembed_data) && is_array($oembed_data)) {
51
-						$oembed_data['oembed_url'] = $oembed_data['url'];
52
-						unset($oembed_data['url']);
53
-						$data = array_merge($data, $oembed_data);
54
-					}
55
-				}
56
-			}
57
-		}
15
+    /**
16
+     * @var ClientInterface
17
+     */
18
+    private $client;
19
+
20
+    /**
21
+     * @var array
22
+     */
23
+    private static $cache;
24
+
25
+    /**
26
+     * Constructor
27
+     * @param ClientInterface $client HTTP Client
28
+     */
29
+    public function __construct(ClientInterface $client) {
30
+        $this->client = $client;
31
+    }
32
+
33
+    /**
34
+     * Parses a URL into a an array of metatags
35
+     *
36
+     * @param string $url URL to parse
37
+     * @return array
38
+     */
39
+    public function parse($url = '') {
40
+
41
+        $data = $this->getImageData($url);
42
+        if (!$data) {
43
+            $data = $this->getOEmbedData($url);
44
+        }
45
+        if (!$data) {
46
+            $data = $this->getDOMData($url);
47
+            if (is_array($data) && !empty($data['oembed_url'])) {
48
+                foreach ($data['oembed_url'] as $oembed_url) {
49
+                    $oembed_data = $this->parse($oembed_url);
50
+                    if (!empty($oembed_data) && is_array($oembed_data)) {
51
+                        $oembed_data['oembed_url'] = $oembed_data['url'];
52
+                        unset($oembed_data['url']);
53
+                        $data = array_merge($data, $oembed_data);
54
+                    }
55
+                }
56
+            }
57
+        }
58 58
 	
59
-		if (!is_array($data)) {
60
-			$data = array();
61
-		}
62
-
63
-		if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
64
-			$data['thumbnail_url'] = $data['thumbnails'][0];
65
-		}
66
-
67
-		return $data;
68
-	}
69
-
70
-	/**
71
-	 * Parses image metatags
72
-	 *
73
-	 * @param string $url URL of the image
74
-	 * @return array|false
75
-	 */
76
-	public function getImageData($url = '') {
77
-		if (!$this->isImage($url)) {
78
-			return false;
79
-		}
80
-
81
-		return array(
82
-			'type' => 'photo',
83
-			'url' => $url,
84
-			'thumbnails' => array($url),
85
-		);
86
-	}
87
-
88
-	/**
89
-	 * Parses OEmbed data
90
-	 *
91
-	 * @param  string $url URL of the image
92
-	 * @return array|false
93
-	 */
94
-	public function getOEmbedData($url = '') {
95
-
96
-		if (!$this->isJSON($url) && !$this->isXML($url)) {
97
-			return false;
98
-		}
99
-
100
-		$meta = array(
101
-			'url' => $url,
102
-		);
103
-
104
-		$content = $this->read($url);
105
-		if (!$content) {
106
-			return $meta;
107
-		}
108
-
109
-		$data = new \stdClass();
110
-		if ($this->isJSON($url)) {
111
-			$data = json_decode($content);
112
-		} else if ($this->isXML($url)) {
113
-			$data = simplexml_load_string($content);
114
-		}
115
-
116
-		$props = array(
117
-			'type',
118
-			'version',
119
-			'title',
120
-			'author_name',
121
-			'author_url',
122
-			'provider_name',
123
-			'provider_url',
124
-			'cache_age',
125
-			'thumbnail_url',
126
-			'thumbnail_width',
127
-			'thumbnail_height',
128
-			'width',
129
-			'height',
130
-			'html',
131
-		);
132
-		foreach ($props as $key) {
133
-			if (!empty($data->$key)) {
134
-				$meta[$key] = (string) $data->$key;
135
-			}
136
-		}
137
-		return $meta;
138
-	}
139
-
140
-	/**
141
-	 * Parses metatags from DOM
142
-	 *
143
-	 * @param  string $url URL
144
-	 * @return array|false
145
-	 */
146
-	public function getDOMData($url = '') {
147
-
148
-		if (!$this->isHTML($url)) {
149
-			return false;
150
-		}
151
-
152
-		$doc = $this->getDOM($url);
153
-		$defaults = array(
154
-			'url' => $url,
155
-		);
156
-
157
-		$link_tags = $this->parseLinkTags($doc);
158
-		$meta_tags = $this->parseMetaTags($doc);
159
-		$img_tags = $this->parseImgTags($doc);
160
-
161
-		$meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
59
+        if (!is_array($data)) {
60
+            $data = array();
61
+        }
62
+
63
+        if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
64
+            $data['thumbnail_url'] = $data['thumbnails'][0];
65
+        }
66
+
67
+        return $data;
68
+    }
69
+
70
+    /**
71
+     * Parses image metatags
72
+     *
73
+     * @param string $url URL of the image
74
+     * @return array|false
75
+     */
76
+    public function getImageData($url = '') {
77
+        if (!$this->isImage($url)) {
78
+            return false;
79
+        }
80
+
81
+        return array(
82
+            'type' => 'photo',
83
+            'url' => $url,
84
+            'thumbnails' => array($url),
85
+        );
86
+    }
87
+
88
+    /**
89
+     * Parses OEmbed data
90
+     *
91
+     * @param  string $url URL of the image
92
+     * @return array|false
93
+     */
94
+    public function getOEmbedData($url = '') {
95
+
96
+        if (!$this->isJSON($url) && !$this->isXML($url)) {
97
+            return false;
98
+        }
99
+
100
+        $meta = array(
101
+            'url' => $url,
102
+        );
103
+
104
+        $content = $this->read($url);
105
+        if (!$content) {
106
+            return $meta;
107
+        }
108
+
109
+        $data = new \stdClass();
110
+        if ($this->isJSON($url)) {
111
+            $data = json_decode($content);
112
+        } else if ($this->isXML($url)) {
113
+            $data = simplexml_load_string($content);
114
+        }
115
+
116
+        $props = array(
117
+            'type',
118
+            'version',
119
+            'title',
120
+            'author_name',
121
+            'author_url',
122
+            'provider_name',
123
+            'provider_url',
124
+            'cache_age',
125
+            'thumbnail_url',
126
+            'thumbnail_width',
127
+            'thumbnail_height',
128
+            'width',
129
+            'height',
130
+            'html',
131
+        );
132
+        foreach ($props as $key) {
133
+            if (!empty($data->$key)) {
134
+                $meta[$key] = (string) $data->$key;
135
+            }
136
+        }
137
+        return $meta;
138
+    }
139
+
140
+    /**
141
+     * Parses metatags from DOM
142
+     *
143
+     * @param  string $url URL
144
+     * @return array|false
145
+     */
146
+    public function getDOMData($url = '') {
147
+
148
+        if (!$this->isHTML($url)) {
149
+            return false;
150
+        }
151
+
152
+        $doc = $this->getDOM($url);
153
+        $defaults = array(
154
+            'url' => $url,
155
+        );
156
+
157
+        $link_tags = $this->parseLinkTags($doc);
158
+        $meta_tags = $this->parseMetaTags($doc);
159
+        $img_tags = $this->parseImgTags($doc);
160
+
161
+        $meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
162 162
 		
163
-		if (empty($meta['title'])) {
164
-			$meta['title'] = $this->parseTitle($doc);
165
-		}
166
-
167
-
168
-		return $meta;
169
-	}
170
-
171
-	/**
172
-	 * Check if URL exists and is reachable by making an HTTP request to retrieve header information
173
-	 *
174
-	 * @param string $url URL of the resource
175
-	 * @return boolean
176
-	 */
177
-	public function exists($url = '') {
178
-		$response = $this->request($url);
179
-		if ($response instanceof Response) {
180
-			return $response->getStatusCode() == 200;
181
-		}
182
-		return false;
183
-	}
184
-
185
-	/**
186
-	 * Returns head of the resource
187
-	 *
188
-	 * @param string $url URL of the resource
189
-	 * @return Response|false
190
-	 */
191
-	public function request($url = '') {
192
-		if (!filter_var($url, FILTER_VALIDATE_URL)) {
193
-			return false;
194
-		}
195
-		if (!isset(self::$cache[$url])) {
196
-			try {
197
-				$response = $this->client->request('GET', $url);
198
-			} catch (Exception $e) {
199
-				$response = false;
200
-				error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
201
-			}
202
-			self::$cache[$url] = $response;
203
-		}
204
-
205
-		return self::$cache[$url];
206
-	}
207
-
208
-	/**
209
-	 * Get contents of the page
210
-	 *
211
-	 * @param string $url URL of the resource
212
-	 * @return string
213
-	 */
214
-	public function read($url = '') {
215
-		$body = '';
216
-		if (!$this->exists($url)) {
217
-			return $body;
218
-		}
219
-
220
-		$response = $this->request($url);
221
-		$body = (string) $response->getBody();
222
-		return $body;
223
-	}
224
-
225
-	/**
226
-	 * Checks if resource is an html page
227
-	 *
228
-	 * @param string $url URL of the resource
229
-	 * @return boolean
230
-	 */
231
-	public function isHTML($url = '') {
232
-		$mime = $this->getContentType($url);
233
-		return strpos($mime, 'text/html') !== false;
234
-	}
235
-
236
-	/**
237
-	 * Checks if resource is JSON
238
-	 *
239
-	 * @param string $url URL of the resource
240
-	 * @return boolean
241
-	 */
242
-	public function isJSON($url = '') {
243
-		$mime = $this->getContentType($url);
244
-		return strpos($mime, 'json') !== false;
245
-	}
246
-
247
-	/**
248
-	 * Checks if resource is XML
249
-	 *
250
-	 * @param string $url URL of the resource
251
-	 * @return boolean
252
-	 */
253
-	public function isXML($url = '') {
254
-		$mime = $this->getContentType($url);
255
-		return strpos($mime, 'xml') !== false;
256
-	}
257
-
258
-	/**
259
-	 * Checks if resource is an image
260
-	 *
261
-	 * @param string $url URL of the resource
262
-	 * @return boolean
263
-	 */
264
-	public function isImage($url = '') {
265
-		$mime = $this->getContentType($url);
266
-		if ($mime) {
267
-			list($simple, ) = explode('/', $mime);
268
-			return ($simple == 'image');
269
-		}
270
-
271
-		return false;
272
-	}
273
-
274
-	/**
275
-	 * Get mime type of the URL content
276
-	 *
277
-	 * @param string $url URL of the resource
278
-	 * @return string
279
-	 */
280
-	public function getContentType($url = '') {
281
-		$response = $this->request($url);
282
-		if ($response instanceof Response) {
283
-			$header = $response->getHeader('Content-Type');
284
-			if (is_array($header) && !empty($header)) {
285
-				$parts = explode(';', $header[0]);
286
-				return trim($parts[0]);
287
-			}
288
-		}
289
-		return '';
290
-	}
291
-
292
-	/**
293
-	 * Returns HTML contents of the page
294
-	 *
295
-	 * @param string $url URL of the resource
296
-	 * @return string
297
-	 */
298
-	public function getHTML($url = '') {
299
-		if (!$this->isHTML($url)) {
300
-			return '';
301
-		}
302
-		return $this->read($url);
303
-	}
304
-
305
-	/**
306
-	 * Returns HTML contents of the page as a DOMDocument
307
-	 *
308
-	 * @param string $url URL of the resource
309
-	 * @return DOMDocument
310
-	 */
311
-	public function getDOM($url = '') {
312
-		$html = $this->getHTML($url);
313
-		$doc = new DOMDocument();
314
-		$doc->loadHTML($html);
315
-		if (!$doc->documentURI) {
316
-			$doc->documentURI = $url;
317
-		}
318
-		return $doc;
319
-	}
320
-
321
-	/**
322
-	 * Parses document title
323
-	 *
324
-	 * @param DOMDocument $doc Document
325
-	 * @return string
326
-	 */
327
-	public function parseTitle(DOMDocument $doc) {
328
-		$node = $doc->getElementsByTagName('title');
329
-		$title = $node->item(0)->nodeValue;
330
-		return ($title) ? : '';
331
-	}
332
-
333
-	/**
334
-	 * Parses <link> tags
335
-	 *
336
-	 * @param DOMDocument $doc Document
337
-	 * @return array
338
-	 */
339
-	public function parseLinkTags(DOMDocument $doc) {
340
-
341
-		$meta = array();
342
-
343
-		$nodes = $doc->getElementsByTagName('link');
344
-		foreach ($nodes as $node) {
345
-			$rel = $node->getAttribute('rel');
346
-			$href = $node->getAttribute('href');
347
-
348
-			switch ($rel) {
349
-
350
-				case 'icon' :
351
-					$meta['icons'][] = $this->getAbsoluteURL($doc, $href);
352
-					break;
353
-
354
-				case 'canonical' :
355
-					$meta['canonical'] = $this->getAbsoluteURL($doc, $href);
356
-					break;
357
-
358
-				case 'alternate' :
359
-					$type = $node->getAttribute('type');
360
-					if (in_array($type, array(
361
-								'application/json+oembed',
362
-								'text/json+oembed',
363
-								'application/xml+oembed',
364
-								'text/xml+oembed'
365
-							))) {
366
-						$meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
367
-					}
368
-					break;
369
-			}
370
-		}
371
-
372
-		return $meta;
373
-	}
374
-
375
-	/**
376
-	 * Parses <meta> tags
377
-	 *
378
-	 * @param DOMDocument $doc Document
379
-	 * @return array
380
-	 */
381
-	public function parseMetaTags(DOMDocument $doc) {
382
-
383
-		$meta = array();
384
-
385
-		$nodes = $doc->getElementsByTagName('meta');
386
-		if (!empty($nodes)) {
387
-			foreach ($nodes as $node) {
388
-				$name = $node->getAttribute('name');
389
-				if (!$name) {
390
-					$name = $node->getAttribute('property');
391
-				}
392
-				if (!$name) {
393
-					continue;
394
-				}
395
-
396
-				$name = strtolower($name);
397
-
398
-				$content = $node->getAttribute('content');
399
-				if (isset($meta['metatags'][$name])) {
400
-					if (!is_array($meta['metatags'][$name])) {
401
-						$meta['metatags'][$name] = array($meta['metatags'][$name]);
402
-					}
403
-					$meta['metatags'][$name][] = $content;
404
-				} else {
405
-					$meta['metatags'][$name] = $content;
406
-				}
407
-
408
-				switch ($name) {
409
-
410
-					case 'title' :
411
-					case 'og:title' :
412
-					case 'twitter:title' :
413
-						if (empty($meta['title'])) {
414
-							$meta['title'] = $content;
415
-						}
416
-						break;
417
-
418
-					case 'og:type' :
419
-						if (empty($meta['type'])) {
420
-							$meta['type'] = $content;
421
-						}
422
-						break;
423
-
424
-					case 'description' :
425
-					case 'og:description' :
426
-					case 'twitter:description' :
427
-						if (empty($meta['description'])) {
428
-							$meta['description'] = $content;
429
-						}
430
-						break;
431
-
432
-					case 'keywords' :
433
-						if (is_string($content)) {
434
-							$content = explode(',', $content);
435
-							$content = array_map('trim', $content);
436
-						}
437
-						$meta['tags'] = $content;
438
-						break;
439
-
440
-					case 'og:site_name' :
441
-					case 'twitter:site' :
442
-						if (empty($meta['provider_name'])) {
443
-							$meta['provider_name'] = $content;
444
-						}
445
-						break;
446
-
447
-					case 'og:image' :
448
-					case 'twitter:image' :
449
-						$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $content);
450
-						break;
451
-				}
452
-			}
453
-		}
454
-
455
-		return $meta;
456
-	}
457
-
458
-	/**
459
-	 * Parses <img> tags
460
-	 *
461
-	 * @param DOMDocument $doc Document
462
-	 * @return array
463
-	 */
464
-	public function parseImgTags(DOMDocument $doc) {
465
-
466
-		$meta = array();
467
-
468
-		$nodes = $doc->getElementsByTagName('img');
469
-		foreach ($nodes as $node) {
470
-			$src = $node->getAttribute('src');
471
-			$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $src);
472
-		}
473
-
474
-		return $meta;
475
-	}
476
-
477
-	/**
478
-	 * Normalizes relative URLs
479
-	 *
480
-	 * @param DOMDocument $doc  Document
481
-	 * @param string      $href URL to normalize
482
-	 * @return string
483
-	 */
484
-	public function getAbsoluteURL(DOMDocument $doc, $href = '') {
485
-
486
-		// Check if $url is absolute
487
-		if (parse_url($href, PHP_URL_HOST)) {
488
-			return $href;
489
-		}
490
-
491
-		$uri = trim($doc->documentURI ? : '', '/');
492
-
493
-		// Check if $url is relative to root
494
-		if (substr($href, 0, 1) === "/") {
495
-			$scheme = parse_url($uri, PHP_URL_SCHEME);
496
-			$host = parse_url($uri, PHP_URL_HOST);
497
-			return "$scheme://$host$href";
498
-		}
499
-
500
-		// $url is relative to page
501
-		return "$uri/$href";
502
-	}
163
+        if (empty($meta['title'])) {
164
+            $meta['title'] = $this->parseTitle($doc);
165
+        }
166
+
167
+
168
+        return $meta;
169
+    }
170
+
171
+    /**
172
+     * Check if URL exists and is reachable by making an HTTP request to retrieve header information
173
+     *
174
+     * @param string $url URL of the resource
175
+     * @return boolean
176
+     */
177
+    public function exists($url = '') {
178
+        $response = $this->request($url);
179
+        if ($response instanceof Response) {
180
+            return $response->getStatusCode() == 200;
181
+        }
182
+        return false;
183
+    }
184
+
185
+    /**
186
+     * Returns head of the resource
187
+     *
188
+     * @param string $url URL of the resource
189
+     * @return Response|false
190
+     */
191
+    public function request($url = '') {
192
+        if (!filter_var($url, FILTER_VALIDATE_URL)) {
193
+            return false;
194
+        }
195
+        if (!isset(self::$cache[$url])) {
196
+            try {
197
+                $response = $this->client->request('GET', $url);
198
+            } catch (Exception $e) {
199
+                $response = false;
200
+                error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
201
+            }
202
+            self::$cache[$url] = $response;
203
+        }
204
+
205
+        return self::$cache[$url];
206
+    }
207
+
208
+    /**
209
+     * Get contents of the page
210
+     *
211
+     * @param string $url URL of the resource
212
+     * @return string
213
+     */
214
+    public function read($url = '') {
215
+        $body = '';
216
+        if (!$this->exists($url)) {
217
+            return $body;
218
+        }
219
+
220
+        $response = $this->request($url);
221
+        $body = (string) $response->getBody();
222
+        return $body;
223
+    }
224
+
225
+    /**
226
+     * Checks if resource is an html page
227
+     *
228
+     * @param string $url URL of the resource
229
+     * @return boolean
230
+     */
231
+    public function isHTML($url = '') {
232
+        $mime = $this->getContentType($url);
233
+        return strpos($mime, 'text/html') !== false;
234
+    }
235
+
236
+    /**
237
+     * Checks if resource is JSON
238
+     *
239
+     * @param string $url URL of the resource
240
+     * @return boolean
241
+     */
242
+    public function isJSON($url = '') {
243
+        $mime = $this->getContentType($url);
244
+        return strpos($mime, 'json') !== false;
245
+    }
246
+
247
+    /**
248
+     * Checks if resource is XML
249
+     *
250
+     * @param string $url URL of the resource
251
+     * @return boolean
252
+     */
253
+    public function isXML($url = '') {
254
+        $mime = $this->getContentType($url);
255
+        return strpos($mime, 'xml') !== false;
256
+    }
257
+
258
+    /**
259
+     * Checks if resource is an image
260
+     *
261
+     * @param string $url URL of the resource
262
+     * @return boolean
263
+     */
264
+    public function isImage($url = '') {
265
+        $mime = $this->getContentType($url);
266
+        if ($mime) {
267
+            list($simple, ) = explode('/', $mime);
268
+            return ($simple == 'image');
269
+        }
270
+
271
+        return false;
272
+    }
273
+
274
+    /**
275
+     * Get mime type of the URL content
276
+     *
277
+     * @param string $url URL of the resource
278
+     * @return string
279
+     */
280
+    public function getContentType($url = '') {
281
+        $response = $this->request($url);
282
+        if ($response instanceof Response) {
283
+            $header = $response->getHeader('Content-Type');
284
+            if (is_array($header) && !empty($header)) {
285
+                $parts = explode(';', $header[0]);
286
+                return trim($parts[0]);
287
+            }
288
+        }
289
+        return '';
290
+    }
291
+
292
+    /**
293
+     * Returns HTML contents of the page
294
+     *
295
+     * @param string $url URL of the resource
296
+     * @return string
297
+     */
298
+    public function getHTML($url = '') {
299
+        if (!$this->isHTML($url)) {
300
+            return '';
301
+        }
302
+        return $this->read($url);
303
+    }
304
+
305
+    /**
306
+     * Returns HTML contents of the page as a DOMDocument
307
+     *
308
+     * @param string $url URL of the resource
309
+     * @return DOMDocument
310
+     */
311
+    public function getDOM($url = '') {
312
+        $html = $this->getHTML($url);
313
+        $doc = new DOMDocument();
314
+        $doc->loadHTML($html);
315
+        if (!$doc->documentURI) {
316
+            $doc->documentURI = $url;
317
+        }
318
+        return $doc;
319
+    }
320
+
321
+    /**
322
+     * Parses document title
323
+     *
324
+     * @param DOMDocument $doc Document
325
+     * @return string
326
+     */
327
+    public function parseTitle(DOMDocument $doc) {
328
+        $node = $doc->getElementsByTagName('title');
329
+        $title = $node->item(0)->nodeValue;
330
+        return ($title) ? : '';
331
+    }
332
+
333
+    /**
334
+     * Parses <link> tags
335
+     *
336
+     * @param DOMDocument $doc Document
337
+     * @return array
338
+     */
339
+    public function parseLinkTags(DOMDocument $doc) {
340
+
341
+        $meta = array();
342
+
343
+        $nodes = $doc->getElementsByTagName('link');
344
+        foreach ($nodes as $node) {
345
+            $rel = $node->getAttribute('rel');
346
+            $href = $node->getAttribute('href');
347
+
348
+            switch ($rel) {
349
+
350
+                case 'icon' :
351
+                    $meta['icons'][] = $this->getAbsoluteURL($doc, $href);
352
+                    break;
353
+
354
+                case 'canonical' :
355
+                    $meta['canonical'] = $this->getAbsoluteURL($doc, $href);
356
+                    break;
357
+
358
+                case 'alternate' :
359
+                    $type = $node->getAttribute('type');
360
+                    if (in_array($type, array(
361
+                                'application/json+oembed',
362
+                                'text/json+oembed',
363
+                                'application/xml+oembed',
364
+                                'text/xml+oembed'
365
+                            ))) {
366
+                        $meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
367
+                    }
368
+                    break;
369
+            }
370
+        }
371
+
372
+        return $meta;
373
+    }
374
+
375
+    /**
376
+     * Parses <meta> tags
377
+     *
378
+     * @param DOMDocument $doc Document
379
+     * @return array
380
+     */
381
+    public function parseMetaTags(DOMDocument $doc) {
382
+
383
+        $meta = array();
384
+
385
+        $nodes = $doc->getElementsByTagName('meta');
386
+        if (!empty($nodes)) {
387
+            foreach ($nodes as $node) {
388
+                $name = $node->getAttribute('name');
389
+                if (!$name) {
390
+                    $name = $node->getAttribute('property');
391
+                }
392
+                if (!$name) {
393
+                    continue;
394
+                }
395
+
396
+                $name = strtolower($name);
397
+
398
+                $content = $node->getAttribute('content');
399
+                if (isset($meta['metatags'][$name])) {
400
+                    if (!is_array($meta['metatags'][$name])) {
401
+                        $meta['metatags'][$name] = array($meta['metatags'][$name]);
402
+                    }
403
+                    $meta['metatags'][$name][] = $content;
404
+                } else {
405
+                    $meta['metatags'][$name] = $content;
406
+                }
407
+
408
+                switch ($name) {
409
+
410
+                    case 'title' :
411
+                    case 'og:title' :
412
+                    case 'twitter:title' :
413
+                        if (empty($meta['title'])) {
414
+                            $meta['title'] = $content;
415
+                        }
416
+                        break;
417
+
418
+                    case 'og:type' :
419
+                        if (empty($meta['type'])) {
420
+                            $meta['type'] = $content;
421
+                        }
422
+                        break;
423
+
424
+                    case 'description' :
425
+                    case 'og:description' :
426
+                    case 'twitter:description' :
427
+                        if (empty($meta['description'])) {
428
+                            $meta['description'] = $content;
429
+                        }
430
+                        break;
431
+
432
+                    case 'keywords' :
433
+                        if (is_string($content)) {
434
+                            $content = explode(',', $content);
435
+                            $content = array_map('trim', $content);
436
+                        }
437
+                        $meta['tags'] = $content;
438
+                        break;
439
+
440
+                    case 'og:site_name' :
441
+                    case 'twitter:site' :
442
+                        if (empty($meta['provider_name'])) {
443
+                            $meta['provider_name'] = $content;
444
+                        }
445
+                        break;
446
+
447
+                    case 'og:image' :
448
+                    case 'twitter:image' :
449
+                        $meta['thumbnails'][] = $this->getAbsoluteURL($doc, $content);
450
+                        break;
451
+                }
452
+            }
453
+        }
454
+
455
+        return $meta;
456
+    }
457
+
458
+    /**
459
+     * Parses <img> tags
460
+     *
461
+     * @param DOMDocument $doc Document
462
+     * @return array
463
+     */
464
+    public function parseImgTags(DOMDocument $doc) {
465
+
466
+        $meta = array();
467
+
468
+        $nodes = $doc->getElementsByTagName('img');
469
+        foreach ($nodes as $node) {
470
+            $src = $node->getAttribute('src');
471
+            $meta['thumbnails'][] = $this->getAbsoluteURL($doc, $src);
472
+        }
473
+
474
+        return $meta;
475
+    }
476
+
477
+    /**
478
+     * Normalizes relative URLs
479
+     *
480
+     * @param DOMDocument $doc  Document
481
+     * @param string      $href URL to normalize
482
+     * @return string
483
+     */
484
+    public function getAbsoluteURL(DOMDocument $doc, $href = '') {
485
+
486
+        // Check if $url is absolute
487
+        if (parse_url($href, PHP_URL_HOST)) {
488
+            return $href;
489
+        }
490
+
491
+        $uri = trim($doc->documentURI ? : '', '/');
492
+
493
+        // Check if $url is relative to root
494
+        if (substr($href, 0, 1) === "/") {
495
+            $scheme = parse_url($uri, PHP_URL_SCHEME);
496
+            $host = parse_url($uri, PHP_URL_HOST);
497
+            return "$scheme://$host$href";
498
+        }
499
+
500
+        // $url is relative to page
501
+        return "$uri/$href";
502
+    }
503 503
 
504 504
 }
Please login to merge, or discard this patch.