Passed
Branch master (ab01e1)
by Ismayil
03:03
created
src/hypeJunction/Parser.php 2 patches
Indentation   +485 added lines, -485 removed lines patch added patch discarded remove patch
@@ -12,492 +12,492 @@
 block discarded – undo
12 12
  */
13 13
 class Parser {
14 14
 
15
-	/**
16
-	 * @var ClientInterface
17
-	 */
18
-	private $client;
19
-
20
-	/**
21
-	 * @var array
22
-	 */
23
-	static $cache;
24
-
25
-	/**
26
-	 * Constructor
27
-	 * @param ClientInterface $client HTTP Client
28
-	 */
29
-	public function __construct(ClientInterface $client) {
30
-		$this->client = $client;
31
-	}
32
-
33
-	/**
34
-	 * Parses a URL into a an array of metatags
35
-	 *
36
-	 * @param string $url URL to parse
37
-	 * @return array
38
-	 */
39
-	public function parse($url = '') {
40
-
41
-		$data = $this->getImageData($url);
42
-		if (!$data) {
43
-			$data = $this->getOEmbedData($url);
44
-		}
45
-		if (!$data) {
46
-			$data = $this->getDOMData($url);
47
-			if (is_array($data) && !empty($data['oembed_url'])) {
48
-				foreach ($data['oembed_url'] as $oembed_url) {
49
-					$oembed_data = $this->parse($oembed_url);
50
-					if (!empty($oembed_data) && is_array($oembed_data)) {
51
-						$oembed_data['oembed_url'] = $oembed_data['url'];
52
-						unset($oembed_data['url']);
53
-						$data = array_merge($data, $oembed_data);
54
-					}
55
-				}
56
-			}
57
-		}
15
+    /**
16
+     * @var ClientInterface
17
+     */
18
+    private $client;
19
+
20
+    /**
21
+     * @var array
22
+     */
23
+    static $cache;
24
+
25
+    /**
26
+     * Constructor
27
+     * @param ClientInterface $client HTTP Client
28
+     */
29
+    public function __construct(ClientInterface $client) {
30
+        $this->client = $client;
31
+    }
32
+
33
+    /**
34
+     * Parses a URL into a an array of metatags
35
+     *
36
+     * @param string $url URL to parse
37
+     * @return array
38
+     */
39
+    public function parse($url = '') {
40
+
41
+        $data = $this->getImageData($url);
42
+        if (!$data) {
43
+            $data = $this->getOEmbedData($url);
44
+        }
45
+        if (!$data) {
46
+            $data = $this->getDOMData($url);
47
+            if (is_array($data) && !empty($data['oembed_url'])) {
48
+                foreach ($data['oembed_url'] as $oembed_url) {
49
+                    $oembed_data = $this->parse($oembed_url);
50
+                    if (!empty($oembed_data) && is_array($oembed_data)) {
51
+                        $oembed_data['oembed_url'] = $oembed_data['url'];
52
+                        unset($oembed_data['url']);
53
+                        $data = array_merge($data, $oembed_data);
54
+                    }
55
+                }
56
+            }
57
+        }
58 58
 	
59
-		if (!is_array($data)) {
60
-			$data = array();
61
-		}
62
-
63
-		if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
64
-			$data['thumbnail_url'] = $data['thumbnails'][0];
65
-		}
66
-
67
-		return $data;
68
-	}
69
-
70
-	/**
71
-	 * Parses image metatags
72
-	 *
73
-	 * @param string $url URL of the image
74
-	 * @return array|false
75
-	 */
76
-	public function getImageData($url = '') {
77
-		if (!$this->isImage($url)) {
78
-			return false;
79
-		}
80
-
81
-		return array(
82
-			'type' => 'photo',
83
-			'url' => $url,
84
-			'thumbnails' => array($url),
85
-		);
86
-	}
87
-
88
-	/**
89
-	 * Parses OEmbed data
90
-	 *
91
-	 * @param  string $url URL of the image
92
-	 * @return array|false
93
-	 */
94
-	public function getOEmbedData($url = '') {
95
-
96
-		if (!$this->isJSON($url) && !$this->isXML($url)) {
97
-			return false;
98
-		}
99
-
100
-		$meta = array(
101
-			'url' => $url,
102
-		);
103
-
104
-		$content = $this->read($url);
105
-		if (!$content) {
106
-			return $meta;
107
-		}
108
-
109
-		if ($this->isJSON($url)) {
110
-			$data = @json_decode($content);
111
-		} else if ($this->isXML($url)) {
112
-			$data = @simplexml_load_string($content);
113
-		}
114
-
115
-		$props = array(
116
-			'type',
117
-			'version',
118
-			'title',
119
-			'author_name',
120
-			'author_url',
121
-			'provider_name',
122
-			'provider_url',
123
-			'cache_age',
124
-			'thumbnail_url',
125
-			'thumbnail_width',
126
-			'thumbnail_height',
127
-			'width',
128
-			'height',
129
-			'html',
130
-		);
131
-		foreach ($props as $key) {
132
-			if (!empty($data->$key)) {
133
-				$meta[$key] = (string) $data->$key;
134
-			}
135
-		}
136
-		return $meta;
137
-	}
138
-
139
-	/**
140
-	 * Parses metatags from DOM
141
-	 *
142
-	 * @param  string $url URL
143
-	 * @return array|false
144
-	 */
145
-	public function getDOMData($url = '') {
146
-
147
-		if (!$this->isHTML($url)) {
148
-			return false;
149
-		}
150
-
151
-		$doc = $this->getDOM($url);
152
-		$defaults = array(
153
-			'url' => $url,
154
-		);
155
-
156
-		$link_tags = $this->parseLinkTags($doc);
157
-		$meta_tags = $this->parseMetaTags($doc);
158
-		$img_tags = $this->parseImgTags($doc);
159
-
160
-		$meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
59
+        if (!is_array($data)) {
60
+            $data = array();
61
+        }
62
+
63
+        if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
64
+            $data['thumbnail_url'] = $data['thumbnails'][0];
65
+        }
66
+
67
+        return $data;
68
+    }
69
+
70
+    /**
71
+     * Parses image metatags
72
+     *
73
+     * @param string $url URL of the image
74
+     * @return array|false
75
+     */
76
+    public function getImageData($url = '') {
77
+        if (!$this->isImage($url)) {
78
+            return false;
79
+        }
80
+
81
+        return array(
82
+            'type' => 'photo',
83
+            'url' => $url,
84
+            'thumbnails' => array($url),
85
+        );
86
+    }
87
+
88
+    /**
89
+     * Parses OEmbed data
90
+     *
91
+     * @param  string $url URL of the image
92
+     * @return array|false
93
+     */
94
+    public function getOEmbedData($url = '') {
95
+
96
+        if (!$this->isJSON($url) && !$this->isXML($url)) {
97
+            return false;
98
+        }
99
+
100
+        $meta = array(
101
+            'url' => $url,
102
+        );
103
+
104
+        $content = $this->read($url);
105
+        if (!$content) {
106
+            return $meta;
107
+        }
108
+
109
+        if ($this->isJSON($url)) {
110
+            $data = @json_decode($content);
111
+        } else if ($this->isXML($url)) {
112
+            $data = @simplexml_load_string($content);
113
+        }
114
+
115
+        $props = array(
116
+            'type',
117
+            'version',
118
+            'title',
119
+            'author_name',
120
+            'author_url',
121
+            'provider_name',
122
+            'provider_url',
123
+            'cache_age',
124
+            'thumbnail_url',
125
+            'thumbnail_width',
126
+            'thumbnail_height',
127
+            'width',
128
+            'height',
129
+            'html',
130
+        );
131
+        foreach ($props as $key) {
132
+            if (!empty($data->$key)) {
133
+                $meta[$key] = (string) $data->$key;
134
+            }
135
+        }
136
+        return $meta;
137
+    }
138
+
139
+    /**
140
+     * Parses metatags from DOM
141
+     *
142
+     * @param  string $url URL
143
+     * @return array|false
144
+     */
145
+    public function getDOMData($url = '') {
146
+
147
+        if (!$this->isHTML($url)) {
148
+            return false;
149
+        }
150
+
151
+        $doc = $this->getDOM($url);
152
+        $defaults = array(
153
+            'url' => $url,
154
+        );
155
+
156
+        $link_tags = $this->parseLinkTags($doc);
157
+        $meta_tags = $this->parseMetaTags($doc);
158
+        $img_tags = $this->parseImgTags($doc);
159
+
160
+        $meta = array_merge_recursive($defaults, $link_tags, $meta_tags, $img_tags);
161 161
 		
162
-		if (empty($meta['title'])) {
163
-			$meta['title'] = $this->parseTitle($doc);
164
-		}
165
-
166
-
167
-		return $meta;
168
-	}
169
-
170
-	/**
171
-	 * Check if URL exists and is reachable by making an HTTP request to retrieve header information
172
-	 *
173
-	 * @param string $url URL of the resource
174
-	 * @return boolean
175
-	 */
176
-	public function exists($url = '') {
177
-		$response = $this->request($url);
178
-		if ($response instanceof Response) {
179
-			return $response->getStatusCode() == 200;
180
-		}
181
-		return false;
182
-	}
183
-
184
-	/**
185
-	 * Returns head of the resource
186
-	 *
187
-	 * @param string $url URL of the resource
188
-	 * @return Response|false
189
-	 */
190
-	public function request($url = '') {
191
-		if (!filter_var($url, FILTER_VALIDATE_URL)) {
192
-			return false;
193
-		}
194
-		if (!isset(self::$cache[$url])) {
195
-			try {
196
-				$response = $this->client->request('GET', $url);
197
-			} catch (Exception $e) {
198
-				$response = false;
199
-				error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
200
-			}
201
-			self::$cache[$url] = $response;
202
-		}
203
-
204
-		return self::$cache[$url];
205
-	}
206
-
207
-	/**
208
-	 * Get contents of the page
209
-	 *
210
-	 * @param string $url URL of the resource
211
-	 * @return string
212
-	 */
213
-	public function read($url = '') {
214
-		$body = '';
215
-		if (!$this->exists($url)) {
216
-			return $body;
217
-		}
218
-
219
-		$response = $this->request($url);
220
-		$body = (string) $response->getBody();
221
-		return $body;
222
-	}
223
-
224
-	/**
225
-	 * Checks if resource is an html page
226
-	 *
227
-	 * @param string $url URL of the resource
228
-	 * @return boolean
229
-	 */
230
-	public function isHTML($url = '') {
231
-		$mime = $this->getContentType($url);
232
-		return strpos($mime, 'text/html') !== false;
233
-	}
234
-
235
-	/**
236
-	 * Checks if resource is JSON
237
-	 *
238
-	 * @param string $url URL of the resource
239
-	 * @return boolean
240
-	 */
241
-	public function isJSON($url = '') {
242
-		$mime = $this->getContentType($url);
243
-		return strpos($mime, 'json') !== false;
244
-	}
245
-
246
-	/**
247
-	 * Checks if resource is XML
248
-	 *
249
-	 * @param string $url URL of the resource
250
-	 * @return boolean
251
-	 */
252
-	public function isXML($url = '') {
253
-		$mime = $this->getContentType($url);
254
-		return strpos($mime, 'xml') !== false;
255
-	}
256
-
257
-	/**
258
-	 * Checks if resource is an image
259
-	 *
260
-	 * @param string $url URL of the resource
261
-	 * @return boolean
262
-	 */
263
-	public function isImage($url = '') {
264
-		$mime = $this->getContentType($url);
265
-		if ($mime) {
266
-			list($simple, ) = explode('/', $mime);
267
-			return ($simple == 'image');
268
-		}
269
-
270
-		return false;
271
-	}
272
-
273
-	/**
274
-	 * Get mime type of the URL content
275
-	 *
276
-	 * @param string $url URL of the resource
277
-	 * @return string
278
-	 */
279
-	public function getContentType($url = '') {
280
-		$response = $this->request($url);
281
-		if ($response instanceof Response) {
282
-			$header = $response->getHeader('Content-Type');
283
-			if (is_array($header) && !empty($header)) {
284
-				$parts = explode(';', $header[0]);
285
-				return trim($parts[0]);
286
-			}
287
-		}
288
-		return '';
289
-	}
290
-
291
-	/**
292
-	 * Returns HTML contents of the page
293
-	 *
294
-	 * @param string $url URL of the resource
295
-	 * @return string
296
-	 */
297
-	public function getHTML($url = '') {
298
-		if (!$this->isHTML($url)) {
299
-			return '';
300
-		}
301
-		return $this->read($url);
302
-	}
303
-
304
-	/**
305
-	 * Returns HTML contents of the page as a DOMDocument
306
-	 *
307
-	 * @param string $url URL of the resource
308
-	 * @return DOMDocument
309
-	 */
310
-	public function getDOM($url = '') {
311
-		$html = $this->getHTML($url);
312
-		$doc = new DOMDocument();
313
-		@$doc->loadHTML($html);
314
-		if (!$doc->documentURI) {
315
-			$doc->documentURI = $url;
316
-		}
317
-		return $doc;
318
-	}
319
-
320
-	/**
321
-	 * Parses document title
322
-	 *
323
-	 * @param DOMDocument $doc Document
324
-	 * @return string
325
-	 */
326
-	public function parseTitle(DOMDocument $doc) {
327
-		$node = $doc->getElementsByTagName('title');
328
-		$title = $node->item(0)->nodeValue;
329
-		return ($title) ? : '';
330
-	}
331
-
332
-	/**
333
-	 * Parses <link> tags
334
-	 *
335
-	 * @param DOMDocument $doc Document
336
-	 * @return array
337
-	 */
338
-	public function parseLinkTags(DOMDocument $doc) {
339
-
340
-		$meta = array();
341
-
342
-		$nodes = $doc->getElementsByTagName('link');
343
-		foreach ($nodes as $node) {
344
-			$rel = $node->getAttribute('rel');
345
-			$href = $node->getAttribute('href');
346
-
347
-			switch ($rel) {
348
-
349
-				case 'icon' :
350
-					$meta['icons'][] = $this->getAbsoluteURL($doc, $href);
351
-					break;
352
-
353
-				case 'canonical' :
354
-					$meta['canonical'] = $this->getAbsoluteURL($doc, $href);
355
-					break;
356
-
357
-				case 'alternate' :
358
-					$type = $node->getAttribute('type');
359
-					if (in_array($type, array(
360
-								'application/json+oembed',
361
-								'text/json+oembed',
362
-								'application/xml+oembed',
363
-								'text/xml+oembed'
364
-							))) {
365
-						$meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
366
-					}
367
-					break;
368
-			}
369
-		}
370
-
371
-		return $meta;
372
-	}
373
-
374
-	/**
375
-	 * Parses <meta> tags
376
-	 *
377
-	 * @param DOMDocument $doc Document
378
-	 * @return array
379
-	 */
380
-	public function parseMetaTags(DOMDocument $doc) {
381
-
382
-		$meta = array();
383
-
384
-		$nodes = $doc->getElementsByTagName('meta');
385
-		if (!empty($nodes)) {
386
-			foreach ($nodes as $node) {
387
-				$name = $node->getAttribute('name');
388
-				if (!$name) {
389
-					$name = $node->getAttribute('property');
390
-				}
391
-				if (!$name) {
392
-					continue;
393
-				}
394
-
395
-				$name = strtolower($name);
396
-
397
-				$content = $node->getAttribute('content');
398
-				if (isset($meta['metatags'][$name])) {
399
-					if (!is_array($meta['metatags'][$name])) {
400
-						$meta['metatags'][$name] = array($meta['metatags'][$name]);
401
-					}
402
-					$meta['metatags'][$name][] = $content;
403
-				} else {
404
-					$meta['metatags'][$name] = $content;
405
-				}
406
-
407
-				switch ($name) {
408
-
409
-					case 'title' :
410
-					case 'og:title' :
411
-					case 'twitter:title' :
412
-						if (empty($meta['title'])) {
413
-							$meta['title'] = $content;
414
-						}
415
-						break;
416
-
417
-					case 'og:type' :
418
-						if (empty($meta['type'])) {
419
-							$meta['type'] = $content;
420
-						}
421
-						break;
422
-
423
-					case 'description' :
424
-					case 'og:description' :
425
-					case 'twitter:description' :
426
-						if (empty($meta['description'])) {
427
-							$meta['description'] = $content;
428
-						}
429
-						break;
430
-
431
-					case 'keywords' :
432
-						if (is_string($content)) {
433
-							$content = explode(',', $content);
434
-							$content = array_map('trim', $content);
435
-						}
436
-						$meta['tags'] = $content;
437
-						break;
438
-
439
-					case 'og:site_name' :
440
-					case 'twitter:site' :
441
-						if (empty($meta['provider_name'])) {
442
-							$meta['provider_name'] = $content;
443
-						}
444
-						break;
445
-
446
-					case 'og:image' :
447
-					case 'twitter:image' :
448
-						$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $content);
449
-						break;
450
-				}
451
-			}
452
-		}
453
-
454
-		return $meta;
455
-	}
456
-
457
-	/**
458
-	 * Parses <img> tags
459
-	 *
460
-	 * @param DOMDocument $doc Document
461
-	 * @return array
462
-	 */
463
-	public function parseImgTags(DOMDocument $doc) {
464
-
465
-		$meta = array();
466
-
467
-		$nodes = $doc->getElementsByTagName('img');
468
-		foreach ($nodes as $node) {
469
-			$src = $node->getAttribute('src');
470
-			$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $src);
471
-		}
472
-
473
-		return $meta;
474
-	}
475
-
476
-	/**
477
-	 * Normalizes relative URLs
478
-	 *
479
-	 * @param DOMDocument $doc  Document
480
-	 * @param string      $href URL to normalize
481
-	 * @return string
482
-	 */
483
-	public function getAbsoluteURL(DOMDocument $doc, $href = '') {
484
-
485
-		// Check if $url is absolute
486
-		if (parse_url($href, PHP_URL_HOST)) {
487
-			return $href;
488
-		}
489
-
490
-		$uri = trim($doc->documentURI ? : '', '/');
491
-
492
-		// Check if $url is relative to root
493
-		if (substr($href, 0, 1) === "/") {
494
-			$scheme = parse_url($uri, PHP_URL_SCHEME);
495
-			$host = parse_url($uri, PHP_URL_HOST);
496
-			return "$scheme://$host$href";
497
-		}
498
-
499
-		// $url is relative to page
500
-		return "$uri/$href";
501
-	}
162
+        if (empty($meta['title'])) {
163
+            $meta['title'] = $this->parseTitle($doc);
164
+        }
165
+
166
+
167
+        return $meta;
168
+    }
169
+
170
+    /**
171
+     * Check if URL exists and is reachable by making an HTTP request to retrieve header information
172
+     *
173
+     * @param string $url URL of the resource
174
+     * @return boolean
175
+     */
176
+    public function exists($url = '') {
177
+        $response = $this->request($url);
178
+        if ($response instanceof Response) {
179
+            return $response->getStatusCode() == 200;
180
+        }
181
+        return false;
182
+    }
183
+
184
+    /**
185
+     * Returns head of the resource
186
+     *
187
+     * @param string $url URL of the resource
188
+     * @return Response|false
189
+     */
190
+    public function request($url = '') {
191
+        if (!filter_var($url, FILTER_VALIDATE_URL)) {
192
+            return false;
193
+        }
194
+        if (!isset(self::$cache[$url])) {
195
+            try {
196
+                $response = $this->client->request('GET', $url);
197
+            } catch (Exception $e) {
198
+                $response = false;
199
+                error_log("Parser Error for HEAD request ($url): {$e->getMessage()}");
200
+            }
201
+            self::$cache[$url] = $response;
202
+        }
203
+
204
+        return self::$cache[$url];
205
+    }
206
+
207
+    /**
208
+     * Get contents of the page
209
+     *
210
+     * @param string $url URL of the resource
211
+     * @return string
212
+     */
213
+    public function read($url = '') {
214
+        $body = '';
215
+        if (!$this->exists($url)) {
216
+            return $body;
217
+        }
218
+
219
+        $response = $this->request($url);
220
+        $body = (string) $response->getBody();
221
+        return $body;
222
+    }
223
+
224
+    /**
225
+     * Checks if resource is an html page
226
+     *
227
+     * @param string $url URL of the resource
228
+     * @return boolean
229
+     */
230
+    public function isHTML($url = '') {
231
+        $mime = $this->getContentType($url);
232
+        return strpos($mime, 'text/html') !== false;
233
+    }
234
+
235
+    /**
236
+     * Checks if resource is JSON
237
+     *
238
+     * @param string $url URL of the resource
239
+     * @return boolean
240
+     */
241
+    public function isJSON($url = '') {
242
+        $mime = $this->getContentType($url);
243
+        return strpos($mime, 'json') !== false;
244
+    }
245
+
246
+    /**
247
+     * Checks if resource is XML
248
+     *
249
+     * @param string $url URL of the resource
250
+     * @return boolean
251
+     */
252
+    public function isXML($url = '') {
253
+        $mime = $this->getContentType($url);
254
+        return strpos($mime, 'xml') !== false;
255
+    }
256
+
257
+    /**
258
+     * Checks if resource is an image
259
+     *
260
+     * @param string $url URL of the resource
261
+     * @return boolean
262
+     */
263
+    public function isImage($url = '') {
264
+        $mime = $this->getContentType($url);
265
+        if ($mime) {
266
+            list($simple, ) = explode('/', $mime);
267
+            return ($simple == 'image');
268
+        }
269
+
270
+        return false;
271
+    }
272
+
273
+    /**
274
+     * Get mime type of the URL content
275
+     *
276
+     * @param string $url URL of the resource
277
+     * @return string
278
+     */
279
+    public function getContentType($url = '') {
280
+        $response = $this->request($url);
281
+        if ($response instanceof Response) {
282
+            $header = $response->getHeader('Content-Type');
283
+            if (is_array($header) && !empty($header)) {
284
+                $parts = explode(';', $header[0]);
285
+                return trim($parts[0]);
286
+            }
287
+        }
288
+        return '';
289
+    }
290
+
291
+    /**
292
+     * Returns HTML contents of the page
293
+     *
294
+     * @param string $url URL of the resource
295
+     * @return string
296
+     */
297
+    public function getHTML($url = '') {
298
+        if (!$this->isHTML($url)) {
299
+            return '';
300
+        }
301
+        return $this->read($url);
302
+    }
303
+
304
+    /**
305
+     * Returns HTML contents of the page as a DOMDocument
306
+     *
307
+     * @param string $url URL of the resource
308
+     * @return DOMDocument
309
+     */
310
+    public function getDOM($url = '') {
311
+        $html = $this->getHTML($url);
312
+        $doc = new DOMDocument();
313
+        @$doc->loadHTML($html);
314
+        if (!$doc->documentURI) {
315
+            $doc->documentURI = $url;
316
+        }
317
+        return $doc;
318
+    }
319
+
320
+    /**
321
+     * Parses document title
322
+     *
323
+     * @param DOMDocument $doc Document
324
+     * @return string
325
+     */
326
+    public function parseTitle(DOMDocument $doc) {
327
+        $node = $doc->getElementsByTagName('title');
328
+        $title = $node->item(0)->nodeValue;
329
+        return ($title) ? : '';
330
+    }
331
+
332
+    /**
333
+     * Parses <link> tags
334
+     *
335
+     * @param DOMDocument $doc Document
336
+     * @return array
337
+     */
338
+    public function parseLinkTags(DOMDocument $doc) {
339
+
340
+        $meta = array();
341
+
342
+        $nodes = $doc->getElementsByTagName('link');
343
+        foreach ($nodes as $node) {
344
+            $rel = $node->getAttribute('rel');
345
+            $href = $node->getAttribute('href');
346
+
347
+            switch ($rel) {
348
+
349
+                case 'icon' :
350
+                    $meta['icons'][] = $this->getAbsoluteURL($doc, $href);
351
+                    break;
352
+
353
+                case 'canonical' :
354
+                    $meta['canonical'] = $this->getAbsoluteURL($doc, $href);
355
+                    break;
356
+
357
+                case 'alternate' :
358
+                    $type = $node->getAttribute('type');
359
+                    if (in_array($type, array(
360
+                                'application/json+oembed',
361
+                                'text/json+oembed',
362
+                                'application/xml+oembed',
363
+                                'text/xml+oembed'
364
+                            ))) {
365
+                        $meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
366
+                    }
367
+                    break;
368
+            }
369
+        }
370
+
371
+        return $meta;
372
+    }
373
+
374
+    /**
375
+     * Parses <meta> tags
376
+     *
377
+     * @param DOMDocument $doc Document
378
+     * @return array
379
+     */
380
+    public function parseMetaTags(DOMDocument $doc) {
381
+
382
+        $meta = array();
383
+
384
+        $nodes = $doc->getElementsByTagName('meta');
385
+        if (!empty($nodes)) {
386
+            foreach ($nodes as $node) {
387
+                $name = $node->getAttribute('name');
388
+                if (!$name) {
389
+                    $name = $node->getAttribute('property');
390
+                }
391
+                if (!$name) {
392
+                    continue;
393
+                }
394
+
395
+                $name = strtolower($name);
396
+
397
+                $content = $node->getAttribute('content');
398
+                if (isset($meta['metatags'][$name])) {
399
+                    if (!is_array($meta['metatags'][$name])) {
400
+                        $meta['metatags'][$name] = array($meta['metatags'][$name]);
401
+                    }
402
+                    $meta['metatags'][$name][] = $content;
403
+                } else {
404
+                    $meta['metatags'][$name] = $content;
405
+                }
406
+
407
+                switch ($name) {
408
+
409
+                    case 'title' :
410
+                    case 'og:title' :
411
+                    case 'twitter:title' :
412
+                        if (empty($meta['title'])) {
413
+                            $meta['title'] = $content;
414
+                        }
415
+                        break;
416
+
417
+                    case 'og:type' :
418
+                        if (empty($meta['type'])) {
419
+                            $meta['type'] = $content;
420
+                        }
421
+                        break;
422
+
423
+                    case 'description' :
424
+                    case 'og:description' :
425
+                    case 'twitter:description' :
426
+                        if (empty($meta['description'])) {
427
+                            $meta['description'] = $content;
428
+                        }
429
+                        break;
430
+
431
+                    case 'keywords' :
432
+                        if (is_string($content)) {
433
+                            $content = explode(',', $content);
434
+                            $content = array_map('trim', $content);
435
+                        }
436
+                        $meta['tags'] = $content;
437
+                        break;
438
+
439
+                    case 'og:site_name' :
440
+                    case 'twitter:site' :
441
+                        if (empty($meta['provider_name'])) {
442
+                            $meta['provider_name'] = $content;
443
+                        }
444
+                        break;
445
+
446
+                    case 'og:image' :
447
+                    case 'twitter:image' :
448
+                        $meta['thumbnails'][] = $this->getAbsoluteURL($doc, $content);
449
+                        break;
450
+                }
451
+            }
452
+        }
453
+
454
+        return $meta;
455
+    }
456
+
457
+    /**
458
+     * Parses <img> tags
459
+     *
460
+     * @param DOMDocument $doc Document
461
+     * @return array
462
+     */
463
+    public function parseImgTags(DOMDocument $doc) {
464
+
465
+        $meta = array();
466
+
467
+        $nodes = $doc->getElementsByTagName('img');
468
+        foreach ($nodes as $node) {
469
+            $src = $node->getAttribute('src');
470
+            $meta['thumbnails'][] = $this->getAbsoluteURL($doc, $src);
471
+        }
472
+
473
+        return $meta;
474
+    }
475
+
476
+    /**
477
+     * Normalizes relative URLs
478
+     *
479
+     * @param DOMDocument $doc  Document
480
+     * @param string      $href URL to normalize
481
+     * @return string
482
+     */
483
+    public function getAbsoluteURL(DOMDocument $doc, $href = '') {
484
+
485
+        // Check if $url is absolute
486
+        if (parse_url($href, PHP_URL_HOST)) {
487
+            return $href;
488
+        }
489
+
490
+        $uri = trim($doc->documentURI ? : '', '/');
491
+
492
+        // Check if $url is relative to root
493
+        if (substr($href, 0, 1) === "/") {
494
+            $scheme = parse_url($uri, PHP_URL_SCHEME);
495
+            $host = parse_url($uri, PHP_URL_HOST);
496
+            return "$scheme://$host$href";
497
+        }
498
+
499
+        // $url is relative to page
500
+        return "$uri/$href";
501
+    }
502 502
 
503 503
 }
Please login to merge, or discard this patch.
Spacing   +3 added lines, -3 removed lines patch added patch discarded remove patch
@@ -263,7 +263,7 @@  discard block
 block discarded – undo
263 263
 	public function isImage($url = '') {
264 264
 		$mime = $this->getContentType($url);
265 265
 		if ($mime) {
266
-			list($simple, ) = explode('/', $mime);
266
+			list($simple,) = explode('/', $mime);
267 267
 			return ($simple == 'image');
268 268
 		}
269 269
 
@@ -326,7 +326,7 @@  discard block
 block discarded – undo
326 326
 	public function parseTitle(DOMDocument $doc) {
327 327
 		$node = $doc->getElementsByTagName('title');
328 328
 		$title = $node->item(0)->nodeValue;
329
-		return ($title) ? : '';
329
+		return ($title) ?: '';
330 330
 	}
331 331
 
332 332
 	/**
@@ -487,7 +487,7 @@  discard block
 block discarded – undo
487 487
 			return $href;
488 488
 		}
489 489
 
490
-		$uri = trim($doc->documentURI ? : '', '/');
490
+		$uri = trim($doc->documentURI ?: '', '/');
491 491
 
492 492
 		// Check if $url is relative to root
493 493
 		if (substr($href, 0, 1) === "/") {
Please login to merge, or discard this patch.