Passed
Push — master ( e50892...2ef246 )
by Ismayil
03:19
created

Parser::isImage()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 9
ccs 6
cts 6
cp 1
rs 9.6666
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 1
crap 2
1
<?php
2
3
namespace hypeJunction;
4
5
use DOMDocument;
6
use Exception;
7
use GuzzleHttp\ClientInterface;
8
use GuzzleHttp\Psr7\Response;
9
10
/**
11
 * Parses HTTP resource into a serializable array of metatags
12
 */
13
class Parser {
14
15
	/**
16
	 * @var ClientInterface
17
	 */
18
	private $client;
19
20
	/**
21
	 * @var array
22
	 */
23
	private static $cache;
24
25
	/**
	 * Creates a parser bound to an HTTP client
	 *
	 * @param ClientInterface $client Client used to perform every HTTP request
	 */
	public function __construct(ClientInterface $client) {
		$this->client = $client;
	}
32
33
	/**
	 * Parses a URL into an array of metatags
	 *
	 * The resource is interpreted, in this order, as an image, as an oEmbed
	 * document (JSON or XML) and as an HTML page. When an HTML page advertises
	 * oEmbed endpoints, each endpoint is parsed as well and merged into the result.
	 *
	 * @param string $url URL to parse
	 * @return array Metatags, or an empty array if nothing could be extracted
	 */
	public function parse($url = '') {

		// URLs currently being parsed further up the call stack. A page may
		// advertise itself (or a page that links back to it) as its own oEmbed
		// endpoint; without this guard such documents recurse without bound.
		static $in_progress = array();

		$key = (string) $url;
		if (isset($in_progress[$key])) {
			return array();
		}
		$in_progress[$key] = true;

		try {
			$data = $this->getImageData($url);
			if (!$data) {
				$data = $this->getOEmbedData($url);
			}
			if (!$data) {
				$data = $this->getDOMData($url);
				if (is_array($data) && !empty($data['oembed_url'])) {
					foreach ($data['oembed_url'] as $oembed_url) {
						$oembed_data = $this->parse($oembed_url);
						if (!empty($oembed_data) && is_array($oembed_data)) {
							// Keep the page URL as 'url' and expose the endpoint as 'oembed_url'
							$oembed_data['oembed_url'] = $oembed_data['url'];
							unset($oembed_data['url']);
							$data = array_merge($data, $oembed_data);
						}
					}
				}
			}
		} finally {
			// Always release the guard so that the URL can be parsed again later
			unset($in_progress[$key]);
		}

		if (!is_array($data)) {
			$data = array();
		}

		// Fall back to the first discovered thumbnail
		if (empty($data['thumbnail_url']) && !empty($data['thumbnails'])) {
			$data['thumbnail_url'] = $data['thumbnails'][0];
		}

		return $data;
	}
69
70
	/**
	 * Builds metatags for a URL that points directly at an image
	 *
	 * @param string $url URL of the image
	 * @return array|false Photo metatags, or false if the resource is not an image
	 */
	public function getImageData($url = '') {
		if ($this->isImage($url)) {
			// The image doubles as its own (only) thumbnail
			return array(
				'type' => 'photo',
				'url' => $url,
				'thumbnails' => array($url),
			);
		}

		return false;
	}
87
88
	/**
	 * Parses OEmbed data
	 *
	 * The resource is treated as an oEmbed document when its content type
	 * identifies it as JSON or XML. Only the known oEmbed properties listed
	 * below are copied, and only when they hold a value that can be safely
	 * represented as a string.
	 *
	 * @param  string $url URL of the oEmbed document
	 * @return array|false Metatags (always containing 'url'), or false if the
	 *                     resource is neither JSON nor XML
	 */
	public function getOEmbedData($url = '') {

		$is_json = $this->isJSON($url);
		if (!$is_json && !$this->isXML($url)) {
			return false;
		}

		$meta = array(
			'url' => $url,
		);

		$content = $this->read($url);
		if (!$content) {
			return $meta;
		}

		if ($is_json) {
			$data = json_decode($content);
		} else {
			// Remote XML is untrusted: collect parse errors instead of emitting
			// warnings, and forbid network access while loading the document
			$previous = libxml_use_internal_errors(true);
			$data = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NONET);
			libxml_clear_errors();
			libxml_use_internal_errors($previous);
		}

		// Malformed documents decode to null/false, JSON lists decode to arrays
		if (!is_object($data)) {
			return $meta;
		}

		$props = array(
			'type',
			'version',
			'title',
			'author_name',
			'author_url',
			'provider_name',
			'provider_url',
			'cache_age',
			'thumbnail_url',
			'thumbnail_width',
			'thumbnail_height',
			'width',
			'height',
			'html',
		);
		foreach ($props as $key) {
			if (empty($data->$key)) {
				continue;
			}
			$value = $data->$key;
			// Nested JSON arrays/objects cannot be cast to string without an error
			if (is_scalar($value) || $value instanceof \SimpleXMLElement) {
				$meta[$key] = (string) $value;
			}
		}
		return $meta;
	}
139
140
	/**
	 * Collects metatags from the DOM of an HTML page
	 *
	 * Combines the values found in <link>, <meta> and <img> tags; when none of
	 * them yields a title, the document <title> is used.
	 *
	 * @param  string $url URL
	 * @return array|false Metatags, or false if the resource is not HTML or
	 *                     its document could not be obtained
	 */
	public function getDOMData($url = '') {

		$doc = $this->isHTML($url) ? $this->getDOM($url) : false;
		if (!$doc) {
			return false;
		}

		$meta = array_merge_recursive(
			array('url' => $url),
			$this->parseLinkTags($doc),
			$this->parseMetaTags($doc),
			$this->parseImgTags($doc)
		);

		if (empty($meta['title'])) {
			$meta['title'] = $this->parseTitle($doc);
		}

		return $meta;
	}
174
175
	/**
	 * Tells whether the URL can be requested and answers with HTTP status 200
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function exists($url = '') {
		$response = $this->request($url);

		return ($response instanceof Response) && $response->getStatusCode() == 200;
	}
188
189
	/**
	 * Requests the resource with GET and returns the response
	 *
	 * The outcome (including a failure) is stored per URL in a static cache and
	 * reused by later calls for as long as that cache lives.
	 *
	 * @param string $url URL of the resource
	 * @return Response|false Response, or false if the URL is not valid or the
	 *                        request threw an exception
	 */
	public function request($url = '') {
		if (!filter_var($url, FILTER_VALIDATE_URL)) {
			return false;
		}
		if (!isset(self::$cache[$url])) {
			try {
				$response = $this->client->request('GET', $url);
			} catch (Exception $e) {
				$response = false;
				// The request above is a GET; the message names it accordingly
				error_log("Parser Error for GET request ($url): {$e->getMessage()}");
			}
			self::$cache[$url] = $response;
		}

		return self::$cache[$url];
	}
211
212
	/**
	 * Returns the response body of the resource
	 *
	 * @param string $url URL of the resource
	 * @return string Body, or an empty string if the resource does not exist
	 */
	public function read($url = '') {
		if ($this->exists($url)) {
			return (string) $this->request($url)->getBody();
		}

		return '';
	}
228
229
	/**
	 * Tells whether the content type of the resource contains "text/html"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isHTML($url = '') {
		return false !== strpos($this->getContentType($url), 'text/html');
	}
239
240
	/**
	 * Tells whether the content type of the resource contains "json"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isJSON($url = '') {
		return false !== strpos($this->getContentType($url), 'json');
	}
250
251
	/**
	 * Tells whether the content type of the resource contains "xml"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isXML($url = '') {
		return false !== strpos($this->getContentType($url), 'xml');
	}
261
262
	/**
	 * Tells whether the top-level media type of the resource is "image"
	 *
	 * @param string $url URL of the resource
	 * @return boolean
	 */
	public function isImage($url = '') {
		$mime = $this->getContentType($url);
		if (!$mime) {
			return false;
		}

		// Only the part before the first slash decides
		$segments = explode('/', $mime);

		return $segments[0] == 'image';
	}
277
278
	/**
	 * Get mime type of the URL content
	 *
	 * Reads the first Content-Type header of the response, strips parameters
	 * such as the charset and lowercases the result: media types are
	 * case-insensitive, while the is*() checks compare against lowercase names.
	 *
	 * @param string $url URL of the resource
	 * @return string Lowercase mime type, or an empty string if it is unknown
	 */
	public function getContentType($url = '') {
		$response = $this->request($url);
		if ($response instanceof Response) {
			$header = $response->getHeader('Content-Type');
			if (is_array($header) && !empty($header)) {
				// "text/html; charset=utf-8" => "text/html"
				$parts = explode(';', $header[0]);
				return strtolower(trim($parts[0]));
			}
		}
		return '';
	}
295
296
	/**
	 * Returns the body of the resource when it is an HTML page
	 *
	 * @param string $url URL of the resource
	 * @return string HTML, or an empty string if the resource is not HTML
	 */
	public function getHTML($url = '') {
		return $this->isHTML($url) ? $this->read($url) : '';
	}
308
309
	/**
	 * Returns HTML contents of the page as a DOMDocument
	 *
	 * @param string $url URL of the resource
	 * @return DOMDocument|false Document, or false if there is no HTML to load
	 *                           or it could not be loaded
	 */
	public function getDOM($url = '') {
		$html = $this->getHTML($url);
		if (empty($html)) {
			return false;
		}

		if (is_callable('mb_encode_numericentity')) {
			// Turn every non-ASCII character into a numeric entity so that the
			// markup is pure ASCII when it reaches the HTML parser. This stands
			// in for mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8'), which
			// is deprecated as of PHP 8.2.
			$html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
		}

		$doc = new DOMDocument();

		// Real-world markup is rarely valid; collect parser errors instead of
		// emitting a warning for each of them, then restore the previous mode
		$previous = libxml_use_internal_errors(true);
		$loaded = $doc->loadHTML($html);
		libxml_clear_errors();
		libxml_use_internal_errors($previous);

		if (!$loaded) {
			return false;
		}

		// Remember where the document came from; used to resolve relative URLs
		if (!$doc->documentURI) {
			$doc->documentURI = $url;
		}
		return $doc;
	}
331
332
	/**
	 * Parses document title
	 *
	 * @param DOMDocument $doc Document
	 * @return string Text of the first <title> element, or an empty string if
	 *                the document has none
	 */
	public function parseTitle(DOMDocument $doc) {
		$node = $doc->getElementsByTagName('title')->item(0);
		if (!$node) {
			// No <title> element: item(0) is null and must not be dereferenced
			return '';
		}

		return ($node->nodeValue) ?: '';
	}
343
344
	/**
	 * Parses <link> tags
	 *
	 * Collects icons ('icons'), the canonical URL ('canonical') and oEmbed
	 * endpoints ('oembed_url'). The rel attribute is handled as a
	 * whitespace-separated, case-insensitive list of tokens, so that values
	 * such as "shortcut icon" or "Alternate" are recognized as well.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseLinkTags(DOMDocument $doc) {

		$meta = array();

		$oembed_types = array(
			'application/json+oembed',
			'text/json+oembed',
			'application/xml+oembed',
			'text/xml+oembed',
		);

		foreach ($doc->getElementsByTagName('link') as $node) {
			$href = trim($node->getAttribute('href'));
			if ($href === '') {
				// A link without a target has no URL worth reporting
				continue;
			}

			$rels = preg_split('/\s+/', strtolower($node->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);

			foreach (array_unique($rels) as $rel) {
				switch ($rel) {

					case 'icon':
						$meta['icons'][] = $this->getAbsoluteURL($doc, $href);
						break;

					case 'canonical':
						$meta['canonical'] = $this->getAbsoluteURL($doc, $href);
						break;

					case 'alternate':
						// Only alternates that declare an oEmbed media type are endpoints
						$type = strtolower(trim($node->getAttribute('type')));
						if (in_array($type, $oembed_types, true)) {
							$meta['oembed_url'][] = $this->getAbsoluteURL($doc, $href);
						}
						break;
				}
			}
		}

		return $meta;
	}
385
386
	/**
	 * Extracts metatags from the <meta> elements of a document
	 *
	 * Every named tag is recorded under 'metatags' (repeated names are gathered
	 * into a list). Well-known names additionally populate 'title', 'type',
	 * 'description', 'provider_name', 'tags' and 'thumbnails'.
	 *
	 * @param DOMDocument $doc Document
	 * @return array
	 */
	public function parseMetaTags(DOMDocument $doc) {

		// Tag name => field that takes the content of the first non-empty match
		$first_wins = array(
			'title' => 'title',
			'og:title' => 'title',
			'twitter:title' => 'title',
			'og:type' => 'type',
			'description' => 'description',
			'og:description' => 'description',
			'twitter:description' => 'description',
			'og:site_name' => 'provider_name',
			'twitter:site' => 'provider_name',
		);

		$meta = array();

		foreach ($doc->getElementsByTagName('meta') as $tag) {
			// Open Graph tags carry their name in the "property" attribute
			$name = $tag->getAttribute('name') ?: $tag->getAttribute('property');
			if (!$name) {
				continue;
			}

			$name = strtolower($name);
			$content = $tag->getAttribute('content');

			if (!isset($meta['metatags'][$name])) {
				$meta['metatags'][$name] = $content;
			} else {
				// Second occurrence of a name: promote the stored value to a list
				if (!is_array($meta['metatags'][$name])) {
					$meta['metatags'][$name] = array($meta['metatags'][$name]);
				}
				$meta['metatags'][$name][] = $content;
			}

			if (isset($first_wins[$name])) {
				$field = $first_wins[$name];
				if (empty($meta[$field])) {
					$meta[$field] = $content;
				}
			} else if ($name === 'keywords') {
				$meta['tags'] = array_map('trim', explode(',', $content));
			} else if ($name === 'og:image' || $name === 'twitter:image') {
				$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $content);
			}
		}

		return $meta;
	}
468
469
	/**
	 * Parses <img> tags
	 *
	 * Every image that has a source is reported as a thumbnail candidate.
	 *
	 * @param DOMDocument $doc Document
	 * @return array Array with a 'thumbnails' list, or an empty array if the
	 *               document has no usable images
	 */
	public function parseImgTags(DOMDocument $doc) {

		$meta = array();

		foreach ($doc->getElementsByTagName('img') as $node) {
			$src = trim($node->getAttribute('src'));
			if ($src === '') {
				// Without a source there is no image URL to resolve
				continue;
			}
			$meta['thumbnails'][] = $this->getAbsoluteURL($doc, $src);
		}

		return $meta;
	}
487
488
	/**
	 * Normalizes relative URLs
	 *
	 * Resolves $href against the URI of the document:
	 *  - references with their own scheme or host are returned unchanged
	 *  - protocol-relative references ("//host/path") receive the document scheme
	 *  - root-relative references ("/path") keep the document scheme, host and port
	 *  - other references are resolved against the directory of the document
	 * Dot segments ("./", "../") are not collapsed.
	 *
	 * @param DOMDocument $doc  Document
	 * @param string      $href URL to normalize
	 * @return string Absolute URL, or $href itself if the document URI has no
	 *                scheme and host to resolve against
	 */
	public function getAbsoluteURL(DOMDocument $doc, $href = '') {

		$href = trim((string) $href);

		$base = parse_url($doc->documentURI ?: '');
		if (!is_array($base)) {
			$base = array();
		}

		// Protocol-relative reference: borrow the scheme of the document
		if (substr($href, 0, 2) === '//') {
			return empty($base['scheme']) ? $href : $base['scheme'] . ':' . $href;
		}

		// Absolute URL, or a URI that carries its own scheme (data:, mailto:, ...)
		if (parse_url($href, PHP_URL_SCHEME) || parse_url($href, PHP_URL_HOST)) {
			return $href;
		}

		// Nothing to resolve against
		if (empty($base['scheme']) || empty($base['host'])) {
			return $href;
		}

		$root = $base['scheme'] . '://' . $base['host'];
		if (isset($base['port'])) {
			$root .= ':' . $base['port'];
		}

		$path = isset($base['path']) ? $base['path'] : '';
		if (substr($path, 0, 1) !== '/') {
			$path = '/' . $path;
		}

		$first = substr($href, 0, 1);

		// Empty or fragment-only reference: same document, same query
		if ($href === '' || $first === '#') {
			$query = isset($base['query']) ? '?' . $base['query'] : '';
			return $root . $path . $query . $href;
		}

		// Query-only reference: same path, new query
		if ($first === '?') {
			return $root . $path . $href;
		}

		// Reference relative to the root of the site
		if ($first === '/') {
			return $root . $href;
		}

		// Reference relative to the page: drop the last path segment of the document
		$directory = substr($path, 0, strrpos($path, '/') + 1);

		return $root . $directory . $href;
	}
514
515
}
516