Completed
Push — master ( 641a91...fc9f6a )
by
unknown
10:16
created

IconService   F

Complexity

Total Complexity 60

Size/Duplication

Total Lines 408
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 0
loc 408
rs 3.6
c 1
b 0
f 0
wmc 60
lcom 1
cbo 0

9 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 15 7
B getFaviconUrl() 0 54 9
C parseLinkElement() 0 58 11
C downloadFavicon() 0 68 13
A downloadAs() 0 33 5
A getExtension() 0 8 2
A getExtensionFromMimeType() 0 14 3
A urlType() 0 14 6
A debug() 0 10 4

How to fix   Complexity   

Complex Class

Complex classes like IconService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use IconService, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * Nextcloud - passman
4
 *
5
 * @copyright Copyright (c) 2016, Sander Brand ([email protected])
6
 * @copyright Copyright (c) 2016, Marcos Zuriaga Miguel ([email protected])
7
 * @license GNU AGPL version 3 or any later version
8
 *
9
 * This program is free software: you can redistribute it and/or modify
10
 * it under the terms of the GNU Affero General Public License as
11
 * published by the Free Software Foundation, either version 3 of the
12
 * License, or (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU Affero General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Affero General Public License
20
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
 */
23
24
namespace OCA\Passman\Service;
25
26
27
class IconService {
28
	// URL types
29
	const URL_TYPE_ABSOLUTE = 1;
30
	const URL_TYPE_ABSOLUTE_SCHEME = 2;
31
	const URL_TYPE_ABSOLUTE_PATH = 3;
32
	const URL_TYPE_RELATIVE = 4;
33
	const URL_TYPE_EMBED_BASE64 = 5;
34
35
	/**
36
	 * @var string Page URL
37
	 */
38
	public $url;
39
40
	/**
41
	 * @var string Page URL, after prospective redirects
42
	 */
43
	public $pageUrl;
44
45
	/**
46
	 * @var string Site root URL (homepage), based on $pageUrl
47
	 */
48
	public $siteUrl;
49
50
	/**
51
	 * @var string full URI to favicon
52
	 */
53
	public $icoUrl;
54
55
	/**
56
	 * @var string favicon type (file extension, ex: ico|gif|png)
57
	 */
58
	public $icoType;
59
60
	/**
61
	 * @var string favicon url determination method (default /favicon.ico or found in head>link tag)
62
	 */
63
	public $findMethod;
64
65
	/**
66
	 * @var string details, in case of failure
67
	 */
68
	public $error;
69
70
	/**
71
	 * @var bool Whether the favicon content was successfully retrieved (set to true by downloadFavicon())
72
	 */
73
	public $icoExists;
74
75
	/**
76
	 * @var string md5 of $icoData
77
	 */
78
	public $icoMd5;
79
80
	/**
81
	 * @var string favicon binary data
82
	 */
83
	public $icoData;
84
85
	/**
86
	 * @var array Additional debug info
87
	 */
88
	public $debugInfo;
89
90
	/**
91
	 * @var string HTTP proxy (ex: localhost:8888)
92
	 */
93
	protected $httpProxy;
94
95
	/**
96
	 * @var bool SSL verify peer (default: true)
97
	 */
98
	protected $sslVerify;
99
100
	/**
	 * Build an IconService for a page and, unless $auto is false, immediately
	 * look up and download that page's favicon.
	 *
	 * Recognised $options keys:
	 *  - 'httpProxy': HTTP proxy handed to cURL (ex: localhost:8888); none when absent
	 *  - 'sslVerify': peer verification is disabled only by a strict boolean false
	 *
	 * @param string $url Page URL, must be absolute
	 * @param array $options Optional settings
	 * @param bool $auto Search & download favicon on instantiation
	 * @throws \InvalidArgumentException if $url is empty or is not an absolute URL
	 */
	public function __construct($url, $options = null, $auto = true) {
		if (!$url) {
			throw new \InvalidArgumentException("url is empty");
		}
		$isAbsolute = self::urlType($url) == self::URL_TYPE_ABSOLUTE;
		if (!$isAbsolute) {
			throw new \InvalidArgumentException("'" . $url . "' is not an absolute url");
		}

		$this->url = $url;

		$this->httpProxy = null;
		if (isset($options['httpProxy'])) {
			$this->httpProxy = $options['httpProxy'];
		}

		// Verification stays on unless the caller explicitly passed false
		$verificationDisabled = isset($options['sslVerify']) && $options['sslVerify'] === false;
		$this->sslVerify = !$verificationDisabled;

		if (!$auto) {
			return;
		}
		$this->getFaviconUrl();
		$this->downloadFavicon();
	}
122
123
	/**
	 * Download the page and search its HTML to find the favicon URL.
	 *
	 * HTML parsing is achieved using regular expressions (http://blog.codinghorror.com/parsing-html-the-cthulhu-way/)
	 * to get it work on all kinds of web documents (including non w3c compliance), which an XML parser can't do.
	 *
	 * The result is cached in $icoUrl: once it is set, later calls return it
	 * without downloading anything. Otherwise this sets $pageUrl, $siteUrl,
	 * $icoUrl, $icoType and $findMethod; when the page cannot be fetched at
	 * cURL level it sets $error instead.
	 *
	 * @return string|false Favicon URL, or false if the page download failed at cURL level
	 * @throws \Exception if $url is empty, or if the icon <link> analysis rejects a non-absolute <base href>
	 */
	public function getFaviconUrl() {
		// If already executed, don't need to search again
		if (!empty($this->icoUrl)) {
			return $this->icoUrl;
		}

		// Check URL to search
		if (empty($this->url)) {
			throw new \Exception("url is empty");
		}

		// Removing fragment (hash) from URL: everything from the first '#' on
		$url = $this->url;
		$hashPos = strpos($url, '#');
		if ($hashPos !== false) {
			$url = substr($url, 0, $hashPos);
		}

		// Downloading the page
		$info = [];
		$html = $this->downloadAs($url, $info);
		if ($info['curl_errno'] !== CURLE_OK) {
			$this->error = $info['curl_error'];
			$this->debugInfo['document_curl_errno'] = $info['curl_errno'];
			return false;
		}
		// downloadAs() also returns false for some HTTP error statuses; the
		// default favicon location is still worth trying in that case
		if (!is_string($html)) {
			$html = '';
		}

		// Saving final URL (after prospective redirects) and get root URL
		$this->pageUrl = $info['effective_url'];
		$pageUrlInfo = parse_url($this->pageUrl);
		if (!is_array($pageUrlInfo)) {
			// parse_url() returns false on seriously malformed URLs
			$pageUrlInfo = [];
		}
		if (!empty($pageUrlInfo['scheme']) && !empty($pageUrlInfo['host'])) {
			// Keep an explicit port, otherwise sites served on a non-default
			// port would be looked up on the wrong origin
			$port = isset($pageUrlInfo['port']) ? ':' . $pageUrlInfo['port'] : '';
			$this->siteUrl = $pageUrlInfo['scheme'] . '://' . $pageUrlInfo['host'] . $port . '/';
		}

		// Default favicon URL
		$this->icoUrl = $this->siteUrl . 'favicon.ico';
		$this->icoType = 'ico';
		$this->findMethod = 'default';

		// HTML <head> tag extraction (everything before <body>, or the whole document)
		$matches = [];
		preg_match('#^(.*)<\s*body#isU', $html, $matches);
		$htmlHead = isset($matches[1]) ? $matches[1] : $html;

		// HTML <base> tag href extraction, normalised to end with exactly one slash
		$base_href = null;
		if (preg_match('#<base[^>]+href=(["\'])([^>]+)\1#i', $htmlHead, $matches)) {
			$base_href = rtrim($matches[2], '/') . '/';
			$this->debugInfo['base_href'] = $base_href;
		}

		// HTML <link> icon tag analysis (may replace the default values set above)
		$this->parseLinkElement($htmlHead, $pageUrlInfo, $base_href);

		return $this->icoUrl;
	}
182
183
184
	/**
	 * Look for an icon <link> element in the given HTML and, when it carries a
	 * usable href, resolve it to a full URL stored in $icoUrl. Also updates
	 * $findMethod, $icoType (except for embedded base64 icons) and $debugInfo.
	 * Leaves everything untouched when no icon link or no usable href is found.
	 *
	 * @param string $htmlHead HTML to search
	 * @param array|false $pageUrlInfo parse_url() result of the final page URL
	 * @param string|null $base_href <base> href ending with a slash, or null if the page has none
	 * @throws \Exception if an absolute-path href has to be resolved against a non-absolute base href
	 */
	private function parseLinkElement($htmlHead, $pageUrlInfo, $base_href){
		// NOTE: inside the character classes "\2" is an octal escape (character
		// 0x02), not a backreference to the opening quote; only the trailing \2
		// is a backreference. The pattern is kept as is so the same tags match.
		if (!preg_match('#<\s*link[^>]*(rel=(["\'])[^>\2]*icon[^>\2]*\2)[^>]*>#i', $htmlHead, $matches)) {
			return;
		}
		$link_tag = $matches[0];
		$this->debugInfo['link_tag'] = $link_tag;

		// HTML <link> icon tag href analysis
		if (!preg_match('#href\s*=\s*(["\'])(.*?)\1#i', $link_tag, $matches)) {
			return;
		}
		$ico_href = trim($matches[2]);
		$this->debugInfo['ico_href'] = $ico_href;

		$urlType = self::urlType($ico_href);
		if ($urlType === false) {
			// Unusable href: keep the URL and find method already in place
			return;
		}
		$this->findMethod = 'head';

		if (!is_array($pageUrlInfo)) {
			$pageUrlInfo = [];
		}

		// Building full absolute URL
		switch ($urlType) {
			case self::URL_TYPE_ABSOLUTE:
				$this->findMethod .= ' absolute';
				$this->icoUrl = $ico_href;
				$this->icoType = self::getExtension($this->icoUrl);
				break;
			case self::URL_TYPE_ABSOLUTE_SCHEME:
				$this->findMethod .= ' absolute_scheme';
				$scheme = isset($pageUrlInfo['scheme']) ? $pageUrlInfo['scheme'] : '';
				$this->icoUrl = $scheme . ':' . $ico_href;
				$this->icoType = self::getExtension($this->icoUrl);
				break;
			case self::URL_TYPE_ABSOLUTE_PATH:
				$this->findMethod .= ' absolute_path';
				if (isset($base_href)) {
					$baseHrefType = self::urlType($base_href);
					if ($baseHrefType != self::URL_TYPE_ABSOLUTE) {
						throw new \Exception("Base href is not an absolute URL");
					}
					// Only the origin of the base matters for an absolute path
					$this->icoUrl = self::buildUrlOrigin(parse_url($base_href)) . $ico_href;
					$this->findMethod .= ' with base href';
				} else {
					if (!empty($pageUrlInfo['scheme']) && !empty($pageUrlInfo['host'])) {
						$root = self::buildUrlOrigin($pageUrlInfo);
					} else {
						$root = rtrim((string)$this->siteUrl, '/');
					}
					$this->icoUrl = $root . '/' . ltrim($ico_href, '/');
					$this->findMethod .= ' without base href';
				}
				$this->icoType = self::getExtension($this->icoUrl);
				break;
			case self::URL_TYPE_RELATIVE:
				$this->findMethod .= ' relative';
				if (isset($base_href)) {
					$this->icoUrl = $base_href . $ico_href;
					$this->findMethod .= ' with base href';
				} else {
					// A page URL without any path (http://host) resolves against "/"
					$pagePath = isset($pageUrlInfo['path']) ? $pageUrlInfo['path'] : '/';
					// Strip the last path segment to get the page's directory
					$directory = preg_replace('#/[^/]+?$#i', '/', $pagePath);
					$this->icoUrl = self::buildUrlOrigin($pageUrlInfo) . $directory . $ico_href;
					$this->findMethod .= ' without base href';
				}
				$this->icoType = self::getExtension($this->icoUrl);
				break;
			case self::URL_TYPE_EMBED_BASE64:
				// The type of an embedded icon comes from its MIME type at download time
				$this->findMethod .= ' base64';
				$this->icoUrl = $ico_href;
				break;
		}
	}

	/**
	 * Rebuild "scheme://host[:port]" from a parse_url() result.
	 * Missing components (or a false $parts) contribute an empty string.
	 *
	 * @param array|false $parts parse_url() result
	 * @return string
	 */
	private static function buildUrlOrigin($parts) {
		$scheme = isset($parts['scheme']) ? $parts['scheme'] : '';
		$host = isset($parts['host']) ? $parts['host'] : '';
		$port = isset($parts['port']) ? ':' . $parts['port'] : '';
		return $scheme . '://' . $host . $port;
	}
242
243
	/**
	 * Download the favicon located at $icoUrl, or decode it when $icoUrl is a
	 * base64 data URI.
	 *
	 * On success fills $icoData, $icoMd5 and $icoExists (plus $icoType for data
	 * URIs). On failure sets $error. When a URL found in the HTML answers with
	 * HTTP 404, the default /favicon.ico location is tried once instead.
	 *
	 * @return bool true if the favicon content was stored; false if $icoUrl is
	 *              empty, the data is already loaded, or retrieval/validation failed
	 */
	public function downloadFavicon() {
		// Check params
		if (empty($this->icoUrl)) {
			return false;
		}

		// Prevent useless re-download
		if (!empty($this->icoData)) {
			return false;
		}

		// Base64 embed favicon
		if (preg_match('/^\s*data:(.*?);base64,(.*)/i', $this->icoUrl, $matches)) {
			$content = base64_decode($matches[2]);
			if ($content === false) {
				$this->error = "base64 decode error";
				return false;
			}
			// An empty payload is not an icon, same rule as for downloads
			if ($content === '') {
				$this->error = "Empty content";
				return false;
			}
			$this->icoData = $content;
			$this->icoMd5 = md5($content);
			$this->icoExists = true;
			$this->icoType = self::getExtensionFromMimeType($matches[1]);
			return true;
		}

		// Download favicon
		$info = [];
		$content = $this->downloadAs($this->icoUrl, $info);
		$this->debugInfo['favicon_download_metadata'] = $info;

		// Failover : if getting a 404 with favicon URL found in HTML source, trying with the default favicon URL
		// (the 'failover' debug flag guarantees this happens at most once)
		$doFailover = $content === false
			&& $info['http_code'] == 404
			&& $this->findMethod != 'default'
			&& !isset($this->debugInfo['failover']);
		if ($doFailover) {
			$this->debugInfo['failoverBefore_icoUrl'] = $this->icoUrl;
			$this->debugInfo['failoverBefore_findMethod'] = $this->findMethod;
			$this->icoUrl = $this->siteUrl . 'favicon.ico';
			$this->findMethod = 'default';
			$this->icoType = self::getExtension($this->icoUrl);
			$this->debugInfo['failover'] = true;
			return $this->downloadFavicon();
		}

		// Download error
		if ($content === false) {
			$this->error = 'Favicon download error (HTTP ' . $info['http_code'] . ')';
			return false;
		}

		// Check favicon content
		if (strlen($content) == 0) {
			$this->error = "Empty content";
			return false;
		}

		// Compare the bare media type only: servers usually append parameters
		// ("text/html; charset=UTF-8") and the type itself is case-insensitive
		$contentType = isset($info['content_type']) ? (string)$info['content_type'] : '';
		$paramsPos = strpos($contentType, ';');
		if ($paramsPos !== false) {
			$contentType = substr($contentType, 0, $paramsPos);
		}
		$contentType = strtolower(trim($contentType));
		$textTypes = array('text/html', 'text/plain');
		if (in_array($contentType, $textTypes, true) || preg_match('#(</html>|</b>)#i', $content)) {
			$this->error = "Seems to be a text document";
			return false;
		}

		// All right baby !
		$this->icoData = $content;
		$this->icoMd5 = md5($content);
		$this->icoExists = true;
		return true;
	}
314
315
	/**
	 * Download URL as Firefox with cURL
	 * Details available in $info if provided
	 *
	 * Only http:// and https:// URLs are fetched, including across redirects:
	 * the URL may originate from untrusted page markup, so other cURL
	 * protocols (file, gopher, dict, ...) are refused.
	 *
	 * $info receives the keys curl_errno, curl_error, http_code, effective_url,
	 * redirect_count and content_type.
	 *
	 * @param string $url URL to download
	 * @param array $info Download metadata
	 * @return bool|mixed Response body, or false on a cURL error or on HTTP 403, 404, 500 or 503
	 */
	public function downloadAs($url, &$info = null) {
		$ch = curl_init($url);
		if ($ch === false) {
			// No handle to query: report the failure in the usual metadata shape
			$info['curl_errno'] = CURLE_FAILED_INIT;
			$info['curl_error'] = 'Unable to initialise cURL';
			$info['http_code'] = 0;
			$info['effective_url'] = $url;
			$info['redirect_count'] = 0;
			$info['content_type'] = null;
			return false;
		}

		curl_setopt($ch, CURLOPT_HEADER, false);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($ch, CURLOPT_MAXREDIRS, 20);
		// Restrict both the initial request and any redirect target to HTTP(S)
		curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0');
		curl_setopt($ch, CURLOPT_TIMEOUT, 3);
		// Don't check SSL certificate to allow autosigned certificate
		if ($this->sslVerify === false) {
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
		}

		// Set HTTP proxy
		if ($this->httpProxy) {
			curl_setopt($ch, CURLOPT_PROXY, $this->httpProxy);
		}

		$content = curl_exec($ch);
		$info['curl_errno'] = curl_errno($ch);
		$info['curl_error'] = curl_error($ch);
		$info['http_code'] = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$info['effective_url'] = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
		$info['redirect_count'] = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT);
		$info['content_type'] = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
		curl_close($ch);

		// Transport failures and these HTTP statuses count as "nothing downloaded"
		if ($info['curl_errno'] !== CURLE_OK || in_array($info['http_code'], array(403, 404, 500, 503))) {
			return false;
		}
		return $content;
	}
356
357
	/**
	 * Return file extension from an URL or a file path
	 *
	 * For strings starting with http, https or ftp only the URL path is
	 * considered, so query strings and fragments are ignored.
	 *
	 * @param string $url
	 * @return string|null Extension without the dot, or null when there is none
	 */
	public static function getExtension($url) {
		if (preg_match('#^(https?|ftp)#i', $url)) {
			// parse_url() returns false on malformed URLs and omits 'path'
			// for URLs such as http://host
			$parts = parse_url($url);
			$url = (is_array($parts) && isset($parts['path'])) ? $parts['path'] : '';
		}
		// pathinfo() leaves the 'extension' key out when the name has no dot
		$info = pathinfo($url);
		return isset($info['extension']) ? $info['extension'] : null;
	}
371
372
	/**
	 * Map a MIME type to a favicon file extension.
	 *
	 * The patterns are tried in order, case-insensitively and anywhere in the
	 * string; the first hit wins. Anything unrecognised is reported as 'ico'.
	 *
	 * @param string $mimeType
	 * @return string One of 'ico', 'png', 'gif' or 'jpg'
	 */
	public static function getExtensionFromMimeType($mimeType) {
		$extensionByPattern = [
			'#image/(x-icon|ico)#i' => 'ico',
			'#image/png#i' => 'png',
			'#image/gif#i' => 'gif',
			'#image/jpe?g#i' => 'jpg',
		];

		$extension = 'ico';
		foreach ($extensionByPattern as $pattern => $candidate) {
			if (preg_match($pattern, $mimeType)) {
				$extension = $candidate;
				break;
			}
		}
		return $extension;
	}
392
393
	/**
	 * Classify an URL as one of :
	 * - URL_TYPE_ABSOLUTE        ex: http://www.domain.com/images/fav.ico
	 * - URL_TYPE_ABSOLUTE_SCHEME ex: //www.domain.com/images/fav.ico
	 * - URL_TYPE_ABSOLUTE_PATH   ex: /images/fav.ico
	 * - URL_TYPE_RELATIVE        ex: ../images/fav.ico
	 * - URL_TYPE_EMBED_BASE64    ex: an URL whose scheme is "data"
	 *
	 * @param string $url
	 * @return int|false One of the URL_TYPE_* constants, or false when $url is empty
	 */
	public static function urlType($url) {
		if (empty($url)) {
			return false;
		}

		$parts = parse_url($url);

		// No scheme: tell the three scheme-less forms apart by their leading slashes
		if (empty($parts['scheme'])) {
			if (preg_match('#^//#i', $url)) {
				return self::URL_TYPE_ABSOLUTE_SCHEME;
			}
			if (preg_match('#^/[^/]#i', $url)) {
				return self::URL_TYPE_ABSOLUTE_PATH;
			}
			return self::URL_TYPE_RELATIVE;
		}

		if ($parts['scheme'] === 'data') {
			return self::URL_TYPE_EMBED_BASE64;
		}
		return self::URL_TYPE_ABSOLUTE;
	}
417
418
	/**
	 * Dump a copy of this object in which the binary favicon data is shortened
	 * to the first 16 hexadecimal characters followed by ' ...'. The object
	 * itself is not modified.
	 *
	 * @param boolean $return true to get the copy back, false to print it with print_r()
	 * @return IconService|null The shortened copy when $return is true, nothing otherwise
	 */
	public function debug($return = false) {
		$copy = clone $this;

		$hasBinaryData = is_string($copy->icoData) && !empty($copy->icoData);
		if ($hasBinaryData) {
			$hex = bin2hex($copy->icoData);
			$copy->icoData = substr($hex, 0, 16) . ' ...';
		}

		if (!$return) {
			print_r($copy);
			return;
		}
		return $copy;
	}
434
}