1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Nextcloud - passman |
4
|
|
|
* |
5
|
|
|
* @copyright Copyright (c) 2016, Sander Brand ([email protected]) |
6
|
|
|
* @copyright Copyright (c) 2016, Marcos Zuriaga Miguel ([email protected]) |
7
|
|
|
* @license GNU AGPL version 3 or any later version |
8
|
|
|
* |
9
|
|
|
* This program is free software: you can redistribute it and/or modify |
10
|
|
|
* it under the terms of the GNU Affero General Public License as |
11
|
|
|
* published by the Free Software Foundation, either version 3 of the |
12
|
|
|
* License, or (at your option) any later version. |
13
|
|
|
* |
14
|
|
|
* This program is distributed in the hope that it will be useful, |
15
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
16
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17
|
|
|
* GNU Affero General Public License for more details. |
18
|
|
|
* |
19
|
|
|
* You should have received a copy of the GNU Affero General Public License |
20
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
21
|
|
|
* |
22
|
|
|
*/ |
23
|
|
|
|
24
|
|
|
namespace OCA\Passman\Service; |
25
|
|
|
|
26
|
|
|
|
27
|
|
|
/**
 * Locates and downloads the favicon of a web page.
 *
 * The page is fetched with cURL, its <head> is scanned with regular
 * expressions for a <link rel="...icon..."> element (honouring <base href>),
 * and the icon is then downloaded. When no icon link is found, the
 * conventional "/favicon.ico" at the site root is used.
 *
 * Results are exposed through the public properties; failures are reported
 * through $error and a false return value rather than through exceptions.
 */
class IconService {
    // URL types
    const URL_TYPE_ABSOLUTE = 1;
    const URL_TYPE_ABSOLUTE_SCHEME = 2;
    const URL_TYPE_ABSOLUTE_PATH = 3;
    const URL_TYPE_RELATIVE = 4;
    const URL_TYPE_EMBED_BASE64 = 5;

    /**
     * @var string Page URL
     */
    public $url;

    /**
     * @var string Page URL, after prospective redirects
     */
    public $pageUrl;

    /**
     * @var string Site root URL (scheme, host and optional port, with a trailing slash), based on $pageUrl
     */
    public $siteUrl;

    /**
     * @var string full URI to favicon
     */
    public $icoUrl;

    /**
     * @var string favicon type (file extension, ex: ico|gif|png)
     */
    public $icoType;

    /**
     * @var string favicon url determination method ("default" for /favicon.ico, or "head ..." when found in a head>link tag)
     */
    public $findMethod;

    /**
     * @var string details, in case of failure
     */
    public $error;

    /**
     * @var bool tell if the favicon exists (set to true after a successful download)
     */
    public $icoExists;

    /**
     * @var string md5 of $icoData
     */
    public $icoMd5;

    /**
     * @var string favicon binary data
     */
    public $icoData;

    /**
     * @var array|null Additional debug info (null until the first entry is recorded)
     */
    public $debugInfo;

    /**
     * @var string HTTP proxy (ex: localhost:8888)
     */
    protected $httpProxy;

    /**
     * @var bool SSL verify peer (default: true)
     */
    protected $sslVerify;

    /**
     * Create a new IconService object, search & download favicon if $auto is true
     *
     * @param string $url Page URL (must be absolute, i.e. carry a scheme)
     * @param array|null $options Optional settings: 'httpProxy' (string) and 'sslVerify' (bool)
     * @param bool $auto Search & download favicon on instantiation
     * @throws \InvalidArgumentException when $url is empty or not an absolute URL
     */
    public function __construct($url, $options = null, $auto = true) {
        if (!$url) {
            throw new \InvalidArgumentException("url is empty");
        }
        if (self::urlType($url) != self::URL_TYPE_ABSOLUTE) {
            throw new \InvalidArgumentException("'" . $url . "' is not an absolute url");
        }
        $this->url = $url;
        $this->httpProxy = isset($options['httpProxy']) ? $options['httpProxy'] : null;
        // Peer verification stays on unless the caller passes an explicit boolean false.
        $this->sslVerify = !(isset($options['sslVerify']) && $options['sslVerify'] === false);
        if ($auto) {
            $this->getFaviconUrl();
            $this->downloadFavicon();
        }
    }

    /**
     * Download page and search html to find favicon URL. Returns favicon URL.
     * HTML parsing is achieved using regular expressions (http://blog.codinghorror.com/parsing-html-the-cthulhu-way/)
     * to get it work on all kinds of web documents (including non w3c compliance), which an XML parser can't do.
     *
     * @return string|false favicon URL, or false when the page could not be fetched ($error is set)
     * @throws \Exception when $url has been emptied after construction
     */
    public function getFaviconUrl() {
        // If already executed, don't need to search again
        if (!empty($this->icoUrl)) {
            return $this->icoUrl;
        }

        // Check URL to search
        if (empty($this->url)) {
            throw new \Exception("url is empty");
        }

        // Removing fragment (hash) from URL: everything from the first '#' on
        $url = $this->url;
        $hashPos = strpos($url, '#');
        if ($hashPos !== false) {
            $url = substr($url, 0, $hashPos);
        }

        // Downloading the page
        $info = [];
        $html = $this->downloadAs($url, $info);
        if ($info['curl_errno'] !== CURLE_OK) {
            $this->error = $info['curl_error'];
            $this->debugInfo['document_curl_errno'] = $info['curl_errno'];
            return false;
        }
        // downloadAs() yields false for some HTTP error codes; the default
        // favicon location is still worth trying, so continue with empty HTML.
        if (!is_string($html)) {
            $html = '';
        }

        // Saving final URL (after prospective redirects) and get root URL
        $this->pageUrl = $info['effective_url'];
        $pageUrlInfo = parse_url($this->pageUrl);
        if (!is_array($pageUrlInfo)) {
            $pageUrlInfo = [];
        }
        $origin = self::originOf($pageUrlInfo);
        if ($origin === null) {
            $this->error = "Unable to determine site root URL";
            return false;
        }
        $this->siteUrl = $origin . '/';

        // Default favicon URL
        $this->icoUrl = $this->siteUrl . 'favicon.ico';
        $this->icoType = self::getExtension($this->icoUrl);
        $this->findMethod = 'default';

        // HTML <head> tag extraction (everything before <body>, or the whole document)
        preg_match('#^(.*)<\s*body#isU', $html, $matches);
        $htmlHead = isset($matches[1]) ? $matches[1] : $html;

        // HTML <base> tag href extraction. The value is matched lazily so that
        // attributes following href are not swallowed into it.
        $base_href = null;
        if (preg_match('#<base[^>]+href\s*=\s*(["\'])([^>]+?)\1#i', $htmlHead, $matches)) {
            $base_href = rtrim($matches[2], '/') . '/';
            $this->debugInfo['base_href'] = $base_href;
        }

        // HTML <link> icon tag analysis
        $this->parseLinkElement($htmlHead, $pageUrlInfo, $base_href);

        return $this->icoUrl;
    }

    /**
     * Look for a <link rel="...icon..."> element and, when found, turn its
     * href into an absolute URL stored in $icoUrl (with $icoType/$findMethod).
     * Leaves the current $icoUrl untouched when nothing usable is found.
     *
     * @param string $htmlHead HTML to scan
     * @param array $pageUrlInfo parse_url() result for the page URL
     * @param string|null $base_href <base href> value (with trailing slash), if any
     */
    private function parseLinkElement($htmlHead, $pageUrlInfo, $base_href){
        // The rel value may not run past its own closing quote: (?!\2) stops
        // at the quote captured in group 2. (A backreference is not
        // recognised inside a character class, so [^\2] cannot do this.)
        $linkPattern = '#<\s*link[^>]*(rel=(["\'])(?:(?!\2)[^>])*icon(?:(?!\2)[^>])*\2)[^>]*>#i';
        if (!preg_match($linkPattern, $htmlHead, $matches)) {
            return;
        }
        $link_tag = $matches[0];
        $this->debugInfo['link_tag'] = $link_tag;

        // HTML <link> icon tag href analysis
        if (!preg_match('#href\s*=\s*(["\'])(.*?)\1#i', $link_tag, $matches)) {
            return;
        }
        $ico_href = trim($matches[2]);
        $this->debugInfo['ico_href'] = $ico_href;
        if ($ico_href === '') {
            // Nothing to resolve: keep the default favicon URL and method.
            return;
        }
        $this->findMethod = 'head';

        $absoluteBase = $this->absoluteBaseHref($base_href, $pageUrlInfo);

        // Building full absolute URL
        $urlType = self::urlType($ico_href);
        switch ($urlType) {
            case self::URL_TYPE_ABSOLUTE:
                $this->findMethod .= ' absolute';
                $this->icoUrl = $ico_href;
                break;
            case self::URL_TYPE_ABSOLUTE_SCHEME:
                $this->findMethod .= ' absolute_scheme';
                $scheme = !empty($pageUrlInfo['scheme']) ? $pageUrlInfo['scheme'] : 'http';
                $this->icoUrl = $scheme . ':' . $ico_href;
                break;
            case self::URL_TYPE_ABSOLUTE_PATH:
                $this->findMethod .= ' absolute_path';
                $baseOrigin = $absoluteBase !== null ? self::originOf(parse_url($absoluteBase)) : null;
                if ($baseOrigin !== null) {
                    $this->icoUrl = $baseOrigin . $ico_href;
                    $this->findMethod .= ' with base href';
                } else {
                    $this->icoUrl = rtrim($this->siteUrl, '/') . '/' . ltrim($ico_href, '/');
                    $this->findMethod .= ' without base href';
                }
                break;
            case self::URL_TYPE_RELATIVE:
                $this->findMethod .= ' relative';
                if ($absoluteBase !== null) {
                    $this->icoUrl = $absoluteBase . $ico_href;
                    $this->findMethod .= ' with base href';
                } else {
                    // Resolve against the directory of the page itself.
                    $pagePath = isset($pageUrlInfo['path']) ? $pageUrlInfo['path'] : '/';
                    $directory = preg_replace('#/[^/]+?$#i', '/', $pagePath);
                    $this->icoUrl = rtrim($this->siteUrl, '/') . $directory . $ico_href;
                    $this->findMethod .= ' without base href';
                }
                break;
            case self::URL_TYPE_EMBED_BASE64:
                $this->findMethod .= ' base64';
                $this->icoUrl = $ico_href;
                break;
        }

        // For embedded icons the type is derived from the MIME type on download.
        if ($urlType !== self::URL_TYPE_EMBED_BASE64) {
            $this->icoType = self::getExtension($this->icoUrl);
        }
    }

    /**
     * Turn a <base href> value into an absolute URL.
     *
     * @param string|null $base_href base href as found in the document (with trailing slash)
     * @param array $pageUrlInfo parse_url() result for the page URL
     * @return string|null absolute base URL, or null when there is no base or it
     *                     cannot be made absolute (it is then ignored by the caller)
     */
    private function absoluteBaseHref($base_href, $pageUrlInfo) {
        if (empty($base_href)) {
            return null;
        }
        switch (self::urlType($base_href)) {
            case self::URL_TYPE_ABSOLUTE:
                return $base_href;
            case self::URL_TYPE_ABSOLUTE_SCHEME:
                $scheme = !empty($pageUrlInfo['scheme']) ? $pageUrlInfo['scheme'] : 'http';
                return $scheme . ':' . $base_href;
            case self::URL_TYPE_ABSOLUTE_PATH:
                $origin = self::originOf($pageUrlInfo);
                return $origin === null ? null : $origin . $base_href;
        }
        return null;
    }

    /**
     * Build "scheme://host[:port]" from a parse_url() result.
     *
     * @param array|false|null $urlInfo parse_url() result
     * @return string|null origin without trailing slash, or null when scheme or host is missing
     */
    private static function originOf($urlInfo) {
        if (!is_array($urlInfo) || empty($urlInfo['scheme']) || empty($urlInfo['host'])) {
            return null;
        }
        $origin = $urlInfo['scheme'] . '://' . $urlInfo['host'];
        if (!empty($urlInfo['port'])) {
            $origin .= ':' . $urlInfo['port'];
        }
        return $origin;
    }

    /**
     * Download the favicon if available
     *
     * @return bool true when icon data has been stored in $icoData; false when there is
     *              nothing to do (no URL, or data already present) or on failure ($error is set)
     */
    public function downloadFavicon() {
        // Check params
        if (empty($this->icoUrl)) {
            return false;
        }

        // Prevent useless re-download
        if (!empty($this->icoData)) {
            return false;
        }

        // Base64 embed favicon
        if (preg_match('/^\s*data:(.*?);base64,(.*)/i', $this->icoUrl, $matches)) {
            $content = base64_decode($matches[2]);
            if ($content === false || $content === '') {
                $this->error = "base64 decode error";
                return false;
            }
            $this->icoData = $content;
            $this->icoMd5 = md5($content);
            $this->icoExists = true;
            $this->icoType = self::getExtensionFromMimeType($matches[1]);
            return true;
        }

        // Download favicon
        $info = [];
        $content = $this->downloadAs($this->icoUrl, $info);
        $this->debugInfo['favicon_download_metadata'] = $info;

        // Failover : if getting a 404 with favicon URL found in HTML source, trying with the default favicon URL
        $doFailover = $content === false
            && $info['http_code'] == 404
            && $this->findMethod != 'default'
            && !empty($this->siteUrl)
            && !isset($this->debugInfo['failover']);
        if ($doFailover) {
            $this->debugInfo['failoverBefore_icoUrl'] = $this->icoUrl;
            $this->debugInfo['failoverBefore_findMethod'] = $this->findMethod;
            $this->icoUrl = $this->siteUrl . 'favicon.ico';
            $this->findMethod = 'default';
            $this->icoType = self::getExtension($this->icoUrl);
            // The 'failover' marker guarantees the retry happens at most once.
            $this->debugInfo['failover'] = true;
            return $this->downloadFavicon();
        }

        // Download error
        if ($content === false) {
            $this->error = 'Favicon download error (HTTP ' . $info['http_code'] . ')';
            return false;
        }

        // Check favicon content
        if (strlen($content) == 0) {
            $this->error = "Empty content";
            return false;
        }
        // Compare only the media type: the header frequently carries
        // parameters such as "; charset=UTF-8".
        $mimeType = '';
        if (is_string($info['content_type'])) {
            $contentTypeParts = explode(';', $info['content_type']);
            $mimeType = strtolower(trim($contentTypeParts[0]));
        }
        $textTypes = array('text/html', 'text/plain');
        if (in_array($mimeType, $textTypes, true) || preg_match('#(</html>|</b>)#i', $content)) {
            $this->error = "Seems to be a text document";
            return false;
        }

        // All right baby !
        $this->icoData = $content;
        $this->icoMd5 = md5($content);
        $this->icoExists = true;
        if (empty($this->icoType)) {
            // The URL carried no extension: fall back to the served MIME type.
            $this->icoType = self::getExtensionFromMimeType($mimeType);
        }
        return true;
    }

    /**
     * Download URL as Firefox with cURL
     * Details available in $info if provided
     *
     * $info receives the keys curl_errno, curl_error, http_code, effective_url,
     * redirect_count and content_type.
     *
     * @param string $url URL to download
     * @param array $info Download metadata
     * @return bool|mixed response body, or false on a cURL error or HTTP 403/404/500/503
     */
    public function downloadAs($url, &$info = null) {
        $ch = curl_init($url);
        if ($ch === false) {
            $info['curl_errno'] = CURLE_FAILED_INIT;
            $info['curl_error'] = 'Unable to initialise cURL session';
            $info['http_code'] = 0;
            $info['effective_url'] = $url;
            $info['redirect_count'] = 0;
            $info['content_type'] = null;
            return false;
        }

        // The URL comes from user input: only allow network fetch protocols so
        // that schemes such as file:// or gopher:// cannot be reached, neither
        // directly nor through a redirect.
        $allowedProtocols = CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FTP | CURLPROTO_FTPS;

        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 20);
        curl_setopt($ch, CURLOPT_PROTOCOLS, $allowedProtocols);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $allowedProtocols);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0');
        curl_setopt($ch, CURLOPT_TIMEOUT, 3);
        // Don't check SSL certificate to allow autosigned certificate
        if ($this->sslVerify === false) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }

        // Set HTTP proxy
        if ($this->httpProxy) {
            curl_setopt($ch, CURLOPT_PROXY, $this->httpProxy);
        }

        $content = curl_exec($ch);
        $info['curl_errno'] = curl_errno($ch);
        $info['curl_error'] = curl_error($ch);
        $info['http_code'] = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $info['effective_url'] = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $info['redirect_count'] = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT);
        $info['content_type'] = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
        curl_close($ch);

        if ($info['curl_errno'] !== CURLE_OK || in_array($info['http_code'], array(403, 404, 500, 503))) {
            return false;
        }
        return $content;
    }

    /**
     * Return file extension from an URL or a file path
     *
     * For http(s)/ftp URLs only the path component is considered, so a query
     * string or fragment does not end up in the extension.
     *
     * @param string $url
     * @return string extension without the dot, or an empty string when there is none
     */
    public static function getExtension($url) {
        if (preg_match('#^(https?|ftp)#i', $url)) {
            $path = parse_url($url, PHP_URL_PATH);
            // No path component (e.g. "http://example.com") or unparsable URL.
            $url = is_string($path) ? $path : '';
        }
        $extension = pathinfo($url, PATHINFO_EXTENSION);
        return is_string($extension) ? $extension : '';
    }

    /**
     * Return file extension from MIME type
     *
     * @param string $mimeType
     * @return string one of ico|png|gif|jpg; 'ico' when the MIME type is not recognised
     */
    public static function getExtensionFromMimeType($mimeType) {
        $typeMapping = array(
            'ico' => '#image/(x-icon|ico)#i',
            'png' => '#image/png#i',
            'gif' => '#image/gif#i',
            'jpg' => '#image/jpe?g#i',
        );
        foreach ($typeMapping as $key => $val) {
            if (preg_match($val, $mimeType)) {
                return $key;
            }
        }
        return 'ico';
    }

    /**
     * Return URL type, either :
     * - URL_TYPE_ABSOLUTE        ex: http://www.domain.com/images/fav.ico
     * - URL_TYPE_ABSOLUTE_SCHEME ex: //www.domain.com/images/fav.ico
     * - URL_TYPE_ABSOLUTE_PATH   ex: /images/fav.ico
     * - URL_TYPE_RELATIVE        ex: ../images/fav.ico
     * - URL_TYPE_EMBED_BASE64    ex: data:image/x-icon;base64,AAABAA...
     *
     * @param string $url
     * @return int|false one of the URL_TYPE_* constants, or false when $url is empty
     */
    public static function urlType($url) {
        if (empty($url)) {
            return false;
        }
        $urlInfo = parse_url($url);
        if (!empty($urlInfo['scheme'])) {
            // URL schemes are case-insensitive.
            return strtolower($urlInfo['scheme']) === 'data' ? self::URL_TYPE_EMBED_BASE64 : self::URL_TYPE_ABSOLUTE;
        }
        if (strpos($url, '//') === 0) {
            return self::URL_TYPE_ABSOLUTE_SCHEME;
        }
        // A single leading slash (including the bare root "/") is a path
        // relative to the site root.
        if ($url[0] === '/') {
            return self::URL_TYPE_ABSOLUTE_PATH;
        }
        return self::URL_TYPE_RELATIVE;
    }

    /**
     * Show object printable properties, or return it if $return is true
     *
     * The binary icon data of the copy is shortened to the first 16 hex
     * characters so the dump stays readable.
     *
     * @param boolean $return
     * @return IconService|null the shortened copy when $return is true; null after printing
     */
    public function debug($return = false) {
        $dump = clone $this;
        if (!empty($dump->icoData) && is_string($dump->icoData)) {
            $dump->icoData = substr(bin2hex($dump->icoData), 0, 16) . ' ...';
        }
        if ($return) {
            return $dump;
        }
        print_r($dump);
    }
}