Issues (1752)

Security Analysis    not enabled

This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.

  Cross-Site Scripting
Cross-Site Scripting enables an attacker to inject code into the response of a web-request that is viewed by other users. It can for example be used to bypass access controls, or even to take over other users' accounts.
  File Exposure
File Exposure allows an attacker to gain access to local files that he should not be able to access. These files can for example include database credentials, or other configuration files.
  File Manipulation
File Manipulation enables an attacker to write custom data to files. This potentially leads to injection of arbitrary code on the server.
  Object Injection
Object Injection enables an attacker to inject an object into PHP code, and can lead to arbitrary code execution, file exposure, or file manipulation attacks.
  Code Injection
Code Injection enables an attacker to execute arbitrary code on the server.
  Response Splitting
Response Splitting can be used to send arbitrary responses.
  File Inclusion
File Inclusion enables an attacker to inject custom files into PHP's file loading mechanism, either explicitly passed to include, or for example via PHP's auto-loading mechanism.
  Command Injection
Command Injection enables an attacker to inject a shell command that is executed with the privileges of the web-server. This can be used to expose sensitive data, or gain access to your server.
  SQL Injection
SQL Injection enables an attacker to execute arbitrary SQL code on your database server gaining access to user data, or manipulating user data.
  XPath Injection
XPath Injection enables an attacker to modify the parts of an XML document that are read. If that XML document is, for example, used for authentication, this can lead to further vulnerabilities similar to SQL Injection.
  LDAP Injection
LDAP Injection enables an attacker to inject LDAP statements potentially granting permission to run unauthorized queries, or modify content inside the LDAP tree.
  Header Injection
  Other Vulnerability
This category comprises other attack vectors such as manipulating the PHP runtime, loading custom extensions, freezing the runtime, or similar.
  Regex Injection
Regex Injection enables an attacker to execute arbitrary code in your PHP process.
  XML Injection
XML Injection enables an attacker to read files on your local filesystem including configuration files, or can be abused to freeze your web-server process.
  Variable Injection
Variable Injection enables an attacker to overwrite program variables with custom data, and can lead to further vulnerabilities.
Unfortunately, the security analysis is currently not available for your project. If you are a non-commercial open-source project, please contact support to gain access.

class/todatauri.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
require_once(dirname(__FILE__) . '/fwolflib.php');
3
require_once(FWOLFLIB . 'class/curl.php');
4
require_once(FWOLFLIB . 'func/download.php');
5
require_once(FWOLFLIB . 'func/env.php');
6
require_once(FWOLFLIB . 'func/request.php');
7
require_once(FWOLFLIB . 'func/url.php');
8
9
10
/**
11
 * Convert css, js, image in a html file, to save it in ONE file like mht.
12
 *
13
 * @package		fwolflib
14
 * @copyright	Copyright 2007-2012, Fwolf
15
 * @author		Fwolf <[email protected]>
16
 * @since		2007-04-06
17
 */
18
class ToDataUri extends Curl {
19
	/**
20
	 * Cache of src already retrieved
21
	 * Format: url=>base64_data
22
	 * @var	array
23
	 */
24
	protected $mCache = array();
25
26
	/**
27
	 * Charset of original web page
28
	 * Show in info block.
29
	 * @var	string
30
	 */
31
	protected $mCharset = '';
32
33
	/**
34
	 * Running in cli mode
35
	 * Will echo some message directly
36
	 * @var	boolean
37
	 */
38
	protected $mCliMode = false;
39
40
	/**
41
	 * URI which got error when get
42
	 * Only for debug or output purpose
43
	 * @var	array
44
	 */
45
	protected $mGetFailed = array();
46
47
	/**
48
	 * URI which success retrieved
49
	 * @var	array
50
	 */
51
	protected $mGetOk = array();
52
53
	/**
54
	 * Html code get from target
55
	 * Change is also done here, so this can be output directly
56
	 * @var	string
57
	 */
58
	public $mHtml = '';
59
60
	/**
61
	 * Information of Process, display in footer. (obsolete?)
62
	 * @var	string
63
	 * @see	$mMsg
64
	 */
65
	public	$mInfo = '';
66
67
	/**
68
	 * Simple response message
69
	 * Display below form
70
	 * @var	string
71
	 * @see	$mInfo
72
	 */
73
	public	$mMsg = '';
74
75
	/**
76
	 * Retrieve html data
77
	 * Auto retrieve html data by url on default, if set to false, $this->mHtml must be set manually.
78
	 * @var boolean
79
	 * @see $mHtml
80
	 */
81
	public	$mRetrieveHtml = true;
82
83
	/**
84
	 * Original url
85
	 * The web page, which contains css, js, image
86
	 * @var	string
87
	 */
88
	public	$mUrl = '';
89
90
	/**
91
	 * Baseurl of target webpage
92
	 * eg: http://tld.com/dir/index.html, baseurl is http://tld.com/dir/
93
	 * @var	string
94
	 */
95
	protected $mUrlBase = '';
96
97
	/**
98
	 * http or https, for Baseurl
99
	 * @var	string
100
	 */
101
	protected $sUrlPlan = '';
102
103
104
	/**
	 * Constructor
	 *
	 * Runs the parent constructor, hands $url to SetUrl() (which keeps it
	 * only when it is non-empty and accepted by IsSafe()), calls
	 * SetoptSslverify(false) and remembers whether we run in cli mode.
	 *
	 * @param	string	$url	Web page to convert, can be left empty.
	 */
	public function __construct($url = '')
	{
		parent::__construct();
		$this->SetUrl($url);
		$this->SetoptSslverify(false);

		// In cli mode progress messages are echoed directly
		if (IsCli()) {
			$this->mCliMode = true;
		}
	} // end of func __construct
118
119
120
	/**
	 * Add process information to dom, display at bottom of page
	 *
	 * Builds a <ul> with the original url, original charset, a note about
	 * this tool, the generate time and a foldable list of every resource
	 * retrieved or failed, then appends it (inside a <div>, after a <hr>)
	 * to the end of <body>. A missing <body> is created.
	 *
	 * @param	DOMDocument	$dom
	 */
	protected function AddInfo (&$dom) {
		// :TODO: original url & this script url

		$dom_info_ul = $dom->createElement('ul');
		$dom_info_ul->setAttribute('style', 'text-align: left');

		// Original url
		$a = $dom->createElement('a', htmlspecialchars($this->mUrl));
		$a->setAttribute('href', $this->mUrl);
		$li = $dom->createElement('li', "Original url: ");
		$li->appendChild($a);
		$dom_info_ul->appendChild($li);

		// Original charset
		$li = $dom->createElement('li', htmlspecialchars("Original charset: {$this->mCharset}"));
		$dom_info_ul->appendChild($li);

		// Url of this script, cli mode has no url to link to
		if ($this->mCliMode) {
			$li = $dom->createElement('li', "Generate using Fwolf's 'Save html all in one file' tools(cli mode php script).");
		} else {
			$a = $dom->createElement('a', "Fwolf's 'Save html all in one file' tools");
			$a->setAttribute('href', GetSelfUrl(false));
			$li = $dom->createElement('li', "Generate using: ");
			$li->appendChild($a);
		}
		$dom_info_ul->appendChild($li);

		// Generate time
		$li = $dom->createElement('li', htmlspecialchars("Generate time: " . date('Y-m-d G:i:s')));
		$dom_info_ul->appendChild($li);

		// Resources summary, with a clickable mark to fold/unfold the list
		$i_getok = count($this->mGetOk);
		$i_getfailed = count($this->mGetFailed);
		$li = $dom->createElement('li', "Resources(" . ($i_getok + $i_getfailed) . " : √ $i_getok, × $i_getfailed): ");
		$span = $dom->createElement('span', "+++");
		$span->setAttribute('style', 'cursor: pointer;');
		$span->setAttribute('onclick', "javascript:obj=getElementById('fwolf_todatauri_info_resources_list');if ('none'==obj.style.display || ''==obj.style.display) {obj.style.display='block'; this.textContent='---';} else {obj.style.display='none';this.textContent='+++';}");
		$li->appendChild($span);
		$dom_info_ul->appendChild($li);

		// Resources detail list as sub-ol, hidden until the mark is clicked
		$dom_resources_ol = $dom->createElement('ol');
		$dom_resources_ol->setAttribute('id', 'fwolf_todatauri_info_resources_list');
		$dom_resources_ol->setAttribute('style', 'display: none;');
		$this->AddInfoResources($dom, $dom_resources_ol, $this->mGetOk, '√: ');
		$this->AddInfoResources($dom, $dom_resources_ol, $this->mGetFailed, '×: ');
		$dom_info_ul->appendChild($dom_resources_ol);
		if ($this->mCliMode)
			echo "[Done ] Resources: √: " . $i_getok . ", ×: " . $i_getfailed . ".\n";

		// Html like below has no <body>, so we must create it
		// <html>
		// <meta http-equiv="refresh" content="0;url=http://www.baidu.com/">
		// </html>
		$body = $dom->getElementsByTagName('body')->item(0);
		if (null === $body) {
			$html = $dom->getElementsByTagName('html')->item(0);
			if (null === $html)
				$html = $dom->documentElement;
			if (null === $html) {
				// Totally empty document, build the root element too
				$html = $dom->appendChild($dom->createElement('html'));
			}
			$body = $html->appendChild($dom->createElement('body'));
		}

		$div = $dom->createElement('div');
		$div->setAttribute('id', 'fwolf_save_file_all_in_one_info');
		$div->setAttribute('style', 'clear: both;');
		$hr = $dom->createElement('hr');
		$hr->setAttribute('style', 'border: 0px; height: 1px; color: #B0C4DE; background-color: #B0C4DE;');
		$div->appendChild($hr);
		$div->appendChild($dom_info_ul);
		$body->appendChild($div);
	} // end of func AddInfo


	/**
	 * Append one <li> with a link for each url to a list element
	 *
	 * @param	DOMDocument	$dom
	 * @param	DOMElement	$list	List element to append to
	 * @param	array	$urls	Urls to list
	 * @param	string	$mark	Text put before each link
	 */
	protected function AddInfoResources ($dom, $list, $urls, $mark) {
		foreach ($urls as $url) {
			// createElement() reads '&' in its value as start of an entity,
			// so the link text must be escaped here.
			$a = $dom->createElement('a', htmlspecialchars($url));
			// setAttribute() takes the raw value, escaping it first would
			// write '&amp;amp;' and break urls with a query string.
			$a->setAttribute('href', $url);
			$li = $dom->createElement('li', $mark);
			$li->appendChild($a);
			$list->appendChild($li);
		}
	} // end of func AddInfoResources
224
225
226
	/**
	 * Rewrite one attribute of matching elements to data:URI style
	 *
	 * For every <$tag> element whose attributes equal all pairs in $cond,
	 * the value of $attr is given to ParseUrl(); a non-empty result is
	 * written back, an empty one keeps the original value.
	 * <script> elements with empty src are never touched.
	 *
	 * @param	DOMDocument	$dom	DOMDocument object
	 * @param	string	$tag	Tag name to look for
	 * @param	string	$attr	Attribute which holds the url
	 * @param	array	$cond	Condition, eg: rel=>'stylesheet' for link css
	 */
	protected function DomChange(&$dom, $tag, $attr, $cond=array())
	{
		$is_script = ('script' == $tag);

		foreach ($dom->getElementsByTagName($tag) as $node) {
			// In-document js has no src, nothing to retrieve
			if ($is_script && ('' == $node->getAttribute('src')))
				continue;

			// Every condition pair must equal the element's attribute
			$matched = true;
			if (!empty($cond)) {
				foreach ($cond as $name => $expected) {
					if ($expected != $node->getAttribute($name)) {
						$matched = false;
						break;
					}
				}
			}
			if (!$matched)
				continue;

			// If parse failed, keep original value
			$data = $this->ParseUrl($node->getAttribute($attr));
			if (!empty($data))
				$node->setAttribute($attr, $data);
		}
	} // end of func DomChange
264
265
266
	/**
	 * Change embedded style url in dom
	 *
	 * Handles both <style> elements (@import "..." and url(...)) and the
	 * style attribute of a fixed list of tags (url(...) only).
	 * Linked style is already parsed by:
	 *   $this->DomChange($dom, 'link', 'href', array('rel'=>'stylesheet'));
	 *
	 * @param	DOMDocument	$dom	DOMDocument object
	 */
	protected function DomChangeStyle(&$dom)
	{
		// Example1, with @import, no url(
		// @import "mystyle.css";
		// @import "../hide2.css";
		$regex_import = "/(@import\s*\(?['\"]([^'\"\(\)\{\}]+)['\"]\s*\)?)/i";
		// Example2, with url(, regardless of @import
		// url("../hide1a.css");
		// url(../hide1b.css);
		// The url part excludes brackets, so a match can not run over its
		// own ')' into a later url( of the same text.
		$regex_url = "/(url\s*\(['\"]?\s*([^'\"\(\)\{\}]+)['\"]?\s*\))/i";

		// <style> elements
		foreach ($dom->getElementsByTagName('style') as $item) {
			$src = $item->nodeValue;
			if (empty($src)) continue;

			foreach (array($regex_import, $regex_url) as $regex) {
				$changed = $this->DomChangeStyleUrl($src, $regex);
				if (null !== $changed) {
					// Write result to dom, next regex works on it
					$src = $changed;
					$item->nodeValue = $src;
				}
			}
		}

		// Style attribute of elements
		// :QUESTION: Is these tags slow down treatment?
		$ar_tags = array('a', 'blockquote', 'body', 'button', 'code', 'dd', 'del', 'div', 'dl', 'dt', 'form', 'hr', 'img', 'input', 'li', 'ol', 'option', 'p', 'pre', 'q', 'select', 'small', 'span', 'strong', 'table', 'td', 'textarea', 'th', 'tr', 'ul');
		foreach ($ar_tags as $tag) {
			foreach ($dom->getElementsByTagName($tag) as $item) {
				$src = $item->getAttribute('style');
				if (empty($src)) continue;

				// Same url( pattern as for <style>: the former [^'\"]+ was
				// greedy over ')' and captured garbage when one attribute
				// held more than one url(.
				$changed = $this->DomChangeStyleUrl($src, $regex_url);
				if (null !== $changed)
					$item->setAttribute('style', $changed);
			}
		}
	} // end of func DomChangeStyle


	/**
	 * Replace urls found by $regex in a piece of css with data:URI
	 *
	 * Match() is used as the original code did: element 0 of a match is
	 * read as the whole css expression and element 1 as the url in it
	 * (presumably Match() returns captured groups only - confirm in Curl).
	 *
	 * @param	string	$css
	 * @param	string	$regex	Group 1: whole expression, group 2: url
	 * @return	string|null	Css after replace, null if nothing matched
	 */
	protected function DomChangeStyleUrl($css, $regex)
	{
		$ar = $this->Match($regex, $css);
		if (empty($ar))
			return null;

		// A single match comes back flat, wrap it to loop as multi match
		if (!is_array($ar[0]))
			$ar = array(0 => $ar);

		foreach ($ar as $val) {
			$s = $this->ParseUrl($val[1]);
			if (!empty($s)) {
				// Use whole match to do str_replace, because url can be used multi times.
				$s = str_replace($val[1], $s, $val[0]);
				$css = str_replace($val[0], $s, $css);
			}
		}
		return $css;
	} // end of func DomChangeStyleUrl
379
380
381
	/**
	 * Get baseurl from init get
	 *
	 * Baseurl is used to get css, js, images.
	 * Must execute close to the init curl_exec, or curl stats will be
	 * changed by other get action.
	 * Baseurl is not the hostname only, it may include some dir.
	 * Result is stored in $mUrlBase, its scheme in $sUrlPlan.
	 */
	protected function GetBaseUrl()
	{
		// Input URL is a dir or a file -> Use the url webserver uses
		// But still will got wrong when url like this:
		// $url = 'http://131.2.101.10/sys/phpinfo.php/aa';
		// :TODO: check what link will browser generate in upper situation

		// curl_getinfo can recognize dir/file of an address,
		// so here cannot use $this->mUrl + preg_replace to compute baseurl
		$baseurl = curl_getinfo($this->mSh, CURLINFO_EFFECTIVE_URL);
		// curl_getinfo() gives false on failure, go on with empty string
		if (!is_string($baseurl))
			$baseurl = '';

		// Got the path part of url, should end with '/', exclude this:
		// http://131.2.101.10
		$baseurl = preg_replace('/(http|https)(:\/\/.+)\/[^\/]*$/i', '\1\2', $baseurl);
		// Add the missing tailing '/' in some special condition.
		// substr() replaces the {} string offset, which PHP 8 removed.
		if ('/' != substr($baseurl, -1))
			$baseurl .= '/';
		$this->mUrlBase = $baseurl;

		// Url plan
		$this->sUrlPlan = UrlPlan($this->mUrlBase);

		$this->mInfo .= "Baseurl: $baseurl<br />\n";
		if ($this->mCliMode)
			echo "[Curl ] Baseurl: $baseurl\n";
	} // end of func GetBaseUrl
414
415
416
	/**
	 * Check if user input url is safe to retrieve
	 *
	 * Url must be at least 13 chars long, start with http:// or https://
	 * and its host must not start with one of the blocked local names.
	 * When the url is rejected a message is appended to $mMsg.
	 *
	 * NOTE(review): this is a simple prefix blacklist, other private
	 * ranges, IPv6 and names resolving to local addresses are not covered.
	 *
	 * @param	string	$url
	 * @return	boolean
	 */
	protected function IsSafe($url)
	{
		$safe = true;
		if (13 > strlen($url)) $safe = false;

		$url_http = strtolower(substr($url, 0, 8));
		if (('http://' != substr($url_http, 0, 7)) && ('https://' != $url_http))
			$safe = false;

		// Host names are case-insensitive, compare in lower case
		$hostname = strtolower(preg_replace('/^(http|https):\/\/([^\/]+)\/?.*/i', '\2', $url));
		// Drop 'user:pass@' so it can not hide the real host
		$i = strrpos($hostname, '@');
		if (false !== $i)
			$hostname = substr($hostname, $i + 1);

		// 2130706433 is 127.0.0.1 written as a single decimal number.
		// Compare length is taken from each prefix itself, the former
		// fixed length 9 could never equal that 10 chars value.
		$ar_blocked = array('localhost', '127.0.0.1', '2130706433', '192.168.0.');
		foreach ($ar_blocked as $prefix) {
			if (0 === strncmp($hostname, $prefix, strlen($prefix)))
				$safe = false;
		}
		// :TODO: Can't do with my self

		if (false == $safe)
			$this->mMsg .= "目标网址不安全,不要折腾我的服务器啦~拜托(" . ip2long($hostname) . ")<br />\n";
		return $safe;
	} // end of func IsSafe
439
440
441
	/**
	 * Convert content html to utf8
	 *
	 * Charset is read from a meta tag like:
	 * <meta http-equiv="Content-Type" content="text/html;charset=gb2312">
	 * or detected when there is no usable declaration. The old meta tag
	 * is removed and an utf-8 one is put right at <head>.
	 * Charset found is stored in $mCharset.
	 *
	 * @see $mHtml
	 */
	protected function MbConvert()
	{
		// Find charset webpage use current
		//<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
		// Charset name is [\w-]+ : \w already includes digits and '_'.
		// The former [\w\d-_] is refused by PCRE2 (PHP 7.3+) as invalid
		// range, because '-' stood between an escape and another char.
		$ar = $this->Match('/(<meta[^>]+content=[^>]+charset=([\w-]+)[\"\'][^>]*>)/i');
		$charset = '';
		// For multi charset declaration, use the first one
		if ((isset($ar[0])) && (is_array($ar[0])))
			$ar = $ar[0];
		if (is_array($ar) && (1 < count($ar))) {
			$charset = $ar[1];
		}
		$charset = strtolower($charset);

		// Check charset got is valid, if no, detect it
		// Discuz! error, I have no other ways to detect current encoding
		// v4.0.0, printed page:
		//<meta http-equiv="Content-Type" content="text/html; charset=CHARSET">
		if ('charset' == $charset) {
			// Treat later, as if there is no declaration
			$charset = '';
		}
		// :THINK: Use mb_check_encoding check again?

		// Meta Content-type
		$meta = '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
		if (!empty($charset)) {
			// Remove old charset declaration
			$this->mHtml = str_replace($ar[0], '', $this->mHtml);
			// Put meta close to head, so no non-ascii will occur before it
			$this->mHtml = preg_replace('/<head[^>]*>/i', $meta, $this->mHtml);
			if ('utf-8' != $charset) {
				$this->mHtml = mb_convert_encoding($this->mHtml, 'utf-8', $charset);
			}
			$this->mInfo .= "Original charset: $charset<br />\n";
		} else {
			// Doc has no charset meta, detect it and force meta added
			$detected = mb_detect_encoding($this->mHtml
				, "gb2312, gbk, big5, utf-8");
			// Detect can fail (false), then leave content as it is
			$charset = (false === $detected) ? '' : strtolower($detected);
			if (('' != $charset) && ('utf-8' != $charset)) {
				$this->mHtml = mb_convert_encoding($this->mHtml, 'utf-8', $charset);
				$this->mInfo .= "Original charset: $charset<br />\n";
			}
			$this->mHtml = preg_replace('/<head[^>]*>/i', $meta, $this->mHtml);
		}

		$this->mCharset = $charset;
		if ($this->mCliMode)
			echo "[Curl ] Original charset: $charset.\n";
	} // end of func MbConvert
500
501
502
	/**
	 * Output - using download
	 *
	 * File name is $mUrl without its leading http:// or https://, with
	 * each of / ? & ; = : replaced by '_', plus '.html'. Content and name
	 * are given to Download().
	 */
	public function OutputDownload()
	{
		// Cut the scheme from url
		$name = preg_replace('/^(http|https):\/\/(.*)/i', '\2', $this->mUrl);
		// Chars not wanted in a file name
		$name = str_replace(array('/', '?', '&', ';', '=', ':'), '_', $name);
		Download($this->mHtml, $name . '.html');
	} // end of func OutputDownload
513
514
515
	/**
	 * Begin get webpage & parse it
	 *
	 * Does nothing when $mUrl is empty. Otherwise retrieves the page (or
	 * uses the $mHtml set by caller when $mRetrieveHtml is false),
	 * converts it to utf-8, loads it to dom, changes style/img/link/script
	 * urls to data:URI, adds the info block and writes result to $mHtml.
	 * Retrieve or parse failure is reported through $mMsg.
	 */
	public function Parse()
	{
		if (empty($this->mUrl))
			return;

		if ($this->mCliMode)
			echo "[Curl ] Get html content from $this->mUrl\n";
		$this->SetoptReferer($this->mUrl);
		if (true == $this->mRetrieveHtml)
			$this->mHtml = $this->Get($this->mUrl);
		else {
			// Do an dummy Get action, mRs is used in Match() (and/or etc...)
			$this->Get($this->mUrl);
			$this->mRs = $this->mHtml;
		}

		if (0 == strlen($this->mHtml)) {
			// Some error happen
			$this->mMsg .= curl_error($this->mSh);
			if ($this->mCliMode)
				echo "[Curl ] Failed.\n";
			return;
		}

		if ($this->mCliMode)
			echo "[Curl ] Ok, "
				. number_format(strlen($this->mRs))
				. " bytes.\n";
		$this->GetBaseUrl();
		$this->MbConvert();

		// Do some cleanup with html code
		$this->PreParse();

		$dom = new DOMDocument();
		// Keep original format when output
		$dom->preserveWhiteSpace = true;

		// This way can erase some un-wellformed html error, like un-supported/un-readable chars etc.
		$this->mHtml = mb_convert_encoding($this->mHtml
			, 'HTML-ENTITIES', "UTF-8");

		// Un-wellformed html raises a lot of libxml warnings, collect them
		// internally instead of hiding with @, so the load result itself
		// can still be checked.
		$b_libxml = libxml_use_internal_errors(true);
		$b_loaded = $dom->loadHTML($this->mHtml);
		libxml_clear_errors();
		libxml_use_internal_errors($b_libxml);
		if (false === $b_loaded) {
			$this->mMsg .= "Parse html failed.<br />\n";
			if ($this->mCliMode)
				echo "[Dom  ] Parse html failed.\n";
			return;
		}
		// :TODO: If parse all relative link href, can I make a proxy ?

		// Embedded style, do this 'slow' step first, or maybe with longer html string will take more time.
		$this->DomChangeStyle($dom);

		$this->DomChange($dom, 'img', 'src');
		$this->DomChange($dom, 'link', 'href', array('rel'=>'stylesheet'));
		// Js condition array('type'=>'text/javascript') not requested anymore
		$this->DomChange($dom, 'script', 'src');

		$this->AddInfo($dom);
		$this->mHtml = $dom->saveHTML();
	} // end of func Parse
584
585
586
	/**
	 * Get a url & parse it
	 *
	 * Makes $url absolute, then retrieves it through ParseUrl2Data():
	 * - http:// or https:// url is used as is
	 * - //host/path gets the scheme in $sUrlPlan
	 * - /path is joined to scheme + host of $mUrlBase
	 * - others are joined to $mUrlBase
	 * Return value is data:URI format, empty string when $url is empty or
	 * retrieve failed.
	 *
	 * @param	string	$url
	 * @return	string
	 */
	protected function ParseUrl($url)
	{
		if (empty($url))
			return '';

		$src = strtolower($url);
		// Uri start from http
		if (('http://' == substr($src, 0, 7)) || ('https://' == substr($src, 0, 8)))
			return $this->ParseUrl2Data($url);

		// Scheme omitted, eg: IBM developerworks
		if ('//' == substr($src, 0, 2))
			return $this->ParseUrl2Data($this->sUrlPlan . ':' . $url);

		// Link baseurl with file needed to parse.
		// $url is not empty here, so offset 0 exists; [] replaces the {}
		// string offset, which PHP 8 removed.
		if ('/' == $url[0]) {
			// Absolute path, compute start from host name
			$baseurl = preg_replace('/(http|https)(:\/\/[^\/]+)\/.*/i', '\1\2', $this->mUrlBase);
			$objurl = $baseurl . $url;
		} else {
			// Relative path
			$objurl = $this->mUrlBase . $url;
		}

		// Got result url, parse & return
		return $this->ParseUrl2Data($objurl);
	} // end of func ParseUrl
621
622
623
	/**
	 * Retrieve a http object & return data:URI
	 *
	 * Result of a successful retrieve is kept in $mCache, so each url is
	 * retrieved once only. Url is recorded in $mGetOk or $mGetFailed, in
	 * cli mode a numbered progress line is echoed.
	 * Return empty string when retrieve failed.
	 *
	 * @param	string	$url
	 * @return	string
	 */
	protected function ParseUrl2Data($url)
	{
		// Already retrieved
		if (isset($this->mCache[$url]))
			return $this->mCache[$url];

		$content = $this->Get($url);
		// Success is judged by length of mRs (presumably filled by Get())
		$b_ok = (0 < strlen($this->mRs));

		if ($b_ok) {
			// NOTE(review): http code is read here but never used
			$this->GetLastCode();
			$type = $this->GetLastContentType();

			$data = 'data:' . $type . ';base64,' . base64_encode($content);
			$this->mCache[$url] = $data;
			$this->mGetOk[] = $url;
		} else {
			// Fail, not cached
			$data = '';
			$this->mGetFailed[] = $url;
		}

		if ($this->mCliMode) {
			// 3 digit sequence number of this resource
			$seq = substr('000' . strval(count($this->mGetOk) + count($this->mGetFailed)), -3);
			echo "[" . $seq . "  ] " . ($b_ok ? '√' : '×') . ": $url\n";
		}
		return $data;
	} // end of func ParseUrl2Data
658
659
660
	/**
	 * Cleanup html code before parse
	 *
	 * Removes extra xml markup from $mHtml which DOM can't treat well:
	 * the 'xml version=...' declaration, and the attributes of a <html>
	 * tag which begins with xmlns=.
	 */
	protected function PreParse() {
		$html = $this->mHtml;

		// Xml declaration at top of xhtml document
		$html = preg_replace('/<\?xml version=[^>]+>/i', '', $html);

		// <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
		// becomes plain <html>
		$html = preg_replace('/<html\s+xmlns=[^>]+>/i', '<html>', $html);

		$this->mHtml = $html;
	} // end of func PreParse
672
673
674
	/**
	 * Set url of web page to process
	 *
	 * Url is decoded, then stored in $mUrl only when IsSafe() accepts it;
	 * empty or unsafe url leaves $mUrl unchanged.
	 *
	 * @param	string	$url
	 */
	public function SetUrl ($url) {
		if (empty($url))
			return;

		// Convert encoded url(eg: chinese) back to original.
		// Decode before the safe check, so the check is done on the url
		// really used; or an encoded host (eg: %31%32%37.0.0.1) would
		// pass the check and be decoded to a blocked one after it.
		$url = urldecode($url);
		if ($this->IsSafe($url))
			$this->mUrl = $url;
	} // end of func SetUrl
685
686
687
} // end of class ToDataUri
688
?>
689