This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | require_once(dirname(__FILE__) . '/fwolflib.php'); |
||
3 | require_once(FWOLFLIB . 'class/curl.php'); |
||
4 | require_once(FWOLFLIB . 'func/download.php'); |
||
5 | require_once(FWOLFLIB . 'func/env.php'); |
||
6 | require_once(FWOLFLIB . 'func/request.php'); |
||
7 | require_once(FWOLFLIB . 'func/url.php'); |
||
8 | |||
9 | |||
10 | /** |
||
11 | * Convert css, js, image in a html file, to save it in ONE file like mht. |
||
12 | * |
||
13 | * @package fwolflib |
||
14 | * @copyright Copyright 2007-2012, Fwolf |
||
15 | * @author Fwolf <[email protected]> |
||
16 | * @since 2007-04-06 |
||
17 | */ |
||
18 | class ToDataUri extends Curl { |
||
0 ignored issues
–
show
|
|||
19 | /** |
||
20 | * Cache of src already retrieved |
||
21 | * Format: url=>base64_data |
||
22 | * @var array |
||
23 | */ |
||
24 | protected $mCache = array(); |
||
25 | |||
26 | /** |
||
27 | * Charset of original web page |
||
28 | * Show in info block. |
||
29 | * @var string |
||
30 | */ |
||
31 | protected $mCharset = ''; |
||
32 | |||
33 | /** |
||
34 | * Running in cli mode |
||
35 | * Will echo some message directly |
||
36 | * @var boolean |
||
37 | */ |
||
38 | protected $mCliMode = false; |
||
39 | |||
40 | /** |
||
41 | * URI which got error when get |
||
42 | * Only for debug or output propose |
||
43 | * @var array |
||
44 | */ |
||
45 | protected $mGetFailed = array(); |
||
46 | |||
47 | /** |
||
48 | * URI which success retrieved |
||
49 | * @var array |
||
50 | */ |
||
51 | protected $mGetOk = array(); |
||
52 | |||
53 | /** |
||
54 | * Html code get from target |
||
55 | * Change is also done here, so this can be output directly |
||
56 | * @var string |
||
57 | */ |
||
58 | public $mHtml = ''; |
||
59 | |||
60 | /** |
||
61 | * Information of Process, display in footer. (obsolete?) |
||
62 | * @var string |
||
63 | * @see $mMsg |
||
64 | */ |
||
65 | public $mInfo = ''; |
||
66 | |||
67 | /** |
||
68 | * Simple reponse message |
||
69 | * Display below form |
||
70 | * @var string |
||
71 | * @see $mInfo |
||
72 | */ |
||
73 | public $mMsg = ''; |
||
74 | |||
75 | /** |
||
76 | * Retrieve html data |
||
77 | * Auto retrieve html data by url on default, if set to false, $this->mHtml must be set manually. |
||
78 | * @var boolean |
||
79 | * @see $mHtml |
||
80 | */ |
||
81 | public $mRetrieveHtml = true; |
||
82 | |||
83 | /** |
||
84 | * Original url |
||
85 | * The web page, which contains css, js, image |
||
86 | * @var string |
||
87 | */ |
||
88 | public $mUrl = ''; |
||
89 | |||
90 | /** |
||
91 | * Baseurl of target webpage |
||
92 | * eg: http://tld.com/dir/index.html, baseurl is http://tld.com/dir/ |
||
93 | * @var string |
||
94 | */ |
||
95 | protected $mUrlBase = ''; |
||
96 | |||
97 | /** |
||
98 | * http or https, for Baseurl |
||
99 | * @var string |
||
100 | */ |
||
101 | protected $sUrlPlan = ''; |
||
102 | |||
103 | |||
104 | /** |
||
105 | * Construce |
||
106 | * @param string $url |
||
107 | */ |
||
108 | public function __construct($url = '') |
||
109 | { |
||
110 | parent::__construct(); |
||
111 | $this->SetUrl($url); |
||
112 | $this->SetoptSslverify(false); |
||
113 | |||
114 | // Detect cli mode |
||
115 | if (IsCli()) |
||
0 ignored issues
–
show
The function
IsCli() has been deprecated with message: Use Fwlib\Util\Env::isCli()
This function has been deprecated. The supplier of the file has supplied an explanatory message. The explanatory message should give you some clue as to whether and when the function will be removed from the class and what other function to use instead. ![]() |
|||
116 | $this->mCliMode = true; |
||
117 | } // end of func __construct |
||
118 | |||
119 | |||
120 | /** |
||
121 | * Add process information to dom, display at bottom of page |
||
122 | * |
||
123 | * @param DOMDocument $dom |
||
124 | */ |
||
125 | protected function AddInfo (&$dom) { |
||
126 | // :TODO: original url & this script url |
||
127 | // Using dom now, $this->mInfo is string, so...it's obsolete? |
||
128 | |||
129 | $dom_info_ul = $dom->createElement('ul'); |
||
130 | $dom_info_ul->setAttribute('style', 'text-align: left'); |
||
131 | // Original url |
||
132 | $a = $dom->createElement('a', htmlspecialchars($this->mUrl)); |
||
133 | $a->setAttribute('href', $this->mUrl); |
||
134 | $li = $dom->createElement('li', "Original url: "); |
||
135 | $li->appendChild($a); |
||
136 | $dom_info_ul->appendChild($li); |
||
137 | // Original charset |
||
138 | $li = $dom->createElement('li', htmlspecialchars("Original charset: {$this->mCharset}")); |
||
139 | $dom_info_ul->appendChild($li); |
||
140 | // Base url |
||
141 | //$a = $dom->createElement('a', htmlspecialchars($this->mUrlBase)); |
||
142 | //$a->setAttribute('href', $this->mUrlBase); |
||
143 | //$li = $dom->createElement('li', "Baseurl: "); |
||
144 | //$li->appendChild($a); |
||
145 | //$dom_info_ul->appendChild($li); |
||
146 | // Url of this script |
||
147 | if ($this->mCliMode) { |
||
148 | $li = $dom->createElement('li', "Generate using Fwolf's 'Save html all in one file' tools(cli mode php script)."); |
||
149 | } else { |
||
150 | $a = $dom->createElement('a', "Fwolf's 'Save html all in one file' tools"); |
||
151 | $a->setAttribute('href', GetSelfUrl(false)); |
||
0 ignored issues
–
show
The function
GetSelfUrl() has been deprecated with message: Use Fwlib\Util\HttpUtil::getSelfUrl()
This function has been deprecated. The supplier of the file has supplied an explanatory message. The explanatory message should give you some clue as to whether and when the function will be removed from the class and what other function to use instead. ![]() |
|||
152 | $li = $dom->createElement('li', "Generate using: "); |
||
153 | $li->appendChild($a); |
||
154 | } |
||
155 | $dom_info_ul->appendChild($li); |
||
156 | // Generate time |
||
157 | $li = $dom->createElement('li', htmlspecialchars("Generate time: " . date('Y-m-d G:i:s'))); |
||
158 | $dom_info_ul->appendChild($li); |
||
159 | // Resources |
||
160 | $i_getok = count($this->mGetOk); |
||
161 | $i_getfailed = count($this->mGetFailed); |
||
162 | $li = $dom->createElement('li', "Resources(" . ($i_getok + $i_getfailed) . " : √ $i_getok, × $i_getfailed): "); |
||
163 | $dom_info_ul->appendChild($li); |
||
164 | |||
165 | // Baseurl & charset has been set when processed, add resources here |
||
166 | //$this->mInfo .= "Resources: <span style='cursor: hand;'>+</span>"; |
||
167 | //$this->mInfo .= "\n<br />√: " . implode($this->mGetOk, "\n<br />√: "); |
||
168 | //$this->mInfo .= "\n<br />×: " . implode($this->mGetFailed, "\n<br />×: "); |
||
169 | $span = $dom->createElement('span', "+++"); |
||
170 | $span->setAttribute('style', 'cursor: pointer;'); |
||
171 | $span->setAttribute('onclick', "javascript:obj=getElementById('fwolf_todatauri_info_resources_list');if ('none'==obj.style.display || ''==obj.style.display) {obj.style.display='block'; this.textContent='---';} else {obj.style.display='none';this.textContent='+++';}"); |
||
172 | $dom_info_ul->lastChild->appendChild($span); |
||
173 | |||
174 | // Append resources detail list as sub-ol |
||
175 | $dom_resources_ol = $dom->createElement('ol'); |
||
176 | $dom_resources_ol->setAttribute('id', 'fwolf_todatauri_info_resources_list'); |
||
177 | $dom_resources_ol->setAttribute('style', 'display: none;'); |
||
178 | View Code Duplication | foreach ($this->mGetOk as $val) |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
179 | { |
||
180 | $val = htmlspecialchars($val); |
||
181 | $a = $dom->createElement('a', $val); |
||
182 | $a->setAttribute('href', $val); |
||
183 | $li = $dom->createElement('li', '√: '); |
||
184 | //$li = $dom->createElement('li', $val); |
||
185 | $li->appendChild($a); |
||
186 | $dom_resources_ol->appendChild($li); |
||
187 | } |
||
188 | View Code Duplication | foreach ($this->mGetFailed as $val) |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
189 | { |
||
190 | $val = htmlspecialchars($val); |
||
191 | $a = $dom->createElement('a', $val); |
||
192 | $a->setAttribute('href', $val); |
||
193 | $li = $dom->createElement('li', '×: '); |
||
194 | //$li = $dom->createElement('li', $val); |
||
195 | $li->appendChild($a); |
||
196 | $dom_resources_ol->appendChild($li); |
||
197 | } |
||
198 | $dom_info_ul->appendChild($dom_resources_ol); |
||
199 | if ($this->mCliMode) |
||
200 | echo "[Done ] Resources: √: " . count($this->mGetOk) . ", ×: " . count($this->mGetFailed) . ".\n"; |
||
201 | |||
202 | // If html contents like this, it have not <body>, so we must create it |
||
203 | // <html> |
||
204 | // <meta http-equiv="refresh" content="0;url=http://www.baidu.com/"> |
||
205 | // </html> |
||
206 | $dom_body = $dom->getElementsByTagName('body'); |
||
207 | if (0 == $dom_body->length) { |
||
208 | // There is no <body> in html, we create it |
||
209 | $body = $dom->createElement('body'); |
||
210 | $dom->getElementsByTagName('html')->item(0)->appendChild($body); |
||
211 | } else { |
||
212 | $body = $dom->getElementsByTagName('body')->item(0); |
||
213 | } |
||
214 | |||
215 | $div = $dom->createElement('div'); |
||
216 | $div->setAttribute('id', 'fwolf_save_file_all_in_one_info'); |
||
217 | $div->setAttribute('style', 'clear: both;'); |
||
218 | $hr = $dom->createElement('hr'); |
||
219 | $hr->setAttribute('style', 'border: 0px; height: 1px; color: #B0C4DE; background-color: #B0C4DE;'); |
||
220 | $div->appendChild($hr); |
||
221 | $div->appendChild($dom_info_ul); |
||
222 | $body->appendChild($div); |
||
223 | } // end of func AddInfo |
||
224 | |||
225 | |||
226 | /** |
||
227 | * With a dom object, do changes I need |
||
228 | * Change all $tag's $attr in dom to data:URI style |
||
229 | * @param DOMDocument $dom DOMDocument object |
||
230 | * @param string $tag |
||
231 | * @param string $attr |
||
232 | * @param array $cond Condition, eg: type=>'text/css' for link css |
||
233 | */ |
||
234 | protected function DomChange(&$dom, $tag, $attr, $cond=array()) |
||
235 | { |
||
236 | $items = $dom->getElementsByTagName($tag); |
||
237 | for ($i=0; $i<$items->length; $i++) |
||
238 | { |
||
239 | $item = $items->item($i); |
||
240 | |||
241 | // Check condition by element attribute |
||
242 | $check = true; |
||
243 | if (!empty($cond)) { |
||
244 | foreach ($cond as $k=>$v) { |
||
245 | if ($v != $item->getAttribute($k)) |
||
246 | $check = false; |
||
247 | } |
||
248 | } |
||
249 | // In-document js have text/javascript also, but src is empty |
||
250 | if (('script' == $tag) && ('' == $item->getAttribute('src'))) |
||
251 | $check = false; |
||
252 | |||
253 | // Do change |
||
254 | if (true == $check) |
||
0 ignored issues
–
show
|
|||
255 | { |
||
256 | $src = $item->getAttribute($attr); |
||
257 | $src = $this->ParseUrl($src); |
||
258 | // If parse failed, use original src |
||
259 | if (!empty($src)) |
||
260 | $item->setAttribute($attr, $src); |
||
261 | } |
||
262 | } |
||
263 | } // end of func DomChange |
||
264 | |||
265 | |||
266 | /** |
||
267 | * Change embemmed style url in dom |
||
268 | * Linked style alread parse by: |
||
269 | * $this->DomChange($dom, 'link', 'href', array('rel'=>'stylesheet')); |
||
270 | * @param DOMDocument $dom DOMDocument object |
||
271 | */ |
||
272 | protected function DomChangeStyle(&$dom) |
||
273 | { |
||
274 | $items = $dom->getElementsByTagName('style'); |
||
275 | for ($i=0; $i<$items->length; $i++) |
||
276 | { |
||
277 | $item = $items->item($i); |
||
278 | |||
279 | $src = $item->nodeValue; |
||
280 | if (empty($src)) continue; |
||
281 | |||
282 | // Example1, with @import, no url( |
||
283 | // @import "mystyle.css"; |
||
284 | // @import "../hide2.css"; |
||
285 | $ar_regex[0] = "/(@import\s*\(?['\"]([^'\"\(\)\{\}]+)['\"]\s*\)?)/i"; |
||
0 ignored issues
–
show
Coding Style
Comprehensibility
introduced
by
$ar_regex was never initialized. Although not strictly required by PHP, it is generally a good practice to add $ar_regex = array(); before regardless.
Adding an explicit array definition is generally preferable to implicit array definition as it guarantees a stable state of the code. Let’s take a look at an example: foreach ($collection as $item) {
$myArray['foo'] = $item->getFoo();
if ($item->hasBar()) {
$myArray['bar'] = $item->getBar();
}
// do something with $myArray
}
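As you can see in this example, the array $myArray is created implicitly the first time a key is assigned inside the foreach loop, and its 'bar' key is only written conditionally, so it may still hold a value from a previous iteration. This might or might not be intended. To make your intention clear, your code more readable and to avoid accidental bugs, we recommend adding an explicit initialization $myArray = array() either outside or inside the foreach loop. ![]() |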
|||
286 | // Example2, with url(, recardness @import |
||
287 | // url("../hide1a.css"); |
||
288 | // url(../hide1b.css); |
||
289 | $ar_regex[1] = "/(url\s*\(['\"]?\s*([^'\"\(\)\{\}]+)['\"]?\s*\))/i"; |
||
0 ignored issues
–
show
The variable
$ar_regex does not seem to be defined for all execution paths leading up to this point.
If you define a variable conditionally, it can happen that it is not defined for all execution paths. Let’s take a look at an example: function myFunction($a) {
switch ($a) {
case 'foo':
$x = 1;
break;
case 'bar':
$x = 2;
break;
}
// $x is potentially undefined here.
echo $x;
}
In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined. Available Fixes
![]() |
|||
290 | |||
291 | foreach ($ar_regex as $regex) { |
||
292 | //$ar = $this->Match('/(<style[^<]+url\(\s*(\S+)\s*\)[^<]+<\/style>)/i', $src); |
||
293 | $ar = $this->Match($regex, $src); |
||
294 | View Code Duplication | if (!empty($ar)) { |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
295 | // Do as multi match |
||
296 | if (!is_array($ar[0])) { |
||
297 | $ar1 = array(0=>$ar); |
||
298 | $ar = $ar1; |
||
299 | unset($ar1); |
||
300 | } |
||
301 | // Begin loop |
||
302 | foreach ($ar as $val) { |
||
303 | $s = $this->ParseUrl($val[1]); |
||
304 | if (!empty($s)) { |
||
305 | // Use whole match to do str_replace, because url can be used multi times. |
||
306 | $s = str_replace($val[1], $s, $val[0]); |
||
307 | $src = str_replace($val[0], $s, $src); |
||
308 | } |
||
309 | } |
||
310 | // Write result to dom |
||
311 | $item->nodeValue = $src; |
||
312 | } |
||
313 | } |
||
314 | } |
||
315 | |||
316 | // Embemmed style |
||
317 | // :QUESTION: Is these tags slow down treatment? |
||
318 | $ar_tags = array('a', 'blockquote', 'body', 'button', 'code', 'dd', 'del', 'div', 'dl', 'dt', 'form', 'hr', 'img', 'input', 'li', 'ol', 'option', 'p', 'pre', 'q', 'select', 'small', 'span', 'strong', 'table', 'td', 'textarea', 'th', 'tr', 'ul'); |
||
319 | foreach ($ar_tags as $tag) { |
||
320 | $items = $dom->getElementsByTagName($tag); |
||
321 | $i_items = $items->length; |
||
322 | for ($i=0; $i<$i_items; $i++) |
||
323 | { |
||
324 | $item = $items->item($i); |
||
325 | |||
326 | $src = $item->getAttribute('style'); |
||
327 | if (empty($src)) continue; |
||
328 | |||
329 | // Example2 only, with url(, recardness @import |
||
330 | // url("../hide1a.css"); |
||
331 | // url(../hide1b.css); |
||
332 | $regex = "/(url\s*\(['\"]?\s*([^'\"]+)['\"]?\s*\))/i"; |
||
333 | |||
334 | $ar = $this->Match($regex, $src); |
||
335 | View Code Duplication | if (!empty($ar)) { |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
336 | // Do as multi match |
||
337 | if (!is_array($ar[0])) { |
||
338 | $ar1 = array(0=>$ar); |
||
339 | $ar = $ar1; |
||
340 | unset($ar1); |
||
341 | } |
||
342 | // Begin loop |
||
343 | foreach ($ar as $val) { |
||
344 | $s = $this->ParseUrl($val[1]); |
||
345 | if (!empty($s)) { |
||
346 | // Use whole match to do str_replace, because url can be used multi times. |
||
347 | $s = str_replace($val[1], $s, $val[0]); |
||
348 | $src = str_replace($val[0], $s, $src); |
||
349 | } |
||
350 | } |
||
351 | // Write result to dom |
||
352 | $item->setAttribute('style', $src); |
||
353 | } |
||
354 | } |
||
355 | } |
||
356 | /* |
||
357 | // Example 1 |
||
358 | // <style type="text/css" media="screen">@import url( http://theme.cache.yo2.cn/wp-content/user_themes/37/3729/style.css );</style> |
||
359 | $ar = $this->Match('/(<style[^<]+url\(\s*(\S+)\s*\)[^<]+<\/style>)/i', $this->mHtml); |
||
360 | if (!empty($ar)) { |
||
361 | // Do as multi match |
||
362 | if (!is_array($ar[0])) { |
||
363 | $ar1 = array(0=>$ar); |
||
364 | $ar = $ar1; |
||
365 | unset($ar1); |
||
366 | } |
||
367 | // Begin loop |
||
368 | foreach ($ar as $val) { |
||
369 | $s = $this->ParseUrl($val[1]); |
||
370 | if (!empty($s)) { |
||
371 | // Use whole match to do str_replace, because url can be used multi times. |
||
372 | $s = str_replace($val[1], $s, $val[0]); |
||
373 | $this->mHtml = str_replace($val[0], $s, $this->mHtml); |
||
374 | } |
||
375 | } |
||
376 | } |
||
377 | */ |
||
378 | } // end of func DomChangeStyle |
||
379 | |||
380 | |||
381 | /** |
||
382 | * Get baseurl from init get |
||
383 | * Baseurl used in get css, js, images |
||
384 | * Must execute close to the init curl_exec |
||
385 | * Baseurl not eq hostname, it may include some dir |
||
386 | * If not, crul stats will change by other get action |
||
387 | */ |
||
388 | protected function GetBaseUrl() |
||
389 | { |
||
390 | // Input URL is a dir or a file -> Use the url webserver uses |
||
391 | // But still will got wrong when url like this: |
||
392 | // $url = 'http://131.2.101.10/sys/phpinfo.php/aa'; |
||
393 | // :TODO: check what link will browser gerenate in upper situation |
||
394 | |||
395 | // Uri need add http/https manually |
||
396 | // curl_getinfo can recoginize dir/file of an address |
||
397 | // so here cannot use $this->mUrl + preg_replace to compute baseurl |
||
398 | $baseurl = curl_getinfo($this->mSh, CURLINFO_EFFECTIVE_URL); |
||
399 | // Got the path part of url, should end with '/', exclude this: |
||
400 | // http://131.2.101.10 |
||
401 | $baseurl = preg_replace('/(http|https)(:\/\/.+)\/[^\/]*$/i', '\1\2', $baseurl); |
||
402 | // Add the missing tailing '/' in some special condition |
||
403 | if ('/' != $baseurl{strlen($baseurl) - 1}) |
||
404 | $baseurl .= '/'; |
||
405 | $this->mUrlBase = $baseurl; |
||
0 ignored issues
–
show
It seems like
$baseurl can also be of type array<integer,string> . However, the property $mUrlBase is declared as type string . Maybe add an additional type check?
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly. For example, imagine you have a variable $account_id that can hold either an Id object or false, and your code assigns it to the $id property of an Account instance, which is declared as type Id. Either this assignment is in error or a type check should be added for that assignment. class Id
{
public $id;
public function __construct($id)
{
$this->id = $id;
}
}
class Account
{
/** @var Id $id */
public $id;
}
$account_id = false;
if (starsAreRight()) {
$account_id = new Id(42);
}
$account = new Account();
if ($account instanceof Id)
{
$account->id = $account_id;
}
![]() |
|||
406 | |||
407 | // Url plan |
||
408 | $this->sUrlPlan = UrlPlan($this->mUrlBase); |
||
0 ignored issues
–
show
It seems like
$this->mUrlBase can also be of type array<integer,string> ; however, UrlPlan() does only seem to accept string , maybe add an additional type check?
If a method or function can return multiple different values, then unless you are sure that you can only receive a single value in this context, we recommend adding an additional type check: /**
* @return array|string
*/
function returnsDifferentValues($x) {
if ($x) {
return 'foo';
}
return array();
}
$x = returnsDifferentValues($y);
if (is_array($x)) {
// $x is an array.
}
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue. ![]() |
|||
409 | |||
410 | $this->mInfo .= "Baseurl: $baseurl<br />\n"; |
||
411 | if ($this->mCliMode) |
||
412 | echo "[Curl ] Baseurl: $baseurl\n"; |
||
413 | } // end of func GetBaseUrl |
||
414 | |||
415 | |||
416 | /** |
||
417 | * Check if user input url is safe to retrieve |
||
418 | * @param string $url |
||
419 | * @return boolean |
||
420 | */ |
||
421 | protected function IsSafe($url) |
||
422 | { |
||
423 | $safe = true; |
||
424 | if (13 > strlen($url)) $safe = false; |
||
425 | $url_http = strtolower(substr($url, 0, 8)); |
||
426 | if (('http://' != substr($url_http, 0, 7)) && ('https://' != $url_http)) |
||
427 | $safe = false; |
||
428 | $hostname = preg_replace('/^(http|https):\/\/([^\/]+)\/?.*/i', '\2', $url); |
||
429 | if ('localhost' == substr($hostname, 0, 9)) $safe = false; |
||
430 | if ('127.0.0.1' == substr($hostname, 0, 9)) $safe = false; |
||
431 | if ('2130706433' == substr($hostname, 0, 9)) $safe = false; |
||
432 | if ('192.168.0.' == substr($hostname, 0, 10)) $safe = false; |
||
433 | // :TODO: Can't do with my self |
||
434 | |||
435 | if (false == $safe) |
||
0 ignored issues
–
show
|
|||
436 | $this->mMsg .= "目标网址不安全,不要折腾我的服务器啦~拜托(" . ip2long($hostname) . ")<br />\n"; |
||
437 | return $safe; |
||
438 | } // end of func IsSafe |
||
439 | |||
440 | |||
441 | /** |
||
442 | * Convert content html to utf8 |
||
443 | * <meta http-equiv="Content-Type" content="text/html;charset=gb2312"> |
||
444 | * @see $mHtml |
||
445 | */ |
||
446 | protected function MbConvert() |
||
447 | { |
||
448 | // Find charset webpage use current |
||
449 | //<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> |
||
450 | //$ar = $this->Match('/(<meta[^;]+;[\s]*charset=(\S+)\"[^>]*>)/i'); |
||
451 | $ar = $this->Match('/(<meta[^>]+content=[^>]+charset=([\w\d-_]+)[\"\'][^>]*>)/i'); |
||
452 | $charset = ''; |
||
453 | // For multi charset declaration |
||
454 | if ((isset($ar[0])) && (is_array($ar[0]))) |
||
455 | $ar = $ar[0]; |
||
456 | if (1 < count($ar)) { |
||
457 | $charset = $ar[1]; |
||
458 | } |
||
459 | //$charset = (1 < count($ar)) ? $ar[1] : ''; |
||
460 | $charset = strtolower($charset); |
||
461 | // Check charset got is valid, if no, detect it |
||
462 | // Discuz! error, I have no other ways to detect current encoding |
||
463 | // v4.0.0, printed page: |
||
464 | //<meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> |
||
465 | if ('charset' == $charset) { |
||
466 | // Treat later |
||
467 | $charset = ''; |
||
468 | //$charset = mb_detect_encoding($this->mHtml, "gb2312, gbk, big5, utf-8"); |
||
469 | //$charset = strtolower($charset); |
||
470 | } |
||
471 | // :THINK: Use mb_check_encoding check again? |
||
472 | |||
473 | // Meta Content-type |
||
474 | $meta = '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; |
||
475 | if (!empty($charset)) { |
||
476 | // Remove old markup <!-- charset declare deleted --> |
||
477 | $this->mHtml = str_replace($ar[0], '', $this->mHtml); |
||
478 | // Put meta close to head, so no non-ascii will occur before it |
||
479 | $this->mHtml = preg_replace('/<head[^>]*>/i', $meta, $this->mHtml); |
||
480 | if ('utf-8' != $charset) { |
||
481 | $this->mHtml = mb_convert_encoding($this->mHtml, 'utf-8', $charset); |
||
482 | } |
||
483 | $this->mInfo .= "Original charset: $charset<br />\n"; |
||
484 | } else { |
||
485 | // Doc has no charset meta, force added |
||
486 | $charset = strtolower(mb_detect_encoding($this->mHtml |
||
487 | , "gb2312, gbk, big5, utf-8")); |
||
488 | if ('utf-8' != $charset) { |
||
489 | $this->mHtml = mb_convert_encoding($this->mHtml, 'utf-8', $charset); |
||
490 | $this->mInfo .= "Original charset: $charset<br />\n"; |
||
491 | } |
||
492 | //$this->mHtml = $meta . $this->mHtml; |
||
493 | $this->mHtml = preg_replace('/<head[^>]*>/i', $meta, $this->mHtml); |
||
494 | } |
||
495 | |||
496 | $this->mCharset = $charset; |
||
497 | if ($this->mCliMode) |
||
498 | echo "[Curl ] Original charset: $charset.\n"; |
||
499 | } // end of func MbConvert |
||
500 | |||
501 | |||
502 | /* |
||
503 | * Output - using download |
||
504 | */ |
||
505 | public function OutputDownload() |
||
506 | { |
||
507 | // Name |
||
508 | $filename = preg_replace('/^(http|https):\/\/(.*)/i', '\2', $this->mUrl); |
||
509 | $ar = array('/', '?', '&', ';', '=', ':'); |
||
510 | $filename = str_replace($ar, '_', $filename) . '.html'; |
||
511 | Download($this->mHtml, $filename); |
||
0 ignored issues
–
show
The function
Download() has been deprecated with message: Use Fwlib\Util\HttpUtil::download()
This function has been deprecated. The supplier of the file has supplied an explanatory message. The explanatory message should give you some clue as to whether and when the function will be removed from the class and what other function to use instead. ![]() |
|||
512 | } // end of func OutputDownload |
||
513 | |||
514 | |||
515 | /** |
||
516 | * Begin get webpage & parse it |
||
517 | */ |
||
518 | public function Parse() |
||
519 | { |
||
520 | if (!empty($this->mUrl)) |
||
521 | { |
||
522 | if ($this->mCliMode) |
||
523 | echo "[Curl ] Get html content from $this->mUrl\n"; |
||
524 | $this->SetoptReferer($this->mUrl); |
||
525 | if (true == $this->mRetrieveHtml) |
||
0 ignored issues
–
show
|
|||
526 | $this->mHtml = $this->Get($this->mUrl); |
||
527 | else { |
||
528 | // Do an dummy Get action, mRs is used in Match() (and/or etc...) |
||
529 | $this->Get($this->mUrl); |
||
530 | $this->mRs = $this->mHtml; |
||
531 | } |
||
532 | |||
533 | //$this->GetBaseUrl(); |
||
534 | if (0 == strlen($this->mHtml)) |
||
535 | { |
||
536 | // Some error happen |
||
537 | $this->mMsg .= curl_error($this->mSh); |
||
538 | if ($this->mCliMode) |
||
539 | echo "[Curl ] Failed.\n"; |
||
540 | } |
||
541 | else |
||
542 | { |
||
543 | if ($this->mCliMode) |
||
544 | echo "[Curl ] Ok, " |
||
545 | . number_format(strlen($this->mRs)) |
||
546 | . " bytes.\n"; |
||
547 | $this->GetBaseUrl(); |
||
548 | // Go ahead |
||
549 | $this->MbConvert(); |
||
550 | |||
551 | // Do some cleanup with html code |
||
552 | $this->PreParse(); |
||
553 | |||
554 | $dom = new DOMDocument(); |
||
555 | // Keep original format when output |
||
556 | $dom->preserveWhiteSpace = true; |
||
557 | //$dom->strictErrorChecking = false; |
||
558 | |||
559 | // :TODO: parse un-wellform html error ? |
||
560 | // This way can erase some un-wellformed html error, like un-supported/un-readable chars etc. |
||
561 | $this->mHtml = mb_convert_encoding($this->mHtml |
||
562 | , 'HTML-ENTITIES', "UTF-8"); |
||
563 | // Seems these warning message can't be erased. |
||
564 | @$dom->loadHTML($this->mHtml); |
||
0 ignored issues
–
show
It seems like you do not handle an error condition here. This can introduce security issues, and is generally not recommended.
If you suppress an error, we recommend checking for the error condition explicitly: // For example instead of
@mkdir($dir);
// Better use
if (@mkdir($dir) === false) {
throw new \RuntimeException('The directory '.$dir.' could not be created.');
}
![]() |
|||
565 | // :TODO: If parse all relative link href, can I make a proxy ? |
||
566 | |||
567 | // Embemmed style, modify html directly, do this 'slow' step first, or maybe with longer html string will take more time. |
||
568 | $this->DomChangeStyle($dom); |
||
569 | |||
570 | $this->DomChange($dom, 'img', 'src'); |
||
571 | //$this->DomChange($dom, 'link', 'href', array('rel'=>'stylesheet', 'type'=>'text/css')); |
||
572 | $this->DomChange($dom, 'link', 'href', array('rel'=>'stylesheet')); |
||
573 | |||
574 | // array('type'=>'text/javascript') |
||
575 | // Js condition not requested anymore |
||
576 | $this->DomChange($dom, 'script', 'src'); |
||
577 | |||
578 | $this->AddInfo($dom); |
||
579 | $this->mHtml = $dom->saveHTML(); |
||
580 | |||
581 | } |
||
582 | } |
||
583 | } // end of func Parse |
||
584 | |||
585 | |||
586 | /** |
||
587 | * Get a url & parse it |
||
588 | * Return value is data:URI format |
||
589 | * @param string $url |
||
590 | * @return string |
||
591 | */ |
||
592 | protected function ParseUrl($url) |
||
593 | { |
||
594 | if (empty($url)) |
||
595 | return ''; |
||
596 | // Uri start from http |
||
597 | $src = strtolower($url); |
||
598 | if (('http://' == substr($src, 0, 7)) || ('https://' == substr($src, 0, 8))) |
||
599 | return $this->ParseUrl2Data($url); |
||
600 | elseif ('//' == substr($src, 0, 2)) { |
||
601 | // For IBM developerworks |
||
602 | return $this->ParseUrl2Data($this->sUrlPlan . ':' . $url); |
||
603 | } else { |
||
604 | // Link baseurl with file needed to parse |
||
605 | if ('/' == $url{0}) |
||
606 | { |
||
607 | // Absolute path, compute start from host name |
||
608 | $baseurl = preg_replace('/(http|https)(:\/\/[^\/]+)\/.*/i', '\1\2', $this->mUrlBase); |
||
609 | $objurl = $baseurl . $url; |
||
610 | } |
||
611 | else |
||
612 | { |
||
613 | // Relative path |
||
614 | $objurl = $this->mUrlBase . $url; |
||
615 | } |
||
616 | |||
617 | // Got result url, parse & return |
||
618 | return $this->ParseUrl2Data($objurl); |
||
619 | } |
||
620 | } // end of func ParseUrl |
||
621 | |||
622 | |||
623 | /** |
||
624 | * Retrieve a http object & return data:URI |
||
625 | * Return empty string when retrieve failed. |
||
626 | * @param string $url |
||
627 | * @return string |
||
628 | */ |
||
629 | protected function ParseUrl2Data($url) |
||
630 | { |
||
631 | if (isset($this->mCache[$url])) |
||
632 | $data = $this->mCache[$url]; |
||
633 | else |
||
634 | { |
||
635 | $rs = $this->Get($url); |
||
636 | if (0 < strlen($this->mRs)) |
||
637 | { |
||
638 | $rs_code = $this->GetLastCode(); |
||
0 ignored issues
–
show
$rs_code is not used, you could remove the assignment.
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently. $myVar = 'Value';
$higher = false;
if (rand(1, 6) > 3) {
$higher = true;
} else {
$higher = false;
}
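Both the $myVar assignment and the first $higher assignment are dead: $myVar is never used afterwards, and $higher is always overwritten by the if/else before it is read. ![]() |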
|||
639 | $rs_type = $this->GetLastContentType(); |
||
640 | |||
641 | $data = 'data:' . $rs_type . ';base64,' . base64_encode($rs); |
||
642 | $this->mCache[$url] = $data; |
||
643 | $this->mGetOk[] = $url; |
||
644 | View Code Duplication | if ($this->mCliMode) |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
645 | echo "[" . substr('000' . strval(count($this->mGetOk) + count($this->mGetFailed)), -3) . " ] √: $url\n"; |
||
646 | } |
||
647 | else |
||
648 | { |
||
649 | // Fail |
||
650 | $data = ''; |
||
651 | $this->mGetFailed[] = $url; |
||
652 | View Code Duplication | if ($this->mCliMode) |
|
0 ignored issues
–
show
This code seems to be duplicated across your project.
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. ![]() |
|||
653 | echo "[" . substr('000' . strval(count($this->mGetOk) + count($this->mGetFailed)), -3) . " ] ×: $url\n"; |
||
654 | } |
||
655 | } |
||
656 | return $data; |
||
657 | } // end of func ParseUrl2Data |
||
658 | |||
659 | |||
660 | /** |
||
661 | * Cleanup html code before parse |
||
662 | */ |
||
663 | protected function PreParse() { |
||
664 | // These extra xml markup can't be treat well by DOM, remove them. |
||
665 | |||
666 | // Remove <?xml version="1.0" encoding="utf-8".. |
||
667 | $this->mHtml = preg_replace('/<\?xml version=[^>]+>/i', '', $this->mHtml); |
||
668 | // Remove xmlns from: |
||
669 | // <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
||
670 | $this->mHtml = preg_replace('/<html\s+xmlns=[^>]+>/i', '<html>', $this->mHtml); |
||
671 | } // end of func PrePare |
||
672 | |||
673 | |||
674 | /** |
||
675 | * Set url of web page to process |
||
676 | * @param string $url |
||
677 | */ |
||
678 | public function SetUrl ($url) { |
||
679 | if (!empty($url) && $this->IsSafe($url)) { |
||
680 | // Convert encoded url(eg: chinese) back to original |
||
681 | $url = urldecode($url); |
||
682 | $this->mUrl = $url; |
||
683 | } |
||
684 | } // end of func SetUrl |
||
685 | |||
686 | |||
687 | } // end of class ToDataUri |
||
688 | ?> |
||
0 ignored issues
–
show
It is not recommended to use PHP's closing tag
?> in files other than templates.
Using a closing tag in PHP files that only contain PHP code is not recommended as you might accidentally add whitespace after the closing tag which would then be output by PHP. This can cause severe problems, for example headers cannot be sent anymore. A simple precaution is to leave off the closing tag as it is not required, and it also has no negative effects whatsoever. ![]() |
|||
689 |
This class, trait or interface has been deprecated. The supplier of the file has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the type will be removed from the class and what other constant to use instead.