1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* patch to keep Snoopy working in PHP8 for now */ |
4
|
|
|
if (!function_exists('each')) {
    /**
     * Userland replacement for the each() built-in, so that the
     * `while (list(...) = each(...))` loops in this file keep working when
     * the built-in is not available.
     *
     * Returns the key/value pair at the array's internal pointer and then
     * advances the pointer by one element.
     *
     * @param array $a array whose internal pointer is read and advanced
     *
     * @return array|false four-element array with keys 1 and 'value' (the
     *                     current value) and 0 and 'key' (the current key),
     *                     or false when the pointer is past the last element
     */
    function each(&$a)
    {
        $key = key($a);
        if ($key === null) {
            // key() yields null once the pointer has run off the end
            // (or the array is empty): nothing to return, nothing to advance.
            return false;
        }

        $value = current($a);
        next($a);

        return [1 => $value, 'value' => $value, 0 => $key, 'key' => $key];
    }
}
18
|
|
|
|
19
|
|
|
/*************************************************
 *
 * Snoopy - the PHP net client
 * Author: Monte Ohrt <monte@ohrt.com>
 * Copyright (c): 1999-2014, all rights reserved
 * Version: 1.2.5
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * You may contact the author of Snoopy by e-mail at:
 * monte@ohrt.com
 *
 * The latest version of Snoopy can be obtained from:
 * http://snoopy.sourceforge.net/
 *
 * @deprecated please, find another way
 *************************************************/
47
|
|
|
class Snoopy |
48
|
|
|
{ |
49
|
|
|
/**
 * Records a deprecation notice with the XOOPS logger, when one is available.
 *
 * The logger is read from $GLOBALS['xoopsLogger']; if that global is not set
 * (for example when the class is used outside a bootstrapped XOOPS request)
 * the notice is skipped instead of failing on a method call on null.
 */
public function __construct()
{
    if (isset($GLOBALS['xoopsLogger']) && is_object($GLOBALS['xoopsLogger'])) {
        $GLOBALS['xoopsLogger']->addDeprecated("Use of Snoopy in XOOPS is deprecated and has been replaced in core with XoopsHttpGet. Snoopy will be removed in future versions..");
    }
}
53
|
|
|
|
54
|
|
|
/**** Public variables ****/

/* user definable vars */

public $host = "www.php.net"; // host name we are connecting to
public $port = 80; // port we are connecting to
public $proxy_host = ""; // proxy host to use
public $proxy_port = ""; // proxy port to use
public $proxy_user = ""; // proxy user to use
public $proxy_pass = ""; // proxy password to use

public $agent = "Snoopy v1.2.5"; // agent we masquerade as
public $referer = ""; // referer info to pass
public $cookies = []; // array of cookies to pass
// $cookies["username"]="joe";
public $rawheaders = []; // array of raw headers to send
// $rawheaders["Content-type"]="text/html";

public $maxredirs = 5; // http redirection depth maximum. 0 = disallow
public $lastredirectaddr = ""; // contains address of last redirected address
public $offsiteok = true; // allows redirection off-site
public $maxframes = 0; // frame content depth maximum. 0 = disallow
public $expandlinks = true; // expand links to fully qualified URLs.
// this only applies to fetchlinks()
// submitlinks(), and submittext()
public $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.

public $user = ""; // user for http authentication
public $pass = ""; // password for http authentication

// http accept types
public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

public $results = []; // where the content is put

public $error = ""; // error messages sent here
public $response_code = ""; // response code returned from server
public $headers = []; // headers returned from server sent here
public $maxlength = 500000; // max return data length (body)
public $read_timeout = 0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
public $timed_out = false; // if a read operation timed out
public $status = 0; // http request status

public $temp_dir = "/tmp"; // temporary directory that the webserver
// has permission to write to.
// under Windows, this should be C:\temp

public $curl_path = "/usr/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.

// send Accept-encoding: gzip?
public $use_gzip = true;

/**** Internal variables ****/
// Underscore-prefixed and meant for internal use, but declared public,
// so they remain reachable (and writable) from outside the class.

public $_maxlinelen = 4096; // max line length (headers)

public $_httpmethod = "GET"; // default http request method
public $_httpversion = "HTTP/1.0"; // default http request version
public $_submit_method = "POST"; // default submit method
public $_submit_type = "application/x-www-form-urlencoded"; // default submit type
public $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
public $_redirectaddr = false; // will be set if page fetched is a redirect
public $_redirectdepth = 0; // increments on an http redirect
public $_frameurls = []; // frame src urls
public $_framedepth = 0; // increments on frame depth

public $_isproxy = false; // set if using a proxy server
public $_fp_timeout = 30; // timeout for socket connection
135
|
|
|
|
136
|
|
|
/*======================================================================*\ |
137
|
|
|
Function: fetch |
138
|
|
|
Purpose: fetch the contents of a web page |
139
|
|
|
(and possibly other protocols in the |
140
|
|
|
future like ftp, nntp, gopher, etc.) |
141
|
|
|
Input: $URI the location of the page to fetch |
142
|
|
|
Output: $this->results the output text from the fetch |
143
|
|
|
\*======================================================================*/ |
144
|
|
|
|
145
|
|
|
/**
 * Fetch the contents of a web page over http or https.
 *
 * Any user/password embedded in the URI is copied to $this->user and
 * $this->pass, and host/port are copied to $this->host and $this->port.
 * Plain http goes through _connect()/_httprequest()/_disconnect(); https
 * goes through _httpsrequest() and requires $this->curl_path to point at an
 * executable. After the request, a pending redirect ($this->_redirectaddr)
 * is followed while $this->maxredirs allows it, and collected frame URLs
 * ($this->_frameurls) are fetched while $this->maxframes allows it.
 *
 * @param string $URI the location of the page to fetch
 *
 * @return bool false when the scheme is neither http nor https (an error
 *              message is stored in $this->error), when the URI has no
 *              host, when the cURL binary is unavailable for https, or when
 *              _connect() fails; true otherwise
 */
public function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() returns false for seriously malformed URLs
        $URI_PARTS = [];
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }

    $rawScheme = $URI_PARTS["scheme"] ?? '';
    $scheme    = strtolower($rawScheme);
    if ($scheme !== "http" && $scheme !== "https") {
        // not a valid protocol
        $this->error = 'Invalid protocol "' . $rawScheme . "\"\n";
        return false;
    }

    if ($scheme === "https") {
        if (!$this->curl_path) {
            return false;
        }
        if (function_exists("is_executable") && !is_executable($this->curl_path)) {
            return false;
        }
    }

    if (empty($URI_PARTS["host"])) {
        $this->error = "Invalid URI: no host found\n";
        return false;
    }
    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"])) {
        $this->port = $URI_PARTS["port"];
    }

    if ($this->_isproxy) {
        // using proxy, send entire URI
        $request = $URI;
    } else {
        // no proxy, send only the path (plus query string, if any)
        $path    = $URI_PARTS["path"] ?? '';
        $query   = $URI_PARTS["query"] ?? '';
        $request = $path . ($query !== '' ? "?" . $query : "");
    }

    if ($scheme === "http") {
        if (!$this->_connect($fp)) {
            return false;
        }
        $this->_httprequest($request, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    } else {
        $this->_httpsrequest($request, $URI, $this->_httpmethod);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // only follow redirect if it's on this site, or offsiteok is true.
        // The lookahead stops "host" from matching "host.evil.example".
        $onSite = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?=[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onSite || $this->offsiteok) {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls        = $this->_frameurls;
        $this->_frameurls = [];

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
270
|
|
|
|
271
|
|
|
/*======================================================================*\ |
272
|
|
|
Function: submit |
273
|
|
|
Purpose: submit an http form |
274
|
|
|
Input: $URI the location to post the data |
275
|
|
|
$formvars the formvars to use. |
276
|
|
|
format: $formvars["var"] = "val"; |
277
|
|
|
$formfiles an array of files to submit |
278
|
|
|
format: $formfiles["var"] = "/dir/filename.ext"; |
279
|
|
|
Output: $this->results the text output from the post |
280
|
|
|
\*======================================================================*/ |
281
|
|
|
|
282
|
|
|
/**
 * Submit an http form.
 *
 * The request body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. Plain http goes through
 * _connect()/_httprequest()/_disconnect(); https goes through
 * _httpsrequest() and requires $this->curl_path to point at an executable.
 * A pending redirect is followed while $this->maxredirs allows it: with
 * fetch() when the redirect address contains a "?" after its first
 * character, otherwise by submitting the same form data again. Collected
 * frame URLs are then fetched while $this->maxframes allows it.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the formvars to use.
 *                          format: $formvars["var"] = "val";
 * @param mixed  $formfiles an array of files to submit
 *                          format: $formfiles["var"] = "/dir/filename.ext";
 *
 * @return bool false when the scheme is neither http nor https (an error
 *              message is stored in $this->error), when the URI has no
 *              host, when the cURL binary is unavailable for https, or when
 *              _connect() fails; true otherwise
 */
public function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() returns false for seriously malformed URLs
        $URI_PARTS = [];
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }

    $rawScheme = $URI_PARTS["scheme"] ?? '';
    $scheme    = strtolower($rawScheme);
    if ($scheme !== "http" && $scheme !== "https") {
        // not a valid protocol
        $this->error = 'Invalid protocol "' . $rawScheme . "\"\n";
        return false;
    }

    if ($scheme === "https") {
        if (!$this->curl_path) {
            return false;
        }
        if (function_exists("is_executable") && !is_executable($this->curl_path)) {
            return false;
        }
    }

    if (empty($URI_PARTS["host"])) {
        $this->error = "Invalid URI: no host found\n";
        return false;
    }
    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"])) {
        $this->port = $URI_PARTS["port"];
    }

    if ($this->_isproxy) {
        // using proxy, send entire URI
        $request = $URI;
    } else {
        // no proxy, send only the path (plus query string, if any)
        $path    = $URI_PARTS["path"] ?? '';
        $query   = $URI_PARTS["query"] ?? '';
        $request = $path . ($query !== '' ? "?" . $query : "");
    }

    if ($scheme === "http") {
        if (!$this->_connect($fp)) {
            return false;
        }
        $this->_httprequest($request, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        $this->_disconnect($fp);
    } else {
        $this->_httpsrequest($request, $URI, $this->_submit_method, $this->_submit_type, $postdata);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // Qualify the redirect only when it is not already an absolute URL.
        // Testing for "any scheme" rather than "the same scheme" keeps an
        // http -> https redirect from being glued onto the current host.
        if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $this->_redirectaddr)) {
            $this->_redirectaddr = $this->_expandlinks(
                $this->_redirectaddr,
                $rawScheme . "://" . $URI_PARTS["host"]
            );
        }

        // only follow redirect if it's on this site, or offsiteok is true.
        // The lookahead stops "host" from matching "host.evil.example".
        $onSite = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?=[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onSite || $this->offsiteok) {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0) {
                // the redirect has changed the request method from post to get
                $this->fetch($this->_redirectaddr);
            } else {
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls        = $this->_frameurls;
        $this->_frameurls = [];

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
429
|
|
|
|
430
|
|
|
/*======================================================================*\ |
431
|
|
|
Function: fetchlinks |
432
|
|
|
Purpose: fetch the links from a web page |
433
|
|
|
Input: $URI where you are fetching from |
434
|
|
|
Output: $this->results an array of the URLs |
435
|
|
|
\*======================================================================*/ |
436
|
|
|
|
437
|
|
|
/**
 * Fetch a web page and reduce it to the links it contains.
 *
 * After a successful fetch(), $this->results (a single document, or an
 * array of documents) is replaced by the link list produced by
 * _striplinks(). When $this->expandlinks is set, each list is passed
 * through _expandlinks() using the last redirect address, if any,
 * otherwise $URI, as the base.
 *
 * @param string $URI where you are fetching from
 *
 * @return bool false when fetch() fails, true otherwise
 */
public function fetchlinks($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        // Expand per document: handing the whole array-of-link-arrays to
        // _expandlinks() would pass nested arrays to preg_replace().
        foreach ($this->results as $x => $document) {
            $links = $this->_striplinks($document) ?: [];
            if ($this->expandlinks) {
                $links = $this->_expandlinks($links, $URI);
            }
            $this->results[$x] = $links;
        }
    } else {
        $links = $this->_striplinks($this->results) ?: [];
        if ($this->expandlinks) {
            $links = $this->_expandlinks($links, $URI);
        }
        $this->results = $links;
    }

    return true;
}
459
|
|
|
|
460
|
|
|
/*======================================================================*\ |
461
|
|
|
Function: fetchform |
462
|
|
|
Purpose: fetch the form elements from a web page |
463
|
|
|
Input: $URI where you are fetching from |
464
|
|
|
Output: $this->results the resulting html form |
465
|
|
|
\*======================================================================*/ |
466
|
|
|
|
467
|
|
|
/**
 * Fetch a web page and reduce it to its form elements.
 *
 * After a successful fetch(), $this->results (a single document, or an
 * array of documents) is replaced by the output of _stripform().
 *
 * @param string $URI where you are fetching from
 *
 * @return bool false when fetch() fails, true otherwise
 */
public function fetchform($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (is_array($this->results)) {
        foreach ($this->results as $x => $document) {
            $this->results[$x] = $this->_stripform($document);
        }
    } else {
        $this->results = $this->_stripform($this->results);
    }

    return true;
}
485
|
|
|
|
486
|
|
|
|
487
|
|
|
/*======================================================================*\ |
488
|
|
|
Function: fetchtext |
489
|
|
|
Purpose: fetch the text from a web page, stripping the links |
490
|
|
|
Input: $URI where you are fetching from |
491
|
|
|
Output: $this->results the text from the web page |
492
|
|
|
\*======================================================================*/ |
493
|
|
|
|
494
|
|
|
/**
 * Fetch a web page and reduce it to plain text.
 *
 * After a successful fetch(), $this->results (a single document, or an
 * array of documents) is replaced by the output of _striptext().
 *
 * @param string $URI where you are fetching from
 *
 * @return bool false when fetch() fails, true otherwise
 */
public function fetchtext($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (is_array($this->results)) {
        foreach ($this->results as $x => $document) {
            $this->results[$x] = $this->_striptext($document);
        }
    } else {
        $this->results = $this->_striptext($this->results);
    }

    return true;
}
509
|
|
|
|
510
|
|
|
/*======================================================================*\ |
511
|
|
|
Function: submitlinks |
512
|
|
|
Purpose: grab links from a form submission |
513
|
|
|
Input: $URI where you are submitting from |
514
|
|
|
Output: $this->results an array of the links from the post |
515
|
|
|
\*======================================================================*/ |
516
|
|
|
|
517
|
|
|
/**
 * Submit a form and reduce the response to the links it contains.
 *
 * After a successful submit(), $this->results (a single document, or an
 * array of documents) is replaced by the link list produced by
 * _striplinks(). When $this->expandlinks is set, each list is passed
 * through _expandlinks() using the last redirect address, if any,
 * otherwise $URI, as the base.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 *
 * @return bool false when submit() fails, true otherwise
 */
public function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        foreach ($this->results as $x => $document) {
            // a document without links yields an empty list, not null
            $links = $this->_striplinks($document) ?: [];
            if ($this->expandlinks) {
                $links = $this->_expandlinks($links, $URI);
            }
            $this->results[$x] = $links;
        }
    } else {
        $links = $this->_striplinks($this->results) ?: [];
        if ($this->expandlinks) {
            $links = $this->_expandlinks($links, $URI);
        }
        $this->results = $links;
    }

    return true;
}
541
|
|
|
|
542
|
|
|
/*======================================================================*\ |
543
|
|
|
Function: submittext |
544
|
|
|
Purpose: grab text from a form submission |
545
|
|
|
Input: $URI where you are submitting from |
546
|
|
|
Output: $this->results the text from the web page |
547
|
|
|
\*======================================================================*/ |
548
|
|
|
|
549
|
|
|
/**
 * Submit a form and reduce the response to plain text.
 *
 * After a successful submit(), $this->results (a single document, or an
 * array of documents) is replaced by the output of _striptext(). When
 * $this->expandlinks is set, that text is additionally passed through
 * _expandlinks() using the last redirect address, if any, otherwise $URI,
 * as the base.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 *
 * @return bool false when submit() fails, true otherwise
 */
public function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        foreach ($this->results as $x => $document) {
            $text = $this->_striptext($document);
            if ($this->expandlinks) {
                $text = $this->_expandlinks($text, $URI);
            }
            $this->results[$x] = $text;
        }
    } else {
        $text = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $text = $this->_expandlinks($text, $URI);
        }
        $this->results = $text;
    }

    return true;
}
573
|
|
|
|
574
|
|
|
|
575
|
|
|
/*======================================================================*\ |
576
|
|
|
Function: set_submit_multipart |
577
|
|
|
Purpose: Set the form submission content type to |
578
|
|
|
multipart/form-data |
579
|
|
|
\*======================================================================*/ |
580
|
|
|
/**
 * Set the form submission content type ($this->_submit_type) to
 * multipart/form-data.
 *
 * @return void
 */
public function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
}
584
|
|
|
|
585
|
|
|
|
586
|
|
|
/*======================================================================*\ |
587
|
|
|
Function: set_submit_normal |
588
|
|
|
Purpose: Set the form submission content type to |
589
|
|
|
application/x-www-form-urlencoded |
590
|
|
|
\*======================================================================*/ |
591
|
|
|
/**
 * Set the form submission content type ($this->_submit_type) to
 * application/x-www-form-urlencoded.
 *
 * @return void
 */
public function set_submit_normal()
{
    $this->_submit_type = "application/x-www-form-urlencoded";
}
595
|
|
|
|
596
|
|
|
|
597
|
|
|
|
598
|
|
|
|
599
|
|
|
/*======================================================================*\ |
600
|
|
|
Private functions |
601
|
|
|
\*======================================================================*/ |
602
|
|
|
|
603
|
|
|
|
604
|
|
|
/*======================================================================*\ |
605
|
|
|
Function: _striplinks |
606
|
|
|
Purpose: strip the hyperlinks from an HTML document |
607
|
|
|
Input: $document document to strip. |
608
|
|
|
Output: $match an array of the links |
609
|
|
|
\*======================================================================*/ |
610
|
|
|
|
611
|
|
|
/**
 * Extract the href targets of the anchor tags in an HTML document.
 *
 * Quoted href values are returned first, in document order, followed by
 * unquoted ones. Values that are empty() are skipped.
 *
 * @param string $document document to strip.
 *
 * @return array list of href values; empty when the document has no links
 */
public function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // Start from an empty list so a document without links returns []
    // rather than an undefined variable.
    $match = [];

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    foreach ([2, 3] as $group) {
        foreach ($links[$group] as $val) {
            if (!empty($val)) {
                $match[] = $val;
            }
        }
    }

    // return the links
    return $match;
}
637
|
|
|
|
638
|
|
|
/*======================================================================*\ |
639
|
|
|
Function: _stripform |
640
|
|
|
Purpose: strip the form elements from an HTML document |
641
|
|
|
Input: $document document to strip. |
642
|
|
|
Output: $match an array of the links |
643
|
|
|
\*======================================================================*/ |
644
|
|
|
|
645
|
|
|
/**
 * Extract the form-related markup from an HTML document.
 *
 * Collects every match of the FORM / INPUT / SELECT / TEXTAREA / OPTION
 * pattern below and joins the matches with CRLF.
 *
 * @param string $document document to strip.
 *
 * @return string the matched elements, one per line; empty string when
 *                nothing matches
 */
public function _stripform($document)
{
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

    // catenate the matches and return them
    return implode("\r\n", $elements[0]);
}
655
|
|
|
|
656
|
|
|
|
657
|
|
|
/*======================================================================*\ |
658
|
|
|
Function: _striptext |
659
|
|
|
Purpose: strip the text from an HTML document |
660
|
|
|
Input: $document document to strip. |
661
|
|
|
Output: $text the resulting text |
662
|
|
|
\*======================================================================*/ |
663
|
|
|
|
664
|
|
|
/**
 * Strip an HTML document down to its text.
 *
 * Removes script blocks and tags, collapses whitespace that follows a line
 * break, and replaces the HTML entities listed below with their characters.
 * The patterns are applied in the listed order, each to the output of the
 * previous one.
 *
 * @param string|array $document document to strip (passed as the subject of
 *                               preg_replace())
 *
 * @return string|array the resulting text
 */
public function _striptext($document)
{
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    $search = [
        "'<script[^>]*?>.*?</script>'si", // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
        "'([\r\n])[\s]+'", // strip out white space
        "'&(quot|#34|#034|#x22);'i", // replace html entities
        "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i",
        "'&(gt|#62|#062|#x3e);'i",
        "'&(nbsp|#160|#xa0);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
        "'&(reg|#174);'i",
        "'&(deg|#176);'i",
        "'&(#39|#039|#x27);'",
        "'&(euro|#8364);'i", // europe
        "'&a(uml|UML);'", // german
        "'&o(uml|UML);'",
        "'&u(uml|UML);'",
        "'&A(uml|UML);'",
        "'&O(uml|UML);'",
        "'&U(uml|UML);'",
        "'&szlig;'i", // the entity, not the literal character
    ];
    // NOTE(review): the chr() replacements are single bytes (>= 128 for all
    // but chr(39)), while the umlaut literals are in this file's own
    // encoding - confirm callers expect that mix.
    $replace = [
        "",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        chr(161),
        chr(162),
        chr(163),
        chr(169),
        chr(174),
        chr(176),
        chr(39),
        chr(128),
        "ä",
        "ö",
        "ü",
        "Ä",
        "Ö",
        "Ü",
        "ß",
    ];

    return preg_replace($search, $replace, $document);
}
724
|
|
|
|
725
|
|
|
/*======================================================================*\ |
726
|
|
|
Function: _expandlinks |
727
|
|
|
Purpose: expand each link into a fully qualified URL |
728
|
|
|
Input: $links the links to qualify |
729
|
|
|
$URI the full URI to get the base from |
730
|
|
|
Output: $expandedLinks the expanded links |
731
|
|
|
\*======================================================================*/ |
732
|
|
|
|
733
|
|
|
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is taken from $URI with its query string removed: the "root" is
 * scheme://host[:port], and the "directory" is the root plus the path
 * without a trailing name.ext segment and without a trailing slash.
 * Links that start with "/" are prefixed with the root; links that do not
 * start with http://, https:// or mailto: are prefixed with the directory;
 * "/./" and "/segment/../" sequences are then collapsed once.
 *
 * @param string|array $links the links to qualify
 * @param string       $URI   the full URI to get the base from
 *
 * @return string|array the expanded links; $links unchanged when $URI has
 *                      no scheme or host to build a base from
 */
public function _expandlinks($links, $URI)
{
    // drop the query string before looking for the base
    $base     = (string) $URI;
    $queryPos = strpos($base, "?");
    if ($queryPos !== false) {
        $base = substr($base, 0, $queryPos);
    }

    $match_part = parse_url($base);
    if (!is_array($match_part) || empty($match_part["scheme"]) || empty($match_part["host"])) {
        // nothing to qualify against
        return $links;
    }

    $match_root = $match_part["scheme"] . "://" . $match_part["host"];
    if (!empty($match_part["port"])) {
        $match_root .= ":" . $match_part["port"];
    }

    // Strip a trailing "name.ext" segment and a trailing slash from the
    // PATH only. Doing this on the whole URL would eat the host of a base
    // such as "http://example.com".
    $dir   = $match_part["path"] ?? "";
    $dir   = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $dir);
    $dir   = preg_replace("|/$|", "", $dir);
    $match = $match_root . $dir;

    $search = [
        // links back to the current host become root-relative; the
        // lookahead keeps "host.other.tld" and "host:port" links intact
        "~^http://" . preg_quote($this->host, "~") . "(?=[/?#]|$)~i",
        "|^(\/)|i",
        // anything that is not already absolute is relative to the directory
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|",
    ];

    $replace = ["", $match_root . "/", $match . "/", "/", "/"];

    return preg_replace($search, $replace, $links);
}
752
|
|
|
|
753
|
|
|
/*======================================================================*\ |
754
|
|
|
Function: _httprequest |
755
|
|
|
Purpose: go get the http data from the server |
756
|
|
|
Input: $url the url to fetch |
757
|
|
|
$fp the current open file pointer |
758
|
|
|
$URI the full URI |
759
|
|
|
$body body contents to send if any (POST) |
760
|
|
|
Output: |
761
|
|
|
\*======================================================================*/ |
762
|
|
|
|
763
|
|
|
/**
 * Send one HTTP request over an already-open stream and parse the response.
 *
 * Builds the request line and headers from the object's configuration
 * (agent, host/port, accept, referer, cookies, raw headers, basic and proxy
 * credentials), writes them plus $body to $fp, then reads the response
 * headers and body. Side effects on the object: status, response_code,
 * headers, results, _redirectaddr, _frameurls and timed_out are updated.
 *
 * @param string   $url          request target for the request line ("/" is used when empty)
 * @param resource $fp           open stream to write the request to and read the response from
 * @param string   $URI          full URI being fetched; used to resolve relative redirects and frame sources
 * @param string   $http_method  HTTP verb placed in the request line
 * @param string   $content_type value of the Content-type header; omitted when empty
 * @param string   $body         request body; omitted when empty
 *
 * @return bool true when a response was read, false when the read timed out (status is then -100)
 */
public function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
{
    $cookie_headers = '';
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $URI_PARTS = parse_url($URI);
    if (empty($url)) {
        $url = "/";
    }
    $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
    if (!empty($this->agent)) {
        $headers .= "User-Agent: " . $this->agent . "\r\n";
    }
    // A caller-supplied raw "Host" header takes precedence over the computed one.
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: " . $this->host;
        if (!empty($this->port) && $this->port != '80') {
            $headers .= ":" . $this->port;
        }
        $headers .= "\r\n";
    }
    if (!empty($this->accept)) {
        $headers .= "Accept: " . $this->accept . "\r\n";
    }
    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        } else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support." .
                "  Requesting file(s) without gzip encoding.",
                E_USER_NOTICE,
            );
        }
    }
    if (!empty($this->referer)) {
        $headers .= "Referer: " . $this->referer . "\r\n";
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array) $this->cookies;
        }

        if (count($this->cookies) > 0) {
            $cookie_headers .= 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers, 0, -2) . "\r\n";
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array) $this->rawheaders;
        }
        // foreach instead of each(): each() was never preceded by reset(), so
        // the array pointer stayed at the end after the first request and the
        // raw headers were silently dropped on every following request
        // (redirects, frames).
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers .= $headerKey . ": " . $headerVal . "\r\n";
        }
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data") {
            $headers .= "; boundary=" . $this->_mime_boundary;
        }
        $headers .= "\r\n";
    }
    if (!empty($body)) {
        $headers .= "Content-length: " . strlen($body) . "\r\n";
    }
    if (!empty($this->user) || !empty($this->pass)) {
        $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";
    }

    //add proxy auth headers
    if (!empty($this->proxy_user)) {
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";
    }

    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0) {
        socket_set_timeout($fp, $this->read_timeout);
    }
    $this->timed_out = false;

    $request = $headers . $body;
    fwrite($fp, $request, strlen($request));

    $this->_redirectaddr = false;
    // Reset to an empty array rather than unset(): code that later counts or
    // iterates the headers must not find an undefined property when the
    // server sends nothing back.
    $this->headers = [];

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        // an empty line ends the header section
        if ($currentHeader == "\r\n") {
            break;
        }

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, so "Location:/path" is
        // handled instead of reading an undefined match group.
        if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches)) {
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2])) {
                    $this->_redirectaddr .= "/" . $matches[2];
                } else {
                    $this->_redirectaddr .= $matches[2];
                }
            } else {
                $this->_redirectaddr = $matches[2];
            }
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive and the space after ":" is optional
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader)) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks until the stream yields no more data
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while (true);

    // gunzip
    if ($is_gzipped) {
        // per https://www.php.net/manual/en/function.gzencode.php
        // skip the fixed 10-byte gzip header, then inflate the raw deflate stream
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // "[]" cannot be applied to a string on PHP >= 7.1; if results does
        // not hold an array yet (e.g. a string left by an earlier fetch),
        // start a fresh list of pages.
        if (!is_array($this->results)) {
            $this->results = [];
        }
        $this->results[] = $results;
        foreach ($match[1] as $frameSrc) {
            $this->_frameurls[] = $this->_expandlinks($frameSrc, $URI_PARTS["scheme"] . "://" . $this->host);
        }
    } elseif (is_array($this->results)) {
        // have we already fetched framed content?
        $this->results[] = $results;
    } else {
        // no framed content
        $this->results = $results;
    }

    return true;
}
949
|
|
|
|
950
|
|
|
/*======================================================================*\ |
951
|
|
|
Function: _httpsrequest |
952
|
|
|
Purpose: go get the https data from the server using curl |
953
|
|
|
Input: $url the url to fetch |
954
|
|
|
$URI the full URI |
955
|
|
|
$body body contents to send if any (POST) |
956
|
|
|
Output: |
957
|
|
|
\*======================================================================*/ |
958
|
|
|
|
959
|
|
|
/**
 * Fetch an HTTPS document by shelling out to the curl binary.
 *
 * Builds the request headers from the object's configuration, runs
 * `$this->curl_path` through exec() with the response headers dumped to a
 * temporary file (-D), then parses those headers and the body. Side effects
 * on the object: status, response_code, headers, results, _redirectaddr,
 * _frameurls and error are updated.
 *
 * NOTE(review): curl is invoked with -k, i.e. without TLS certificate
 * verification. This is kept for backward compatibility; confirm it is
 * acceptable for your deployment.
 *
 * @param string $url          unused here (curl receives the full $URI); kept for signature compatibility
 * @param string $URI          full URI to fetch
 * @param string $http_method  unused here; curl picks the method itself (-d implies POST)
 * @param string $content_type value of the Content-type header; omitted when empty
 * @param string $body         request body passed to curl via -d; omitted when empty
 *
 * @return bool true on success, false when the temporary file could not be
 *              created or curl exited with a non-zero code (see $this->error)
 */
public function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
{
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $headers = [];

    $URI_PARTS = parse_url($URI);

    // GET ... header not needed for curl
    if (!empty($this->agent)) {
        $headers[] = "User-Agent: " . $this->agent;
    }
    if (!empty($this->host)) {
        if (!empty($this->port)) {
            $headers[] = "Host: " . $this->host . ":" . $this->port;
        } else {
            $headers[] = "Host: " . $this->host;
        }
    }
    if (!empty($this->accept)) {
        $headers[] = "Accept: " . $this->accept;
    }
    if (!empty($this->referer)) {
        $headers[] = "Referer: " . $this->referer;
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array) $this->cookies;
        }

        if (count($this->cookies) > 0) {
            $cookie_str = 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_str .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str, 0, -2);
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array) $this->rawheaders;
        }
        // foreach instead of each(): each() was never preceded by reset(), so
        // raw headers were dropped on every request after the first one.
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers[] = $headerKey . ": " . $headerVal;
        }
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data") {
            $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
        } else {
            $headers[] = "Content-type: $content_type";
        }
    }
    if (!empty($body)) {
        $headers[] = "Content-length: " . strlen($body);
    }
    if (!empty($this->user) || !empty($this->pass)) {
        // "Basic" spelled as in _httprequest; the scheme name is case-insensitive
        $headers[] = "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass);
    }

    // Every value that reaches the shell is passed through escapeshellarg().
    // The previous double-quoted interpolation let $(...), backticks and "$"
    // inside headers or the body be interpreted by the shell.
    $cmdline_params = '';
    foreach ($headers as $header) {
        $cmdline_params .= " -H " . escapeshellarg($header);
    }

    if (!empty($body)) {
        $cmdline_params .= " -d " . escapeshellarg($body);
    }

    if ($this->read_timeout > 0) {
        $cmdline_params .= " -m " . escapeshellarg((string) $this->read_timeout);
    }

    // curl writes the response headers to this file (-D)
    $headerfile = tempnam(sys_get_temp_dir(), "sno");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the cURL response headers.";
        return false;
    }

    exec($this->curl_path . " -k -D " . escapeshellarg($headerfile) . $cmdline_params . " " . escapeshellarg($URI), $results, $return);

    if ($return) {
        // do not leave the temporary header file behind on failure
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() returns the output split into lines; stitch the body back together
    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    // the file is no longer needed once its lines are in memory
    unlink($headerfile);
    if ($result_headers === false) {
        $result_headers = [];
    }

    $this->_redirectaddr = false;
    // Reset to an empty array rather than unset(), so later readers never
    // see an undefined property.
    $this->headers = [];

    foreach ($result_headers as $headerLine) {
        // if a header begins with Location: or URI:, set the redirect.
        // Same pattern as _httprequest: the former "/^(Location: |URI:)\s+(.*)/"
        // demanded two whitespace characters after "Location:" and therefore
        // never matched an ordinary "Location: <url>" header.
        if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($headerLine), $matches)) {
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2])) {
                    $this->_redirectaddr .= "/" . $matches[2];
                } else {
                    $this->_redirectaddr .= $matches[2];
                }
            } else {
                $this->_redirectaddr = $matches[2];
            }
        }

        if (preg_match("|^HTTP/|", $headerLine)) {
            $this->response_code = $headerLine;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match)) {
                $this->status = $match[1];
            }
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // "[]" cannot be applied to a string on PHP >= 7.1; if results does
        // not hold an array yet (e.g. a string left by an earlier fetch),
        // start a fresh list of pages.
        if (!is_array($this->results)) {
            $this->results = [];
        }
        $this->results[] = $results;
        foreach ($match[1] as $frameSrc) {
            $this->_frameurls[] = $this->_expandlinks($frameSrc, $URI_PARTS["scheme"] . "://" . $this->host);
        }
    } elseif (is_array($this->results)) {
        // have we already fetched framed content?
        $this->results[] = $results;
    } else {
        // no framed content
        $this->results = $results;
    }

    return true;
}
1111
|
|
|
|
1112
|
|
|
/*======================================================================*\ |
1113
|
|
|
Function: setcookies() |
1114
|
|
|
Purpose: set cookies for a redirection |
1115
|
|
|
\*======================================================================*/ |
1116
|
|
|
|
1117
|
|
|
/**
 * Copy cookies from the stored response headers into $this->cookies.
 *
 * Every "Set-Cookie: name=value" header found in $this->headers is stored
 * as $this->cookies[name] = urldecode(value); cookie attributes after the
 * first ";" are ignored.
 *
 * @return void
 */
public function setcookies()
{
    // The request methods clear $this->headers before reading a response, so
    // it can be missing or empty here; count() on a missing value is a
    // TypeError on PHP 8.
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match)) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
1125
|
|
|
|
1126
|
|
|
|
1127
|
|
|
/*======================================================================*\ |
1128
|
|
|
Function: _check_timeout |
1129
|
|
|
Purpose: checks whether timeout has occurred |
1130
|
|
|
Input: $fp file pointer |
1131
|
|
|
\*======================================================================*/ |
1132
|
|
|
|
1133
|
|
|
/**
 * Report whether a read on the given stream has timed out.
 *
 * Only consulted when a read timeout is configured; when the stream reports
 * a timeout, $this->timed_out is set to true as a side effect.
 *
 * @param resource $fp stream to inspect
 *
 * @return bool true when the stream reports a timeout, false otherwise
 */
public function _check_timeout($fp)
{
    // no timeout configured: nothing to check
    if (!($this->read_timeout > 0)) {
        return false;
    }

    $streamState = socket_get_status($fp);
    if (!$streamState["timed_out"]) {
        return false;
    }

    $this->timed_out = true;

    return true;
}
1144
|
|
|
|
1145
|
|
|
/*======================================================================*\ |
1146
|
|
|
Function: _connect |
1147
|
|
|
Purpose: make a socket connection |
1148
|
|
|
Input: $fp file pointer |
1149
|
|
|
\*======================================================================*/ |
1150
|
|
|
|
1151
|
|
|
/**
 * Open a socket connection to the target host, or to the proxy if one is configured.
 *
 * When both proxy_host and proxy_port are set, the connection goes to the
 * proxy and $this->_isproxy is set to true; otherwise it goes to
 * $this->host:$this->port. On failure $this->status receives the error
 * number reported by fsockopen() and $this->error a description.
 *
 * @param resource|false $fp receives the opened stream (by reference), or false on failure
 *
 * @return bool true when the connection was opened, false otherwise
 */
public function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen(
        $host,
        $port,
        $errno,
        $errstr,
        $this->_fp_timeout,
    );
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    // Each case ends with break: without it every branch fell through to
    // "default" and the specific messages were always overwritten.
    switch ($errno) {
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (" . $errno . ")";
            break;
    }

    return false;
}
1195
|
|
|
|
1196
|
|
|
/*======================================================================*\ |
1197
|
|
|
Function: _disconnect |
1198
|
|
|
Purpose: disconnect a socket connection |
1199
|
|
|
Input: $fp file pointer |
1200
|
|
|
\*======================================================================*/ |
1201
|
|
|
|
1202
|
|
|
/**
 * Close a stream previously opened for a request.
 *
 * @param resource $fp stream to close
 *
 * @return bool result of fclose(): true on success, false on failure
 */
public function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
1206
|
|
|
|
1207
|
|
|
|
1208
|
|
|
/*======================================================================*\ |
1209
|
|
|
Function: _prepare_post_body |
1210
|
|
|
Purpose: Prepare post body according to encoding type |
1211
|
|
|
Input: $formvars - form variables |
1212
|
|
|
$formfiles - form upload files |
1213
|
|
|
Output: post body |
1214
|
|
|
\*======================================================================*/ |
1215
|
|
|
|
1216
|
|
|
/**
 * Build the POST body for the current submit type ($this->_submit_type).
 *
 * - "application/x-www-form-urlencoded": key=value pairs joined with "&"
 *   (a trailing "&" is left in place); array/object values are sent as key[]=value.
 * - "multipart/form-data": one part per variable and per readable file,
 *   separated by a freshly generated $this->_mime_boundary.
 * Any other submit type yields an empty string.
 *
 * @param array|object $formvars  form variables, name => value (value may be an array/object of values)
 * @param array|object $formfiles upload files, field name => file path or list of file paths
 *
 * @return string|null the encoded body, or null when there is nothing to post
 */
public function _prepare_post_body($formvars, $formfiles)
{
    $formvars = (array) $formvars;
    $formfiles = (array) $formfiles;
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0) {
        // nothing to send; callers receive null as before
        return;
    }

    // foreach is used throughout instead of each(): each() is gone in PHP 8,
    // its inner use ran without reset(), and pointer-based iteration over
    // objects is deprecated.
    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    }
                } else {
                    $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        // {$key}[] yields name="key[]"; the former "$key\[\]"
                        // put literal backslashes into the field name.
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                foreach ((array) $file_names as $file_name) {
                    if (!is_readable($file_name)) {
                        continue;
                    }

                    // file_get_contents() copes with empty files, whereas
                    // fread($fp, 0) throws on PHP 8, and it closes the
                    // handle itself.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) {
                        // unreadable after all (e.g. a directory): skip it like other unreadable entries
                        continue;
                    }
                    $base_name = basename($file_name);

                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--" . $this->_mime_boundary . "--\r\n";
            break;
    }

    return $postdata;
}
1282
|
|
|
} |
1283
|
|
|
|
The `break` statement is not necessary if it is preceded, for example, by a `return` statement.
If you would like to keep this construct to be consistent with other `case` statements, you can safely mark this issue as a false-positive.