1
|
|
|
<?php |
2
|
|
|
/* patch to keep Snoopy working in PHP8 for now */ |
3
|
|
|
if (!function_exists('each')) {
    /**
     * Stand-in for the each() built-in so Snoopy keeps working on PHP 8.
     *
     * Reads the key/value pair at the array's internal pointer, moves the
     * pointer forward by one element, and hands the pair back in the
     * four-slot layout (1, 'value', 0, 'key').
     *
     * @param array $a array whose internal pointer is read and advanced
     *
     * @return array|false the current pair, or false once the pointer has
     *                     moved past the last element
     */
    function each(&$a)
    {
        $key   = key($a);
        $value = current($a);
        next($a);

        // key() reports null when the pointer is beyond the end of the array
        if ($key === null) {
            return false;
        }

        return array(1 => $value, 'value' => $value, 0 => $key, 'key' => $key);
    }
}
17
|
|
|
|
18
|
|
|
/************************************************* |
19
|
|
|
* |
20
|
|
|
* Snoopy - the PHP net client |
21
|
|
|
* Author: Monte Ohrt <[email protected]> |
22
|
|
|
* Copyright (c): 1999-2014, all rights reserved |
23
|
|
|
* Version: 1.2.5 |
24
|
|
|
* This library is free software; you can redistribute it and/or |
25
|
|
|
* modify it under the terms of the GNU Lesser General Public |
26
|
|
|
* License as published by the Free Software Foundation; either |
27
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
28
|
|
|
* |
29
|
|
|
* This library is distributed in the hope that it will be useful, |
30
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
31
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
32
|
|
|
* Lesser General Public License for more details. |
33
|
|
|
* |
34
|
|
|
* You should have received a copy of the GNU Lesser General Public |
35
|
|
|
* License along with this library; if not, write to the Free Software |
36
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
37
|
|
|
* |
38
|
|
|
* You may contact the author of Snoopy by e-mail at: |
39
|
|
|
* [email protected] |
40
|
|
|
* |
41
|
|
|
* The latest version of Snoopy can be obtained from: |
42
|
|
|
* http://snoopy.sourceforge.net/ |
43
|
|
|
* |
44
|
|
|
* @deprecated please, find another way |
45
|
|
|
*************************************************/ |
46
|
|
|
class Snoopy |
47
|
|
|
{ |
48
|
|
|
/**
 * Reports, through the XOOPS logger, that Snoopy is deprecated.
 *
 * The logger is looked up in $GLOBALS['xoopsLogger']; when no logger object
 * is registered there the notice is skipped instead of raising a fatal
 * "call to a member function on null" error.
 */
public function __construct()
{
    if (isset($GLOBALS['xoopsLogger']) && is_object($GLOBALS['xoopsLogger'])) {
        $GLOBALS['xoopsLogger']->addDeprecated("Use of Snoopy in XOOPS is deprecated and has been replaced in core with XoopsHttpGet. Snoopy will be removed in future versions..");
    }
}
52
|
|
|
|
53
|
|
|
/**** Public variables ****/

/* user definable vars */

var $host = "www.php.net";     // host name to connect to
var $port = 80;                // port to connect to
var $proxy_host = "";          // proxy host, if any
var $proxy_port = "";          // proxy port, if any
var $proxy_user = "";          // proxy user name
var $proxy_pass = "";          // proxy password

var $agent = "Snoopy v1.2.5";  // User-Agent string sent with requests
var $referer = "";             // Referer value sent with requests
var $cookies = array();        // cookies to send,
                               // e.g. $cookies["username"]="joe";
var $rawheaders = array();     // extra raw headers to send,
                               // e.g. $rawheaders["Content-type"]="text/html";

var $maxredirs = 5;            // maximum http redirection depth. 0 = disallow
var $lastredirectaddr = "";    // address of the last redirect that was followed
var $offsiteok = true;         // allow redirects that leave the current site
var $maxframes = 0;            // maximum frame content depth. 0 = disallow
var $expandlinks = true;       // expand links to fully qualified URLs;
                               // only applies to fetchlinks(),
                               // submitlinks() and submittext()
var $passcookies = true;       // pass received cookies back through redirects.
                               // NOTE: dates, domains and paths are currently
                               // not respected.

var $user = "";                // user for http authentication
var $pass = "";                // password for http authentication

// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

var $results = "";             // where the fetched content is put

var $error = "";               // error messages are stored here
var $response_code = "";       // response code returned from the server
var $headers = array();        // headers returned from the server
var $maxlength = 500000;       // maximum length of returned data (body)
var $read_timeout = 0;         // read timeout in seconds
                               // (supported only since PHP 4 Beta 4);
                               // set to 0 to disallow timeouts
var $timed_out = false;        // true if a read operation timed out
var $status = 0;               // http request status

var $temp_dir = "/tmp";        // temporary directory the webserver may write to;
                               // under Windows, this should be C:\temp

var $curl_path = "/usr/bin/curl";
                               // Full system path to the cURL binary; Snoopy
                               // uses it for fetching SSL content. Set to false
                               // if cURL is not installed (see
                               // http://curl.haxx.se for installation details).
                               // Snoopy does *not* use the cURL library
                               // functions built into php, as these functions
                               // are not stable as of this Snoopy release.

// send Accept-encoding: gzip?
var $use_gzip = true;

/**** Private variables ****/

var $_maxlinelen = 4096;       // maximum line length (headers)

var $_httpmethod = "GET";      // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST";  // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = "";      // MIME boundary for the multipart/form-data submit type
var $_redirectaddr = false;    // set when the fetched page is a redirect
var $_redirectdepth = 0;       // incremented on every http redirect
var $_frameurls = array();     // frame src urls
var $_framedepth = 0;          // incremented per frame level

var $_isproxy = false;         // set when a proxy server is in use
var $_fp_timeout = 30;         // timeout for the socket connection
134
|
|
|
|
135
|
|
|
/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page over http or https
                (and possibly other protocols in the future like ftp,
                nntp, gopher, etc.), then follow a pending redirect and
                any collected frame urls, bounded by maxredirs / maxframes
    Input:      $URI    the location of the page to fetch
    Output:     $this->results  the output text from the fetch
    Returns:    false when the scheme is neither http nor https, when the
                http connection cannot be opened, or when https is
                requested without an executable cURL binary;
                true otherwise
\*======================================================================*/

function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() yields false for seriously malformed URLs; treat that
        // as "no components" so the scheme check below rejects it cleanly
        $URI_PARTS = array();
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    foreach (array("scheme", "host") as $part) {
        if (!isset($URI_PARTS[$part])) {
            $URI_PARTS[$part] = '';
        }
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }

    switch (strtolower($URI_PARTS["scheme"])) {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
            }
            $this->_disconnect($fp);

            $onSitePattern = "|^http://" . preg_quote($this->host, "|") . "|i";
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httpsrequest($URI, $URI, $this->_httpmethod);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httpsrequest($path, $URI, $this->_httpmethod);
            }

            // an https page redirecting to the same host over https (or
            // http) stays "on this site"
            $onSitePattern = "|^https?://" . preg_quote($this->host, "|") . "|i";
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . "\"\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // only follow redirect if it's on this site, or offsiteok is true
        if (preg_match($onSitePattern, $this->_redirectaddr) || $this->offsiteok) {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a snapshot: the nested fetch() calls collect their own frames
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
258
|
|
|
|
259
|
|
|
/*======================================================================*\
    Function:   submit
    Purpose:    submit an http form over http or https, then follow a
                pending redirect and any collected frame urls, bounded
                by maxredirs / maxframes
    Input:      $URI    the location to post the data
                $formvars   the formvars to use.
                    format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                    format: $formfiles["var"] = "/dir/filename.ext";
    Output:     $this->results  the text output from the post
    Returns:    false when the scheme is neither http nor https, when the
                http connection cannot be opened, or when https is
                requested without an executable cURL binary;
                true otherwise
\*======================================================================*/

function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() yields false for seriously malformed URLs; treat that
        // as "no components" so the scheme check below rejects it cleanly
        $URI_PARTS = array();
    }
    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    foreach (array("scheme", "host") as $part) {
        if (!isset($URI_PARTS[$part])) {
            $URI_PARTS[$part] = '';
        }
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }

    switch (strtolower($URI_PARTS["scheme"])) {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }
            $this->_disconnect($fp);

            $onSitePattern = "|^http://" . preg_quote($this->host, "|") . "|i";
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if ($this->_isproxy) {
                // using proxy, send entire URI
                $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            } else {
                // no proxy, send only the path
                $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
                $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }

            // an https page redirecting to the same host over https (or
            // http) stays "on this site"
            $onSitePattern = "|^https?://" . preg_quote($this->host, "|") . "|i";
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . "\"\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // a redirect target that is not already an absolute http(s) URL is
        // resolved against the site that was just contacted
        if (!preg_match("|^https?://|i", $this->_redirectaddr)) {
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $URI_PARTS["host"]);
        }

        // only follow redirect if it's on this site, or offsiteok is true
        if (preg_match($onSitePattern, $this->_redirectaddr) || $this->offsiteok) {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0) {
                // the redirect has changed the request method from post to get
                $this->fetch($this->_redirectaddr);
            } else {
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a snapshot: the nested fetch() calls collect their own frames
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
400
|
|
|
|
401
|
|
|
/*======================================================================*\
    Function:   fetchlinks
    Purpose:    fetch the links from a web page
    Input:      $URI    where you are fetching from
    Output:     $this->results  an array of the URLs; when the fetch
                produced an array of documents, each entry is replaced
                by that document's own array of URLs
    Returns:    true when the fetch succeeded, false otherwise
\*======================================================================*/

function fetchlinks($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    // links are expanded relative to the page that was finally delivered
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        // expand per document: handing the nested array to _expandlinks()
        // in one go would make preg_replace() stringify the inner arrays
        foreach (array_keys($this->results) as $x) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks) {
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
    }

    return true;
}
425
|
|
|
|
426
|
|
|
/*======================================================================*\
    Function:   fetchform
    Purpose:    fetch the form elements from a web page
    Input:      $URI    where you are fetching from
    Output:     $this->results  the resulting html form (one entry per
                document when the fetch produced an array)
    Returns:    true when the fetch succeeded, false otherwise
\*======================================================================*/

function fetchform($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (is_array($this->results)) {
        // walk the keys that actually exist instead of assuming 0..n-1
        foreach (array_keys($this->results) as $x) {
            $this->results[$x] = $this->_stripform($this->results[$x]);
        }
    } else {
        $this->results = $this->_stripform($this->results);
    }

    return true;
}
448
|
|
|
|
449
|
|
|
|
450
|
|
|
/*======================================================================*\
    Function:   fetchtext
    Purpose:    fetch the text from a web page, stripping the links
    Input:      $URI    where you are fetching from
    Output:     $this->results  the text from the web page (one entry per
                document when the fetch produced an array)
    Returns:    true when the fetch succeeded, false otherwise
\*======================================================================*/

function fetchtext($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (is_array($this->results)) {
        // walk the keys that actually exist instead of assuming 0..n-1
        foreach (array_keys($this->results) as $x) {
            $this->results[$x] = $this->_striptext($this->results[$x]);
        }
    } else {
        $this->results = $this->_striptext($this->results);
    }

    return true;
}
469
|
|
|
|
470
|
|
|
/*======================================================================*\
    Function:   submitlinks
    Purpose:    grab links from a form submission
    Input:      $URI    where you are submitting from
                $formvars   the formvars to use, passed on to submit()
                $formfiles  the files to submit, passed on to submit()
    Output:     $this->results  an array of the links from the post
    Returns:    true when the submit succeeded, false otherwise
\*======================================================================*/

function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // links are expanded relative to the page that was finally delivered
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        // walk the keys that actually exist instead of assuming 0..n-1
        foreach (array_keys($this->results) as $x) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks) {
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
    }

    return true;
}
497
|
|
|
|
498
|
|
|
/*======================================================================*\
    Function:   submittext
    Purpose:    grab text from a form submission
    Input:      $URI    where you are submitting from
                $formvars   the formvars to use, passed on to submit()
                $formfiles  the files to submit, passed on to submit()
    Output:     $this->results  the text from the web page
    Returns:    true when the submit succeeded, false otherwise
\*======================================================================*/

function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // expansion is done relative to the page that was finally delivered
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        // walk the keys that actually exist instead of assuming 0..n-1
        foreach (array_keys($this->results) as $x) {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            if ($this->expandlinks) {
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
    } else {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
    }

    return true;
}
525
|
|
|
|
526
|
|
|
|
527
|
|
|
/*======================================================================*\
    Function:   set_submit_multipart
    Purpose:    Switch the content type used for form submissions to
                multipart/form-data
\*======================================================================*/
function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
}
536
|
|
|
|
537
|
|
|
|
538
|
|
|
/*======================================================================*\
    Function:   set_submit_normal
    Purpose:    Switch the content type used for form submissions to
                application/x-www-form-urlencoded
\*======================================================================*/
function set_submit_normal()
{
    $this->_submit_type = "application/x-www-form-urlencoded";
}
547
|
|
|
|
548
|
|
|
|
549
|
|
|
|
550
|
|
|
|
551
|
|
|
/*======================================================================*\ |
552
|
|
|
Private functions |
553
|
|
|
\*======================================================================*/ |
554
|
|
|
|
555
|
|
|
|
556
|
|
|
/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an HTML document
    Input:      $document   document to strip.
    Output:     $match      an array of the links: every quoted href
                            value first, then every unquoted one;
                            an empty array when no link is found
\*======================================================================*/

function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // always hand back an array, even when the document has no links
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted href values, group 3 the unquoted ones
    foreach (array(2, 3) as $group) {
        if (!isset($links[$group]) || !is_array($links[$group])) {
            continue;
        }
        foreach ($links[$group] as $val) {
            if (!empty($val)) {
                $match[] = $val;
            }
        }
    }

    // return the links
    return $match;
}
587
|
|
|
|
588
|
|
|
/*======================================================================*\
    Function:   _stripform
    Purpose:    strip the form elements from an HTML document
    Input:      $document   document to strip.
    Output:     $match      a string holding the matched form, input,
                            select, textarea and option tags, joined
                            with "\r\n"; empty when nothing matches
\*======================================================================*/

function _stripform($document)
{
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

    // a failed match leaves no full-match list to join
    if (empty($elements[0])) {
        return "";
    }

    // catenate the matches and return them
    return implode("\r\n", $elements[0]);
}
605
|
|
|
|
606
|
|
|
|
607
|
|
|
/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an HTML document: script blocks and
                tags are removed, whitespace after a line break is
                dropped, and a fixed list of entities is decoded
    Input:      $document   document to strip.
    Output:     $text       the resulting text
\*======================================================================*/

function _striptext($document)
{
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.
    //
    // The rules run in the order listed, each on the output of the one
    // before it, so tags are stripped before &lt; / &gt; are decoded.
    //
    // NOTE(review): the chr() replacements emit single bytes (161-176, 128);
    // presumably meant for a Latin-1 / Windows-1252 consumer - confirm
    // before relying on them with UTF-8 output.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",        // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",        // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",     // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",      // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",       // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128),  // europe
        "'&a(uml|UML);'"                 => "ä",       // german
        "'&o(uml|UML);'"                 => "ö",
        "'&u(uml|UML);'"                 => "ü",
        "'&A(uml|UML);'"                 => "Ä",
        "'&O(uml|UML);'"                 => "Ö",
        "'&U(uml|UML);'"                 => "Ü",
        // matches the entity, like every rule above; a literal "ß" here
        // would only replace the character with itself
        "'&szlig;'i"                     => "ß",
    );

    $text = preg_replace(array_keys($rules), array_values($rules), $document);

    return $text;
}
674
|
|
|
|
675
|
|
|
/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (a string or an
                                array of strings)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/

function _expandlinks($links, $URI)
{
    // base directory: the URI without its query string ...
    $match = preg_match("/^[^\?]+/", $URI, $found) ? $found[0] : "";

    // ... without a trailing "name.ext" segment (the lookbehind keeps the
    // "//" in front of a two-label host such as example.com from being
    // mistaken for the start of a file name) ...
    $match = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|", "", $match);
    // ... and without a trailing slash
    $match = preg_replace("|/$|", "", $match);

    $match_part = parse_url($match);
    if (!is_array($match_part)) {
        $match_part = array();
    }
    $match_root =
        (isset($match_part["scheme"]) ? $match_part["scheme"] : "") . "://" .
        (isset($match_part["host"]) ? $match_part["host"] : "");

    $search = array(
        // absolute link to the current host -> make it root-relative
        "|^http://" . preg_quote($this->host, "|") . "|i",
        // root-relative link -> prefix with scheme://host
        "|^(\/)|i",
        // anything that is not an absolute http(s) or mailto link ->
        // prefix with the base directory
        "|^(?!https?://)(?!mailto:)|i",
        // collapse "/./"
        "|/\./|",
        // collapse "/dir/../"
        "|/[^\/]+/\.\./|"
    );

    $replace = array(
        "",
        $match_root . "/",
        $match . "/",
        "/",
        "/"
    );

    $expandedLinks = preg_replace($search, $replace, $links);

    return $expandedLinks;
}
712
|
|
|
|
713
|
|
|
/*======================================================================*\ |
714
|
|
|
Function: _httprequest |
715
|
|
|
Purpose: go get the http data from the server |
716
|
|
|
Input: $url the url to fetch |
717
|
|
|
$fp the current open file pointer |
718
|
|
|
$URI the full URI |
719
|
|
|
$body body contents to send if any (POST) |
720
|
|
|
Output: |
721
|
|
|
\*======================================================================*/ |
722
|
|
|
|
723
|
|
|
/**
 * Send an HTTP request over an already-open stream and parse the response.
 *
 * Builds the request line and headers from the object's settings, writes
 * them followed by $body to $fp, then reads the response headers and body.
 * While reading it records the status, response line and headers, any
 * redirect target (Location:/URI: header or meta refresh tag) and any
 * frame sources that should be fetched next.
 *
 * @param string   $url          path to request; "/" is used when empty
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI (its scheme is used to build absolute redirect/frame URLs)
 * @param string   $http_method  method placed on the request line
 * @param string   $content_type value of the Content-type header, if any
 * @param string   $body         body contents to send, if any (POST)
 *
 * @return bool true once a response has been read, false when the read
 *              timed out (status is then set to -100)
 */
function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
{
    $cookie_headers = '';
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $URI_PARTS = parse_url($URI);
    if (empty($url)) {
        $url = "/";
    }

    $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
    if (!empty($this->agent)) {
        $headers .= "User-Agent: " . $this->agent . "\r\n";
    }
    // A caller-supplied raw Host header takes precedence over the generated one.
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: " . $this->host;
        if (!empty($this->port) && $this->port != '80') {
            $headers .= ":" . $this->port;
        }
        $headers .= "\r\n";
    }
    if (!empty($this->accept)) {
        $headers .= "Accept: " . $this->accept . "\r\n";
    }
    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        } else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support." .
                "  Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }
    if (!empty($this->referer)) {
        $headers .= "Referer: " . $this->referer . "\r\n";
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array)$this->cookies;
        }

        if (count($this->cookies) > 0) {
            $cookie_headers .= 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers, 0, -2) . "\r\n";
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array)$this->rawheaders;
        }
        // foreach always starts at the first element; the previous each()
        // loop never rewound the array, so raw headers were silently
        // dropped on every request after the first (e.g. after a redirect).
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers .= $headerKey . ": " . $headerVal . "\r\n";
        }
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data") {
            $headers .= "; boundary=" . $this->_mime_boundary;
        }
        $headers .= "\r\n";
    }
    if (!empty($body)) {
        $headers .= "Content-length: " . strlen($body) . "\r\n";
    }
    if (!empty($this->user) || !empty($this->pass)) {
        $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";
    }

    //add proxy auth headers
    if (!empty($this->proxy_user)) {
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";
    }

    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0) {
        socket_set_timeout($fp, $this->read_timeout);
    }
    $this->timed_out = false;

    $request = $headers . $body;
    fwrite($fp, $request, strlen($request));

    $this->_redirectaddr = false;
    // Reset to an empty array instead of unsetting the property, so code
    // that counts or iterates the headers still works when the server
    // sends no header lines at all.
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        // an empty line marks the end of the response headers
        if ($currentHeader == "\r\n") {
            break;
        }

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, and the URL is only read
        // from a successful match.
        if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($currentHeader), $matches)) {
            $redirectTarget = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $redirectTarget)) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $redirectTarget)) {
                    $this->_redirectaddr .= "/" . $redirectTarget;
                } else {
                    $this->_redirectaddr .= $redirectTarget;
                }
            } else {
                $this->_redirectaddr = $redirectTarget;
            }
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        // header names and values are case-insensitive, and the space
        // after the colon is optional
        if (preg_match("/Content-Encoding:\s*gzip/i", $currentHeader)) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body until the stream returns no more data
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while (true);

    // gunzip
    if ($is_gzipped) {
        // per https://www.php.net/manual/en/function.gzencode.php
        // skip the 10-byte gzip header before inflating
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // Appending with [] to a string is a fatal error on PHP >= 7.1, so
        // switch the result store to an array before the first append.
        if (!is_array($this->results)) {
            $this->results = array();
        }
        $this->results[] = $results;
        foreach ($match[1] as $frameSrc) {
            $this->_frameurls[] = $this->_expandlinks($frameSrc, $URI_PARTS["scheme"] . "://" . $this->host);
        }
    } // have we already fetched framed content?
    elseif (is_array($this->results)) {
        $this->results[] = $results;
    } // no framed content
    else {
        $this->results = $results;
    }

    return true;
}
888
|
|
|
|
889
|
|
|
/*======================================================================*\ |
890
|
|
|
Function: _httpsrequest |
891
|
|
|
Purpose: go get the https data from the server using curl |
892
|
|
|
Input: $url the url to fetch |
893
|
|
|
$URI the full URI |
894
|
|
|
$body body contents to send if any (POST) |
895
|
|
|
Output: |
896
|
|
|
\*======================================================================*/ |
897
|
|
|
|
898
|
|
|
/**
 * Fetch an HTTPS document by shelling out to the curl binary.
 *
 * Builds the request headers from the object's settings, runs
 * $this->curl_path with them, then parses the response headers that curl
 * dumped into a temporary file and the body it printed. Records status,
 * response line, headers, redirect target and frame sources.
 *
 * $url and $http_method are accepted for signature parity with
 * _httprequest but are not used to build the command: curl is given the
 * full $URI, and a body is passed with -d.
 *
 * NOTE(review): curl is invoked with -k, which disables TLS certificate
 * verification. This is left unchanged here; confirm whether it is
 * still wanted.
 *
 * @param string $url          the url to fetch (unused, see above)
 * @param string $URI          the full URI
 * @param string $http_method  request method (unused, see above)
 * @param string $content_type value of the Content-type header, if any
 * @param string $body         body contents to send, if any (POST)
 *
 * @return bool true on success, false when the temporary file could not be
 *              created or curl exited with a non-zero status (error is set)
 */
function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
{
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $headers = array();

    $URI_PARTS = parse_url($URI);

    // GET ... header not needed for curl
    if (!empty($this->agent)) {
        $headers[] = "User-Agent: " . $this->agent;
    }
    if (!empty($this->host)) {
        if (!empty($this->port)) {
            $headers[] = "Host: " . $this->host . ":" . $this->port;
        } else {
            $headers[] = "Host: " . $this->host;
        }
    }
    if (!empty($this->accept)) {
        $headers[] = "Accept: " . $this->accept;
    }
    if (!empty($this->referer)) {
        $headers[] = "Referer: " . $this->referer;
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array)$this->cookies;
        }

        if (count($this->cookies) > 0) {
            $cookie_str = 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_str .= $cookieKey . "=" . urlencode($cookieVal) . "; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str, 0, -2);
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array)$this->rawheaders;
        }
        // foreach always starts at the first element, unlike the previous
        // each() loop which depended on the array's internal pointer.
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers[] = $headerKey . ": " . $headerVal;
        }
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data") {
            $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
        } else {
            $headers[] = "Content-type: $content_type";
        }
    }
    if (!empty($body)) {
        $headers[] = "Content-length: " . strlen($body);
    }
    if (!empty($this->user) || !empty($this->pass)) {
        $headers[] = "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass);
    }

    // Every value placed on the command line is passed through
    // escapeshellarg(): headers, cookies and the body can carry data taken
    // from remote servers or users, and plain double quotes do not stop
    // $(...) or backtick expansion in the shell.
    $cmdline_params = '';
    foreach ($headers as $header) {
        $cmdline_params .= " -H " . escapeshellarg($header);
    }

    if (!empty($body)) {
        $cmdline_params .= " -d " . escapeshellarg($body);
    }

    if ($this->read_timeout > 0) {
        $cmdline_params .= " -m " . escapeshellarg((string)$this->read_timeout);
    }

    // Use the configured temp dir when the object has one, otherwise the
    // system default (the previous code read an undefined local variable).
    $temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($temp_dir, "sno");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the cURL response headers.";
        return false;
    }

    $results = array();
    $return = 0;
    exec($this->curl_path . " -k -D " . escapeshellarg($headerfile) . $cmdline_params . " " . escapeshellarg($URI), $results, $return);

    if ($return) {
        // do not leave the temporary header file behind on failure
        if (is_file($headerfile)) {
            unlink($headerfile);
        }
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() returns the output split into lines; join them back together
    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    if ($result_headers === false) {
        $result_headers = array();
    }
    unlink($headerfile);

    $this->_redirectaddr = false;
    // Reset to an empty array instead of unsetting the property, so code
    // that counts or iterates the headers still works when curl wrote none.
    $this->headers = array();

    foreach ($result_headers as $headerLine) {
        // if a header begins with Location: or URI:, set the redirect.
        // The previous second pattern demanded extra whitespace after
        // "Location: " and so never matched an ordinary Location header.
        if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($headerLine), $matches)) {
            $redirectTarget = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $redirectTarget)) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $redirectTarget)) {
                    $this->_redirectaddr .= "/" . $redirectTarget;
                } else {
                    $this->_redirectaddr .= $redirectTarget;
                }
            } else {
                $this->_redirectaddr = $redirectTarget;
            }
        }

        if (preg_match("|^HTTP/|", $headerLine)) {
            $this->response_code = $headerLine;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match)) {
                $this->status = $match[1];
            }
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // Appending with [] to a string is a fatal error on PHP >= 7.1, so
        // switch the result store to an array before the first append.
        if (!is_array($this->results)) {
            $this->results = array();
        }
        $this->results[] = $results;
        foreach ($match[1] as $frameSrc) {
            $this->_frameurls[] = $this->_expandlinks($frameSrc, $URI_PARTS["scheme"] . "://" . $this->host);
        }
    } // have we already fetched framed content?
    elseif (is_array($this->results)) {
        $this->results[] = $results;
    } // no framed content
    else {
        $this->results = $results;
    }

    return true;
}
1030
|
|
|
|
1031
|
|
|
/*======================================================================*\ |
1032
|
|
|
Function: setcookies() |
1033
|
|
|
Purpose: set cookies for a redirection |
1034
|
|
|
\*======================================================================*/ |
1035
|
|
|
|
1036
|
|
|
/**
 * Copy cookies from the last response's Set-Cookie headers into
 * $this->cookies so they are sent with the next (redirected) request.
 *
 * Only the name=value pair of each Set-Cookie header is kept; the value is
 * URL-decoded before it is stored. Does nothing when no response headers
 * are stored.
 *
 * @return void
 */
function setcookies()
{
    // The headers property can be unset or empty when no response header
    // was read; count() on a non-array throws a TypeError on PHP 8.
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match)) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
1043
|
|
|
|
1044
|
|
|
|
1045
|
|
|
/*======================================================================*\ |
1046
|
|
|
Function: _check_timeout |
1047
|
|
|
Purpose: checks whether timeout has occurred |
1048
|
|
|
Input: $fp file pointer |
1049
|
|
|
\*======================================================================*/ |
1050
|
|
|
|
1051
|
|
|
/**
 * Report whether the last read on the stream timed out.
 *
 * The stream is only inspected when a read timeout is configured
 * (read_timeout > 0). When it reports a timeout, the timed_out flag on
 * this object is raised.
 *
 * @param resource $fp file pointer
 *
 * @return bool true if the stream timed out, false otherwise
 */
function _check_timeout($fp)
{
    // no read timeout configured: nothing to check
    if (!($this->read_timeout > 0)) {
        return false;
    }

    // socket_get_status() is an alias of stream_get_meta_data()
    $streamInfo = stream_get_meta_data($fp);
    if (!$streamInfo["timed_out"]) {
        return false;
    }

    $this->timed_out = true;

    return true;
}
1062
|
|
|
|
1063
|
|
|
/*======================================================================*\ |
1064
|
|
|
Function: _connect |
1065
|
|
|
Purpose: make a socket connection |
1066
|
|
|
Input: $fp file pointer |
1067
|
|
|
\*======================================================================*/ |
1068
|
|
|
|
1069
|
|
|
/**
 * Open a socket connection to the target host, or to the proxy when both
 * proxy_host and proxy_port are set.
 *
 * On failure the socket error number is stored in $this->status and a
 * matching message in $this->error.
 *
 * @param resource|false $fp receives the opened file pointer (by reference)
 *
 * @return bool true when the connection was opened, false otherwise
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    // Each case needs its own break: without it every branch fell through
    // to the default and the specific message was always overwritten.
    switch ($errno) {
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (" . $errno . ")";
            break;
    }

    return false;
}
1110
|
|
|
|
1111
|
|
|
/*======================================================================*\ |
1112
|
|
|
Function: _disconnect |
1113
|
|
|
Purpose: disconnect a socket connection |
1114
|
|
|
Input: $fp file pointer |
1115
|
|
|
\*======================================================================*/ |
1116
|
|
|
|
1117
|
|
|
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer
 *
 * @return bool the result of fclose() on the pointer
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
1121
|
|
|
|
1122
|
|
|
|
1123
|
|
|
/*======================================================================*\ |
1124
|
|
|
Function: _prepare_post_body |
1125
|
|
|
Purpose: Prepare post body according to encoding type |
1126
|
|
|
Input: $formvars - form variables |
1127
|
|
|
$formfiles - form upload files |
1128
|
|
|
Output: post body |
1129
|
|
|
\*======================================================================*/ |
1130
|
|
|
|
1131
|
|
|
/**
 * Prepare the POST body according to the current submit encoding type
 * ($this->_submit_type).
 *
 * For "application/x-www-form-urlencoded" the form variables are
 * URL-encoded and joined with "&". For "multipart/form-data" a fresh MIME
 * boundary is generated and stored in $this->_mime_boundary, and each
 * variable and each readable file becomes one part. Array or object values
 * are sent as repeated "name[]" fields.
 *
 * @param mixed $formvars  form variables (name => value, or name => list of values)
 * @param mixed $formfiles form upload files (field name => file path or list of paths)
 *
 * @return string the post body; an empty string when there is nothing to
 *                send or the submit type is not one of the two above
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    // Nothing to send: return the empty body rather than null so callers
    // always receive a string.
    if (count($formvars) == 0 && count($formfiles) == 0) {
        return $postdata;
    }

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    }
                } else {
                    $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        // {$key}[] yields name="key[]"; the earlier "$key\[\]"
                        // form left literal backslashes in the field name
                        // because \[ is not an escape sequence.
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) {
                        continue;
                    }

                    // file_get_contents() also handles empty files, where
                    // fread() with a zero length throws on PHP 8.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) {
                        continue;
                    }
                    $base_name = basename($file_name);

                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--" . $this->_mime_boundary . "--\r\n";
            break;
    }

    return $postdata;
}
1193
|
|
|
} |
1194
|
|
|
|
1195
|
|
|
?> |
|
|
|
|
1196
|
|
|
|
Adding explicit visibility (private, protected, or public) is generally recommended to communicate to other developers how, and from where, this method is intended to be used.