1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/************************************************* |
4
|
|
|
* |
5
|
|
|
* Snoopy - the PHP net client |
6
|
|
|
* Author: Monte Ohrt <[email protected]> |
7
|
|
|
* Copyright (c): 1999-2014, all rights reserved |
8
|
|
|
* Version: 1.2.5 |
9
|
|
|
* This library is free software; you can redistribute it and/or |
10
|
|
|
* modify it under the terms of the GNU Lesser General Public |
11
|
|
|
* License as published by the Free Software Foundation; either |
12
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
13
|
|
|
* |
14
|
|
|
* This library is distributed in the hope that it will be useful, |
15
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
16
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17
|
|
|
* Lesser General Public License for more details. |
18
|
|
|
* |
19
|
|
|
* You should have received a copy of the GNU Lesser General Public |
20
|
|
|
* License along with this library; if not, write to the Free Software |
21
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22
|
|
|
* |
23
|
|
|
* You may contact the author of Snoopy by e-mail at: |
24
|
|
|
* [email protected] |
25
|
|
|
* |
26
|
|
|
* The latest version of Snoopy can be obtained from: |
27
|
|
|
* http://snoopy.sourceforge.net/ |
28
|
|
|
*************************************************/ |
29
|
|
|
class Snoopy |
30
|
|
|
{ |
31
|
|
|
/**
 * Record a deprecation notice with the XOOPS logger, when one is available.
 *
 * The logger is looked up in $GLOBALS['xoopsLogger']. When that entry is
 * missing, or does not expose a callable addDeprecated(), the notice is
 * skipped so that constructing the object does not end in a fatal
 * "call to a member function on null" error.
 */
public function __construct()
{
    $logger = isset($GLOBALS['xoopsLogger']) ? $GLOBALS['xoopsLogger'] : null;

    if (is_object($logger) && is_callable(array($logger, 'addDeprecated'))) {
        $logger->addDeprecated("Use of Snoopy in XOOPS is deprecated and has been replaced in core with XoopsHttpGet. Snoopy will be removed in future versions..");
    }
}
35
|
|
|
|
36
|
|
|
/**** Public variables ****/ |
37
|
|
|
|
38
|
|
|
/* user definable vars */ |
39
|
|
|
|
40
|
|
|
// ---- connection target and proxy ------------------------------------

/** Host name we are connecting to. */
public $host = "www.php.net";
/** Port we are connecting to. */
public $port = 80;
/** Proxy host to use. */
public $proxy_host = "";
/** Proxy port to use. */
public $proxy_port = "";
/** Proxy user to use. */
public $proxy_user = "";
/** Proxy password to use. */
public $proxy_pass = "";

// ---- request headers -------------------------------------------------

/** Agent we masquerade as. */
public $agent = "Snoopy v1.2.5";
/** Referer info to pass. */
public $referer = "";
/** Cookies to pass, e.g. $cookies["username"]="joe"; */
public $cookies = array();
/** Raw headers to send, e.g. $rawheaders["Content-type"]="text/html"; */
public $rawheaders = array();

// ---- redirect / frame / link behaviour -------------------------------

/** Maximum http redirection depth. 0 = disallow. */
public $maxredirs = 5;
/** Address of the last redirect that was followed. */
public $lastredirectaddr = "";
/** Allows redirection off-site. */
public $offsiteok = true;
/** Maximum frame content depth. 0 = disallow. */
public $maxframes = 0;
/**
 * Expand links to fully qualified URLs.
 * Only applies to fetchlinks(), submitlinks(), and submittext().
 */
public $expandlinks = true;
/**
 * Pass set cookies back through redirects.
 * NOTE: this currently does not respect dates, domains or paths.
 */
public $passcookies = true;

// ---- http authentication ---------------------------------------------

/** User for http authentication. */
public $user = "";
/** Password for http authentication. */
public $pass = "";

/** Http accept types. */
public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

// ---- response state --------------------------------------------------

/** Where the content is put. */
public $results = "";

/** Error messages are sent here. */
public $error = "";
/** Response code returned from the server. */
public $response_code = "";
/** Headers returned from the server. */
public $headers = array();
/** Maximum return data length (body). */
public $maxlength = 500000;
/** Timeout on read operations, in seconds. Set to 0 to disallow timeouts. */
public $read_timeout = 0;
/** Whether a read operation timed out. */
public $timed_out = false;
/** Http request status. */
public $status = 0;

// ---- environment -----------------------------------------------------

/**
 * Temporary directory that the webserver has permission to write to.
 * Under Windows, this should be C:\temp
 */
public $temp_dir = "/tmp";

/**
 * Full system path to the cURL binary, used for fetching SSL content.
 * Set to false if cURL is not installed (see http://curl.haxx.se).
 * Per the original note, the cURL functions built into PHP are
 * deliberately not used by this class.
 */
public $curl_path = "/usr/bin/curl";

/** Send Accept-encoding: gzip? */
public $use_gzip = true;

/**** Private variables ****/
// These stay publicly visible (the original `var` keyword means public);
// the leading underscore is only a naming convention.

/** Max line length (headers). */
public $_maxlinelen = 4096;

/** Default http request method. */
public $_httpmethod = "GET";
/** Default http request version. */
public $_httpversion = "HTTP/1.0";
/** Default submit method. */
public $_submit_method = "POST";
/** Default submit type. */
public $_submit_type = "application/x-www-form-urlencoded";
/** MIME boundary for the multipart/form-data submit type. */
public $_mime_boundary = "";
/** Will be set if the page fetched is a redirect. */
public $_redirectaddr = false;
/** Increments on an http redirect. */
public $_redirectdepth = 0;
/** Frame src urls. */
public $_frameurls = array();
/** Increments on frame depth. */
public $_framedepth = 0;

/** Set if using a proxy server. */
public $_isproxy = false;
/** Timeout for socket connection. */
public $_fp_timeout = 30;
117
|
|
|
|
118
|
|
|
/*======================================================================*\ |
119
|
|
|
Function: fetch |
120
|
|
|
Purpose: fetch the contents of a web page |
121
|
|
|
(and possibly other protocols in the |
122
|
|
|
future like ftp, nntp, gopher, etc.) |
123
|
|
|
Input: $URI the location of the page to fetch |
124
|
|
|
Output: $this->results the output text from the fetch |
125
|
|
|
\*======================================================================*/ |
126
|
|
|
|
127
|
|
|
/**
 * Fetch a page over http or https, following redirects and frames.
 *
 * Credentials embedded in the URI overwrite $this->user / $this->pass.
 * For http the request goes through _connect() / _httprequest(); for https
 * it goes through _httpsrequest(), which needs $this->curl_path to point at
 * an executable. According to the method's header comment the fetched
 * content ends up in $this->results (populated by those helpers).
 *
 * Redirects are followed while $this->maxredirs exceeds the current
 * redirect depth, and only when the target is on the current host or
 * $this->offsiteok is set. Frame URLs collected in $this->_frameurls are
 * fetched while $this->_framedepth is below $this->maxframes.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the scheme is unsupported (sets $this->error),
 *              when the connection fails, or when https is requested
 *              without a usable cURL binary; true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() returns false on seriously malformed URLs
        $URI_PARTS = array();
    }

    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';

    // without a proxy only the path (plus query) is sent
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($scheme)) {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            // using a proxy: send the entire URI
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);

            $siteSchemes = "http";
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            // using a proxy: send the entire URI
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);

            // a redirect that stays on https://<host> is still "on this site"
            $siteSchemes = "https?";
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"' . "\n";
            return false;
    }

    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // only follow the redirect if it's on this site, or offsiteok is true
        $sameSite = "|^" . $siteSchemes . "://" . preg_quote($this->host, "|") . "|i";
        if (preg_match($sameSite, $this->_redirectaddr) || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a copy: nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
241
|
|
|
|
242
|
|
|
/*======================================================================*\ |
243
|
|
|
Function: submit |
244
|
|
|
Purpose: submit an http form |
245
|
|
|
Input: $URI the location to post the data |
246
|
|
|
$formvars the formvars to use. |
247
|
|
|
format: $formvars["var"] = "val"; |
248
|
|
|
$formfiles an array of files to submit |
249
|
|
|
format: $formfiles["var"] = "/dir/filename.ext"; |
250
|
|
|
Output: $this->results the text output from the post |
251
|
|
|
\*======================================================================*/ |
252
|
|
|
|
253
|
|
|
/**
 * Submit an http form, following redirects and frames.
 *
 * The request body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. Credentials embedded in the
 * URI overwrite $this->user / $this->pass. According to the method's header
 * comment the response text ends up in $this->results.
 *
 * A redirect that does not start with the request's scheme is first
 * expanded against scheme://host. When followed, a redirect containing "?"
 * after its first character is re-requested with fetch() (GET); any other
 * redirect is re-submitted with the same form data.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the formvars to use; format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit; format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the scheme is unsupported (sets $this->error),
 *              when the connection fails, or when https is requested
 *              without a usable cURL binary; true otherwise
 */
function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS)) {
        // parse_url() returns false on seriously malformed URLs
        $URI_PARTS = array();
    }

    if (!empty($URI_PARTS["user"])) {
        $this->user = $URI_PARTS["user"];
    }
    if (!empty($URI_PARTS["pass"])) {
        $this->pass = $URI_PARTS["pass"];
    }
    if (empty($URI_PARTS["query"])) {
        $URI_PARTS["query"] = '';
    }
    if (empty($URI_PARTS["path"])) {
        $URI_PARTS["path"] = '';
    }

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';

    // without a proxy only the path (plus query) is sent
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($scheme)) {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            if (!$this->_connect($fp)) {
                return false;
            }
            // using a proxy: send the entire URI
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);

            $siteSchemes = "http";
            break;

        case "https":
            if (!$this->curl_path) {
                return false;
            }
            if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"])) {
                $this->port = $URI_PARTS["port"];
            }
            // using a proxy: send the entire URI
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);

            // a redirect that stays on https://<host> is still "on this site"
            $siteSchemes = "https?";
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"' . "\n";
            return false;
    }

    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // redirect target without our scheme: qualify it against scheme://host
        if (!preg_match("|^" . preg_quote($scheme, "|") . "://|", $this->_redirectaddr)) {
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme . "://" . $host);
        }

        // only follow the redirect if it's on this site, or offsiteok is true
        $sameSite = "|^" . $siteSchemes . "://" . preg_quote($this->host, "|") . "|i";
        if (preg_match($sameSite, $this->_redirectaddr) || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0) {
                // the redirect has changed the request method from post to get
                $this->fetch($this->_redirectaddr);
            } else {
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // work on a copy: nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes) {
                break;
            }
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
383
|
|
|
|
384
|
|
|
/*======================================================================*\ |
385
|
|
|
Function: fetchlinks |
386
|
|
|
Purpose: fetch the links from a web page |
387
|
|
|
Input: $URI where you are fetching from |
388
|
|
|
Output: $this->results an array of the URLs |
389
|
|
|
\*======================================================================*/ |
390
|
|
|
|
391
|
|
|
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When $this->results is an array, every element is reduced to its own
 * list of links. Links are expanded per element (the same way
 * submitlinks() does it), because _expandlinks() cannot process an array
 * whose elements are themselves arrays in a single call.
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    // relative links must be resolved against the final (redirected) address
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (is_array($this->results)) {
        for ($x = 0; $x < count($this->results); $x++) {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks) {
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
    } else {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
    }

    return true;
}
408
|
|
|
|
409
|
|
|
/*======================================================================*\ |
410
|
|
|
Function: fetchform |
411
|
|
|
Purpose: fetch the form elements from a web page |
412
|
|
|
Input: $URI where you are fetching from |
413
|
|
|
Output: $this->results the resulting html form |
414
|
|
|
\*======================================================================*/ |
415
|
|
|
|
416
|
|
|
/**
 * Fetch a page and reduce $this->results to its form markup.
 *
 * Each result (or the single result) is passed through _stripform().
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchform($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_stripform($this->results);
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }

    return true;
}
431
|
|
|
|
432
|
|
|
|
433
|
|
|
/*======================================================================*\ |
434
|
|
|
Function: fetchtext |
435
|
|
|
Purpose: fetch the text from a web page, stripping the links |
436
|
|
|
Input: $URI where you are fetching from |
437
|
|
|
Output: $this->results the text from the web page |
438
|
|
|
\*======================================================================*/ |
439
|
|
|
|
440
|
|
|
/**
 * Fetch a page and reduce $this->results to plain text.
 *
 * Each result (or the single result) is passed through _striptext().
 *
 * @param string $URI where you are fetching from
 * @return bool true when the fetch succeeded, false otherwise
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striptext($this->results);
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }

    return true;
}
452
|
|
|
|
453
|
|
|
/*======================================================================*\ |
454
|
|
|
Function: submitlinks |
455
|
|
|
Purpose: grab links from a form submission |
456
|
|
|
Input: $URI where you are submitting from |
457
|
|
|
Output: $this->results an array of the links from the post |
458
|
|
|
\*======================================================================*/ |
459
|
|
|
|
460
|
|
|
/**
 * Submit a form and replace $this->results with the links of the response.
 *
 * Links are extracted with _striplinks() and, when $this->expandlinks is
 * set, qualified with _expandlinks() against the last redirect address
 * (or the submitted URI when no redirect was recorded).
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed on to submit()
 * @param mixed  $formfiles form files, handed on to submit()
 * @return bool true when the submit succeeded, false otherwise
 */
function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results)) {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $base);
        }
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        }
        $idx++;
    }

    return true;
}
480
|
|
|
|
481
|
|
|
/*======================================================================*\ |
482
|
|
|
Function: submittext |
483
|
|
|
Purpose: grab text from a form submission |
484
|
|
|
Input: $URI where you are submitting from |
485
|
|
|
Output: $this->results the text from the web page |
486
|
|
|
\*======================================================================*/ |
487
|
|
|
|
488
|
|
|
/**
 * Submit a form and replace $this->results with the text of the response.
 *
 * Text is extracted with _striptext() and, when $this->expandlinks is set,
 * additionally run through _expandlinks() against the last redirect
 * address (or the submitted URI when no redirect was recorded).
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed on to submit()
 * @param mixed  $formfiles form files, handed on to submit()
 * @return bool true when the submit succeeded, false otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results)) {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $base);
        }
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        }
        $idx++;
    }

    return true;
}
508
|
|
|
|
509
|
|
|
|
510
|
|
|
/*======================================================================*\ |
511
|
|
|
Function: set_submit_multipart |
512
|
|
|
Purpose: Set the form submission content type to |
513
|
|
|
multipart/form-data |
514
|
|
|
\*======================================================================*/ |
515
|
|
|
/**
 * Switch the form submission content type to multipart/form-data.
 *
 * Only $this->_submit_type is changed; nothing is returned.
 */
function set_submit_multipart()
{
    $contentType = "multipart/form-data";
    $this->_submit_type = $contentType;
}
519
|
|
|
|
520
|
|
|
|
521
|
|
|
/*======================================================================*\ |
522
|
|
|
Function: set_submit_normal |
523
|
|
|
Purpose: Set the form submission content type to |
524
|
|
|
application/x-www-form-urlencoded |
525
|
|
|
\*======================================================================*/ |
526
|
|
|
/**
 * Switch the form submission content type back to
 * application/x-www-form-urlencoded.
 *
 * Only $this->_submit_type is changed; nothing is returned.
 */
function set_submit_normal()
{
    $contentType = "application/x-www-form-urlencoded";
    $this->_submit_type = $contentType;
}
530
|
|
|
|
531
|
|
|
|
532
|
|
|
|
533
|
|
|
|
534
|
|
|
/*======================================================================*\ |
535
|
|
|
Private functions |
536
|
|
|
\*======================================================================*/ |
537
|
|
|
|
538
|
|
|
|
539
|
|
|
/*======================================================================*\ |
540
|
|
|
Function: _striplinks |
541
|
|
|
Purpose: strip the hyperlinks from an html document |
542
|
|
|
Input: $document document to strip. |
543
|
|
|
Output: $match an array of the links |
544
|
|
|
\*======================================================================*/ |
545
|
|
|
|
546
|
|
|
/**
 * Extract the href targets of the anchors in an html document.
 *
 * Quoted hrefs (single or double quotes) are listed first, in document
 * order, followed by the unquoted ones. Values that PHP's empty() treats
 * as empty (including "0") are skipped.
 *
 * @param string $document document to strip
 * @return array the links that were found; an empty array when there are none
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // always hand back an array, even when nothing matched
    $match = array();

    // group 2 holds the quoted targets, group 3 the unquoted ones;
    // catenate the non-empty matches from the conditional subpattern
    foreach (array($links[2], $links[3]) as $targets) {
        foreach ($targets as $val) {
            if (!empty($val)) {
                $match[] = $val;
            }
        }
    }

    // return the links
    return $match;
}
570
|
|
|
|
571
|
|
|
/*======================================================================*\ |
572
|
|
|
Function: _stripform |
573
|
|
|
Purpose: strip the form elements from an html document |
574
|
|
|
Input: $document document to strip. |
575
|
|
|
Output: $match the matched form elements, joined by "\r\n" |
576
|
|
|
\*======================================================================*/ |
577
|
|
|
|
578
|
|
|
/**
 * Reduce an html document to its form-related markup.
 *
 * Collects the form, input, select, textarea and option tags (for option
 * tags including the text up to the next option/select tag) and joins the
 * matches with "\r\n".
 *
 * @param string $document document to strip
 * @return string the matched elements, one per line; "" when none matched
 */
function _stripform($document)
{
    $formTags = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($formTags, $document, $found);

    // index 0 holds the full text of every match
    return implode("\r\n", $found[0]);
}
588
|
|
|
|
589
|
|
|
|
590
|
|
|
/*======================================================================*\ |
591
|
|
|
Function: _striptext |
592
|
|
|
Purpose: strip the text from an html document |
593
|
|
|
Input: $document document to strip. |
594
|
|
|
Output: $text the resulting text |
595
|
|
|
\*======================================================================*/ |
596
|
|
|
|
597
|
|
|
/**
 * Reduce an html document to text.
 *
 * Removes script blocks and tags, collapses whitespace that follows a line
 * break, and decodes a fixed list of html entities. The patterns are
 * applied in the order listed, each on the output of the previous one.
 *
 * @param string|array $document document to strip
 * @return string|array the resulting text (same shape as the input, as
 *                      returned by preg_replace())
 */
function _striptext($document)
{
    // Pattern => replacement, kept side by side so the two lists cannot
    // drift apart. Entities are listed one by one; only some of the more
    // common ones are covered.
    $entityMap = array(
        "'<script[^>]*?>.*?</script>'si" => "",       // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",       // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",    // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",     // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",      // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128), // europe
        "'&a(uml|UML);'"                 => "ä",      // german
        "'&o(uml|UML);'"                 => "ö",
        "'&u(uml|UML);'"                 => "ü",
        "'&A(uml|UML);'"                 => "Ä",
        "'&O(uml|UML);'"                 => "Ö",
        "'&U(uml|UML);'"                 => "Ü",
        // match the entity itself: a literal sharp s replaced by a sharp s
        // would be a no-op
        "'&szlig;'i"                     => "ß",
    );

    $text = preg_replace(array_keys($entityMap), array_values($entityMap), $document);

    return $text;
}
657
|
|
|
|
658
|
|
|
/*======================================================================*\ |
659
|
|
|
Function: _expandlinks |
660
|
|
|
Purpose: expand each link into a fully qualified URL |
661
|
|
|
Input: $links the links to qualify |
662
|
|
|
$URI the full URI to get the base from |
663
|
|
|
Output: $expandedLinks the expanded links |
664
|
|
|
\*======================================================================*/ |
665
|
|
|
|
666
|
|
|
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is derived from $URI by dropping the query string, a trailing
 * "name.ext" segment and a trailing slash. The rewrite rules are then
 * applied to every link, one after the other.
 *
 * @param string|array $links the links to qualify
 * @param string       $URI   the full URI to get the base from
 * @return string|array the expanded links (same shape as $links, as
 *                      returned by preg_replace())
 */
function _expandlinks($links, $URI)
{
    // everything before the query string
    preg_match("/^[^\?]+/", $URI, $uriSansQuery);

    // base "directory": strip a trailing file name, then a trailing slash
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uriSansQuery[0]);
    $base = preg_replace("|/$|", "", $base);

    $baseParts = parse_url($base);
    $root = $baseParts["scheme"] . "://" . $baseParts["host"];

    // pattern => replacement; order matters, each rule sees the output of
    // the previous one
    $rules = array(
        "|^http://" . preg_quote($this->host) . "|i" => "",           // drop our own host prefix
        "|^(\/)|i"                                   => $root . "/",  // root-relative link
        "|^(?!http://)(?!mailto:)|i"                 => $base . "/",  // relative link
        "|/\./|"                                     => "/",          // collapse "/./"
        "|/[^\/]+/\.\./|"                            => "/",          // collapse "/dir/../"
    );

    $expandedLinks = $links;
    foreach ($rules as $pattern => $replacement) {
        $expandedLinks = preg_replace($pattern, $replacement, $expandedLinks);
    }

    return $expandedLinks;
}
695
|
|
|
|
696
|
|
|
/*======================================================================*\ |
697
|
|
|
Function: _httprequest |
698
|
|
|
Purpose: go get the http data from the server |
699
|
|
|
Input: $url the url to fetch |
700
|
|
|
$fp the current open file pointer |
701
|
|
|
$URI the full URI |
702
|
|
|
$body body contents to send if any (POST) |
703
|
|
|
Output: |
704
|
|
|
\*======================================================================*/ |
705
|
|
|
|
706
|
|
|
function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "") |
|
|
|
|
707
|
|
|
{ |
708
|
|
|
$cookie_headers = ''; |
709
|
|
|
if ($this->passcookies && $this->_redirectaddr) |
710
|
|
|
$this->setcookies(); |
711
|
|
|
|
712
|
|
|
$URI_PARTS = parse_url($URI); |
713
|
|
|
if (empty($url)) |
714
|
|
|
$url = "/"; |
715
|
|
|
$headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n"; |
716
|
|
|
if (!empty($this->agent)) |
717
|
|
|
$headers .= "User-Agent: " . $this->agent . "\r\n"; |
718
|
|
|
if (!empty($this->host) && !isset($this->rawheaders['Host'])) { |
719
|
|
|
$headers .= "Host: " . $this->host; |
720
|
|
|
if (!empty($this->port) && $this->port != '80') |
721
|
|
|
$headers .= ":" . $this->port; |
722
|
|
|
$headers .= "\r\n"; |
723
|
|
|
} |
724
|
|
|
if (!empty($this->accept)) |
725
|
|
|
$headers .= "Accept: " . $this->accept . "\r\n"; |
726
|
|
|
if ($this->use_gzip) { |
727
|
|
|
// make sure PHP was built with --with-zlib |
728
|
|
|
// and we can handle gzipp'ed data |
729
|
|
|
if (function_exists('gzinflate')) { |
730
|
|
|
$headers .= "Accept-encoding: gzip\r\n"; |
731
|
|
|
} else { |
732
|
|
|
trigger_error( |
733
|
|
|
"use_gzip is on, but PHP was built without zlib support." . |
734
|
|
|
" Requesting file(s) without gzip encoding.", |
735
|
|
|
E_USER_NOTICE); |
736
|
|
|
} |
737
|
|
|
} |
738
|
|
|
if (!empty($this->referer)) |
739
|
|
|
$headers .= "Referer: " . $this->referer . "\r\n"; |
740
|
|
|
if (!empty($this->cookies)) { |
741
|
|
|
if (!is_array($this->cookies)) |
|
|
|
|
742
|
|
|
$this->cookies = (array)$this->cookies; |
743
|
|
|
|
744
|
|
|
reset($this->cookies); |
745
|
|
|
if (count($this->cookies) > 0) { |
746
|
|
|
$cookie_headers .= 'Cookie: '; |
747
|
|
|
foreach ($this->cookies as $cookieKey => $cookieVal) { |
748
|
|
|
$cookie_headers .= $cookieKey . "=" . urlencode($cookieVal) . "; "; |
749
|
|
|
} |
750
|
|
|
$headers .= substr($cookie_headers, 0, -2) . "\r\n"; |
751
|
|
|
} |
752
|
|
|
} |
753
|
|
|
if (!empty($this->rawheaders)) { |
754
|
|
|
if (!is_array($this->rawheaders)) |
|
|
|
|
755
|
|
|
$this->rawheaders = (array)$this->rawheaders; |
756
|
|
|
while (list($headerKey, $headerVal) = each($this->rawheaders)) |
|
|
|
|
757
|
|
|
$headers .= $headerKey . ": " . $headerVal . "\r\n"; |
758
|
|
|
} |
759
|
|
|
if (!empty($content_type)) { |
760
|
|
|
$headers .= "Content-type: $content_type"; |
761
|
|
|
if ($content_type == "multipart/form-data") |
762
|
|
|
$headers .= "; boundary=" . $this->_mime_boundary; |
763
|
|
|
$headers .= "\r\n"; |
764
|
|
|
} |
765
|
|
|
if (!empty($body)) |
766
|
|
|
$headers .= "Content-length: " . strlen($body) . "\r\n"; |
767
|
|
|
if (!empty($this->user) || !empty($this->pass)) |
768
|
|
|
$headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n"; |
769
|
|
|
|
770
|
|
|
//add proxy auth headers |
771
|
|
|
if (!empty($this->proxy_user)) |
772
|
|
|
$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n"; |
773
|
|
|
|
774
|
|
|
|
775
|
|
|
$headers .= "\r\n"; |
776
|
|
|
|
777
|
|
|
// set the read timeout if needed |
778
|
|
|
if ($this->read_timeout > 0) |
779
|
|
|
socket_set_timeout($fp, $this->read_timeout); |
780
|
|
|
$this->timed_out = false; |
781
|
|
|
|
782
|
|
|
fwrite($fp, $headers . $body, strlen($headers . $body)); |
783
|
|
|
|
784
|
|
|
$this->_redirectaddr = false; |
785
|
|
|
unset($this->headers); |
786
|
|
|
|
787
|
|
|
// content was returned gzip encoded? |
788
|
|
|
$is_gzipped = false; |
789
|
|
|
|
790
|
|
|
while ($currentHeader = fgets($fp, $this->_maxlinelen)) { |
791
|
|
|
if ($this->read_timeout > 0 && $this->_check_timeout($fp)) { |
792
|
|
|
$this->status = -100; |
793
|
|
|
return false; |
794
|
|
|
} |
795
|
|
|
|
796
|
|
|
if ($currentHeader == "\r\n") |
797
|
|
|
break; |
798
|
|
|
|
799
|
|
|
// if a header begins with Location: or URI:, set the redirect |
800
|
|
|
if (preg_match("/^(Location:|URI:)/i", $currentHeader)) { |
801
|
|
|
// get URL portion of the redirect |
802
|
|
|
preg_match("/^(Location:|URI:)[ ]+(.*)/i", chop($currentHeader), $matches); |
803
|
|
|
// look for :// in the Location header to see if hostname is included |
804
|
|
|
if (!preg_match("|\:\/\/|", $matches[2])) { |
805
|
|
|
// no host in the path, so prepend |
806
|
|
|
$this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port; |
807
|
|
|
// eliminate double slash |
808
|
|
|
if (!preg_match("|^/|", $matches[2])) |
809
|
|
|
$this->_redirectaddr .= "/" . $matches[2]; |
810
|
|
|
else |
811
|
|
|
$this->_redirectaddr .= $matches[2]; |
812
|
|
|
} else |
813
|
|
|
$this->_redirectaddr = $matches[2]; |
814
|
|
|
} |
815
|
|
|
|
816
|
|
|
if (preg_match("|^HTTP/|", $currentHeader)) { |
817
|
|
|
if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) { |
818
|
|
|
$this->status = $status[1]; |
819
|
|
|
} |
820
|
|
|
$this->response_code = $currentHeader; |
821
|
|
|
} |
822
|
|
|
|
823
|
|
|
if (preg_match("/Content-Encoding: gzip/", $currentHeader)) { |
824
|
|
|
$is_gzipped = true; |
825
|
|
|
} |
826
|
|
|
|
827
|
|
|
$this->headers[] = $currentHeader; |
828
|
|
|
} |
829
|
|
|
|
830
|
|
|
$results = ''; |
831
|
|
|
do { |
832
|
|
|
$_data = fread($fp, $this->maxlength); |
833
|
|
|
if (strlen($_data) == 0) { |
834
|
|
|
break; |
835
|
|
|
} |
836
|
|
|
$results .= $_data; |
837
|
|
|
} while (true); |
838
|
|
|
|
839
|
|
|
// gunzip |
840
|
|
|
if ($is_gzipped) { |
841
|
|
|
// per http://www.php.net/manual/en/function.gzencode.php |
842
|
|
|
$results = substr($results, 10); |
843
|
|
|
$results = gzinflate($results); |
844
|
|
|
} |
845
|
|
|
|
846
|
|
|
if ($this->read_timeout > 0 && $this->_check_timeout($fp)) { |
847
|
|
|
$this->status = -100; |
848
|
|
|
return false; |
849
|
|
|
} |
850
|
|
|
|
851
|
|
|
// check if there is a a redirect meta tag |
852
|
|
|
|
853
|
|
|
if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) { |
854
|
|
|
$this->_redirectaddr = $this->_expandlinks($match[1], $URI); |
855
|
|
|
} |
856
|
|
|
|
857
|
|
|
// have we hit our frame depth and is there frame src to fetch? |
858
|
|
|
if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) { |
859
|
|
|
$this->results[] = $results; |
860
|
|
|
for ($x = 0; $x < count($match[1]); $x++) |
|
|
|
|
861
|
|
|
$this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"] . "://" . $this->host); |
862
|
|
|
} // have we already fetched framed content? |
863
|
|
|
elseif (is_array($this->results)) |
|
|
|
|
864
|
|
|
$this->results[] = $results; |
865
|
|
|
// no framed content |
866
|
|
|
else |
867
|
|
|
$this->results = $results; |
868
|
|
|
|
869
|
|
|
return true; |
870
|
|
|
} |
871
|
|
|
|
872
|
|
|
/*======================================================================*\ |
873
|
|
|
Function: _httpsrequest |
874
|
|
|
Purpose: go get the https data from the server using curl |
875
|
|
|
Input: $url the url to fetch |
876
|
|
|
$URI the full URI |
877
|
|
|
$body body contents to send if any (POST) |
878
|
|
|
Output: |
879
|
|
|
\*======================================================================*/ |
880
|
|
|
|
881
|
|
|
/**
 * Fetch an HTTPS document by shelling out to the curl binary.
 *
 * Request headers are assembled from the object's settings (agent, host,
 * port, accept, referer, cookies, rawheaders, user/pass), handed to
 * $this->curl_path on the command line, and the response headers curl dumps
 * to a temporary file are parsed to populate $this->headers,
 * $this->response_code, $this->status and $this->_redirectaddr. The response
 * body ends up in $this->results.
 *
 * @param string $url          path part of the request; defaulted to "/" when
 *                             empty but not used to build the curl command
 * @param string $URI          full URI; parsed for its scheme and passed to curl
 * @param string $http_method  request method; not used when building the curl command
 * @param string $content_type value for the Content-type header, if any
 * @param string $body         request body to send (POST), if any
 *
 * @return bool true on success; false if the temporary header file cannot be
 *              created or curl exits with a non-zero code ($this->error is set)
 */
function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
{
    if ($this->passcookies && $this->_redirectaddr) {
        $this->setcookies();
    }

    $headers        = array();
    $cmdline_params = '';

    $URI_PARTS = parse_url($URI);
    if (empty($url)) {
        $url = "/";
    }

    // GET ... header not needed for curl
    if (!empty($this->agent)) {
        $headers[] = "User-Agent: " . $this->agent;
    }
    if (!empty($this->host)) {
        if (!empty($this->port)) {
            $headers[] = "Host: " . $this->host . ":" . $this->port;
        } else {
            $headers[] = "Host: " . $this->host;
        }
    }
    if (!empty($this->accept)) {
        $headers[] = "Accept: " . $this->accept;
    }
    if (!empty($this->referer)) {
        $headers[] = "Referer: " . $this->referer;
    }
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies)) {
            $this->cookies = (array)$this->cookies;
        }

        if (count($this->cookies) > 0) {
            $cookie_pairs = array();
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_pairs[] = $cookieKey . "=" . urlencode($cookieVal);
            }
            $headers[] = 'Cookie: ' . implode("; ", $cookie_pairs);
        }
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders)) {
            $this->rawheaders = (array)$this->rawheaders;
        }
        // foreach instead of each(): each() was removed in PHP 8
        foreach ($this->rawheaders as $headerKey => $headerVal) {
            $headers[] = $headerKey . ": " . $headerVal;
        }
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data") {
            $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
        } else {
            $headers[] = "Content-type: $content_type";
        }
    }
    if (!empty($body)) {
        $headers[] = "Content-length: " . strlen($body);
    }
    if (!empty($this->user) || !empty($this->pass)) {
        // "Basic" spelling matches the header built for plain HTTP requests
        $headers[] = "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass);
    }

    // Every value that reaches the shell goes through escapeshellarg() so
    // quotes, backticks, "$" and backslashes in headers or body cannot break
    // out of the argument.
    foreach ($headers as $header) {
        $cmdline_params .= " -H " . escapeshellarg($header);
    }

    if (!empty($body)) {
        $cmdline_params .= " -d " . escapeshellarg($body);
    }

    if ($this->read_timeout > 0) {
        $cmdline_params .= " -m " . escapeshellarg((string)$this->read_timeout);
    }

    // Fall back to the system temp directory when no temp_dir is configured.
    $temp_dir   = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($temp_dir, "sno");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the cURL response headers.";
        return false;
    }

    // NOTE(review): -k disables TLS certificate verification; kept because
    // existing callers may rely on it, but it should be reconsidered.
    $results = array();
    $return  = 0;
    exec(
        $this->curl_path . " -k -D " . escapeshellarg($headerfile) . $cmdline_params . " " . escapeshellarg($URI),
        $results,
        $return
    );

    if ($return) {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        // do not leave the temporary header dump behind on failure
        if (is_file($headerfile)) {
            unlink($headerfile);
        }
        return false;
    }

    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    if ($result_headers === false) {
        $result_headers = array();
    }
    if (is_file($headerfile)) {
        unlink($headerfile);
    }

    $this->_redirectaddr = false;
    // reset to an empty array (not unset) so later count()/foreach stay valid
    $this->headers = array();

    foreach ($result_headers as $headerLine) {
        // if a header begins with Location: or URI:, set the redirect
        if (preg_match("/^(Location:|URI:)\s+(.*)/i", chop($headerLine), $matches)) {
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"] . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2])) {
                    $this->_redirectaddr .= "/" . $matches[2];
                } else {
                    $this->_redirectaddr .= $matches[2];
                }
            } else {
                $this->_redirectaddr = $matches[2];
            }
        }

        if (preg_match("|^HTTP/|", $headerLine)) {
            $this->response_code = $headerLine;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match)) {
                $this->status = $match[1];
            }
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        $this->results[] = $results;
        foreach ($match[1] as $frame_src) {
            $this->_frameurls[] = $this->_expandlinks($frame_src, $URI_PARTS["scheme"] . "://" . $this->host);
        }
    } elseif (is_array($this->results)) {
        // have we already fetched framed content?
        $this->results[] = $results;
    } else {
        // no framed content
        $this->results = $results;
    }

    return true;
}
1013
|
|
|
|
1014
|
|
|
/*======================================================================*\ |
1015
|
|
|
Function: setcookies() |
1016
|
|
|
Purpose: set cookies for a redirection |
1017
|
|
|
\*======================================================================*/ |
1018
|
|
|
|
1019
|
|
|
/**
 * Copy cookies from the stored response headers into $this->cookies so they
 * are sent along with the next (redirected) request.
 *
 * Only the name=value pair of each Set-Cookie header is kept; attributes
 * after the first ";" are ignored. Values are URL-decoded before storing.
 *
 * @return void
 */
function setcookies()
{
    // Nothing to do when no response headers have been collected yet.
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        // Stop the value at ";" or at the line ending, so a cookie without
        // attributes does not pick up the header's trailing CRLF.
        if (preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header, $match)) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
1026
|
|
|
|
1027
|
|
|
|
1028
|
|
|
/*======================================================================*\ |
1029
|
|
|
Function: _check_timeout |
1030
|
|
|
Purpose: checks whether timeout has occurred |
1031
|
|
|
Input: $fp file pointer |
1032
|
|
|
\*======================================================================*/ |
1033
|
|
|
|
1034
|
|
|
/**
 * Report whether the given stream has timed out while reading.
 *
 * The stream is only inspected when a positive read timeout is configured.
 * When a timeout is detected, $this->timed_out is set to true.
 *
 * @param resource $fp file pointer to inspect
 *
 * @return bool true if the stream reports a timeout, false otherwise
 */
function _check_timeout($fp)
{
    // Without a positive read timeout there is nothing to check.
    if (!($this->read_timeout > 0)) {
        return false;
    }

    $stream_info = socket_get_status($fp);
    if (!$stream_info["timed_out"]) {
        return false;
    }

    $this->timed_out = true;

    return true;
}
1045
|
|
|
|
1046
|
|
|
/*======================================================================*\ |
1047
|
|
|
Function: _connect |
1048
|
|
|
Purpose: make a socket connection |
1049
|
|
|
Input: $fp file pointer |
1050
|
|
|
\*======================================================================*/ |
1051
|
|
|
|
1052
|
|
|
/**
 * Open the socket connection used for a request.
 *
 * Connects to the configured proxy when both proxy_host and proxy_port are
 * set (and flags $this->_isproxy), otherwise to $this->host / $this->port.
 * On failure $this->status receives the error number reported by
 * fsockopen() and $this->error a matching message.
 *
 * @param resource|false $fp receives the opened file pointer (by reference)
 *
 * @return bool true when the connection was opened, false otherwise
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno) {
        // each case needs its own break, otherwise execution falls through
        // and the generic default message overwrites the specific one
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (" . $errno . ")";
            break;
    }

    return false;
}
1093
|
|
|
|
1094
|
|
|
/*======================================================================*\ |
1095
|
|
|
Function: _disconnect |
1096
|
|
|
Purpose: disconnect a socket connection |
1097
|
|
|
Input: $fp file pointer |
1098
|
|
|
\*======================================================================*/ |
1099
|
|
|
|
1100
|
|
|
/**
 * Close a previously opened connection.
 *
 * @param resource $fp file pointer to close
 *
 * @return bool the result of fclose() for the given pointer
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
1104
|
|
|
|
1105
|
|
|
|
1106
|
|
|
/*======================================================================*\ |
1107
|
|
|
Function: _prepare_post_body |
1108
|
|
|
Purpose: Prepare post body according to encoding type |
1109
|
|
|
Input: $formvars - form variables |
1110
|
|
|
$formfiles - form upload files |
1111
|
|
|
Output: post body |
1112
|
|
|
\*======================================================================*/ |
1113
|
|
|
|
1114
|
|
|
/**
 * Build the request body for a form submission.
 *
 * The encoding is chosen by $this->_submit_type:
 *  - "application/x-www-form-urlencoded": key=value pairs joined with "&"
 *    (array/object values are sent as key[]=value);
 *  - "multipart/form-data": one MIME part per variable and per readable
 *    file, delimited by a freshly generated $this->_mime_boundary.
 * Any other submit type yields an empty string.
 *
 * @param mixed $formvars  form variables (cast to array)
 * @param mixed $formfiles form upload files: field name => file path(s) (cast to array)
 *
 * @return string|null the encoded body, or null when there are neither
 *                     variables nor files
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0) {
        return;
    }

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach instead of each(): each() was removed in PHP 8
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    }
                } else {
                    $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        // concatenate the "[]" suffix: "\[" inside a
                        // double-quoted string would emit a literal backslash
                        $postdata .= "Content-Disposition: form-data; name=\"" . $key . "[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    // skip anything that is not a readable regular file
                    if (!is_file($file_name) || !is_readable($file_name)) {
                        continue;
                    }

                    // binary-safe and works for empty files, unlike
                    // fread($fp, filesize(...)) which rejects a length of 0
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) {
                        continue;
                    }
                    $base_name = basename($file_name);

                    $postdata .= "--" . $this->_mime_boundary . "\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--" . $this->_mime_boundary . "--\r\n";
            break;
    }

    return $postdata;
}
1176
|
|
|
} |
1177
|
|
|
|
1178
|
|
|
?> |
|
|
|
|
1179
|
|
|
|
Adding explicit visibility (private, protected, or public) is generally recommended to communicate to other developers how, and from where, this method is intended to be used.