1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Deprecated. Use WP_HTTP (http.php) instead. |
5
|
|
|
*/ |
6
|
|
|
_deprecated_file( basename( __FILE__ ), '3.0.0', WPINC . '/http.php' ); |
7
|
|
|
|
8
|
|
|
if ( ! class_exists( 'Snoopy', false ) ) : |
9
|
|
|
/************************************************* |
10
|
|
|
|
11
|
|
|
Snoopy - the PHP net client |
12
|
|
|
Author: Monte Ohrt <[email protected]> |
13
|
|
|
Copyright (c): 1999-2008 New Digital Group, all rights reserved |
14
|
|
|
Version: 1.2.4 |
15
|
|
|
|
16
|
|
|
* This library is free software; you can redistribute it and/or |
17
|
|
|
* modify it under the terms of the GNU Lesser General Public |
18
|
|
|
* License as published by the Free Software Foundation; either |
19
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
20
|
|
|
* |
21
|
|
|
* This library is distributed in the hope that it will be useful, |
22
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
23
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24
|
|
|
* Lesser General Public License for more details. |
25
|
|
|
* |
26
|
|
|
* You should have received a copy of the GNU Lesser General Public |
27
|
|
|
* License along with this library; if not, write to the Free Software |
28
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
29
|
|
|
|
30
|
|
|
You may contact the author of Snoopy by e-mail at: |
31
|
|
|
[email protected] |
32
|
|
|
|
33
|
|
|
The latest version of Snoopy can be obtained from: |
34
|
|
|
http://snoopy.sourceforge.net/ |
35
|
|
|
|
36
|
|
|
*************************************************/ |
37
|
|
|
|
38
|
|
|
class Snoopy |
39
|
|
|
{ |
40
|
|
|
/**** Public variables ****/ |
41
|
|
|
|
42
|
|
|
/* user definable vars */ |
43
|
|
|
|
44
|
|
|
var $host = "www.php.net"; // host name we are connecting to |
45
|
|
|
var $port = 80; // port we are connecting to |
46
|
|
|
var $proxy_host = ""; // proxy host to use |
47
|
|
|
var $proxy_port = ""; // proxy port to use |
48
|
|
|
var $proxy_user = ""; // proxy user to use |
49
|
|
|
var $proxy_pass = ""; // proxy password to use |
50
|
|
|
|
51
|
|
|
var $agent = "Snoopy v1.2.4"; // agent we masquerade as |
52
|
|
|
var $referer = ""; // referer info to pass |
53
|
|
|
var $cookies = array(); // array of cookies to pass |
54
|
|
|
// $cookies["username"]="joe"; |
55
|
|
|
var $rawheaders = array(); // array of raw headers to send |
56
|
|
|
// $rawheaders["Content-type"]="text/html"; |
57
|
|
|
|
58
|
|
|
var $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
59
|
|
|
var $lastredirectaddr = ""; // contains address of last redirected address |
60
|
|
|
var $offsiteok = true; // allows redirection off-site |
61
|
|
|
var $maxframes = 0; // frame content depth maximum. 0 = disallow |
62
|
|
|
var $expandlinks = true; // expand links to fully qualified URLs. |
63
|
|
|
// this only applies to fetchlinks() |
64
|
|
|
// submitlinks(), and submittext() |
65
|
|
|
var $passcookies = true; // pass set cookies back through redirects |
66
|
|
|
// NOTE: this currently does not respect |
67
|
|
|
// dates, domains or paths. |
68
|
|
|
|
69
|
|
|
var $user = ""; // user for http authentication |
70
|
|
|
var $pass = ""; // password for http authentication |
71
|
|
|
|
72
|
|
|
// http accept types |
73
|
|
|
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; |
74
|
|
|
|
75
|
|
|
var $results = ""; // where the content is put |
76
|
|
|
|
77
|
|
|
var $error = ""; // error messages sent here |
78
|
|
|
var $response_code = ""; // response code returned from server |
79
|
|
|
var $headers = array(); // headers returned from server sent here |
80
|
|
|
var $maxlength = 500000; // max return data length (body) |
81
|
|
|
var $read_timeout = 0; // timeout on read operations, in seconds |
82
|
|
|
// supported only since PHP 4 Beta 4 |
83
|
|
|
// set to 0 to disallow timeouts |
84
|
|
|
var $timed_out = false; // if a read operation timed out |
85
|
|
|
var $status = 0; // http request status |
86
|
|
|
|
87
|
|
|
var $temp_dir = "/tmp"; // temporary directory that the webserver |
88
|
|
|
// has permission to write to. |
89
|
|
|
// under Windows, this should be C:\temp |
90
|
|
|
|
91
|
|
|
var $curl_path = "/usr/local/bin/curl"; |
92
|
|
|
// Snoopy will use cURL for fetching |
93
|
|
|
// SSL content if a full system path to |
94
|
|
|
// the cURL binary is supplied here. |
95
|
|
|
// set to false if you do not have |
96
|
|
|
// cURL installed. See http://curl.haxx.se |
97
|
|
|
// for details on installing cURL. |
98
|
|
|
// Snoopy does *not* use the cURL |
99
|
|
|
// library functions built into php, |
100
|
|
|
// as these functions are not stable |
101
|
|
|
// as of this Snoopy release. |
102
|
|
|
|
103
|
|
|
/**** Private variables ****/ |
104
|
|
|
|
105
|
|
|
var $_maxlinelen = 4096; // max line length (headers) |
106
|
|
|
|
107
|
|
|
var $_httpmethod = "GET"; // default http request method |
108
|
|
|
var $_httpversion = "HTTP/1.0"; // default http request version |
109
|
|
|
var $_submit_method = "POST"; // default submit method |
110
|
|
|
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type |
111
|
|
|
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type |
112
|
|
|
var $_redirectaddr = false; // will be set if page fetched is a redirect |
113
|
|
|
var $_redirectdepth = 0; // increments on an http redirect |
114
|
|
|
var $_frameurls = array(); // frame src urls |
115
|
|
|
var $_framedepth = 0; // increments on frame depth |
116
|
|
|
|
117
|
|
|
var $_isproxy = false; // set if using a proxy server |
118
|
|
|
var $_fp_timeout = 30; // timeout for socket connection |
119
|
|
|
|
120
|
|
|
/*======================================================================*\ |
121
|
|
|
Function: fetch |
122
|
|
|
Purpose: fetch the contents of a web page |
123
|
|
|
(and possibly other protocols in the |
124
|
|
|
future like ftp, nntp, gopher, etc.) |
125
|
|
|
Input: $URI the location of the page to fetch |
126
|
|
|
Output: $this->results the output text from the fetch |
127
|
|
|
\*======================================================================*/ |
128
|
|
|
|
129
|
|
|
/**
 * Fetch a page over http or https.
 *
 * After the request, a pending redirect is followed (up to $this->maxredirs)
 * and any URLs queued in $this->_frameurls are fetched (up to
 * $this->maxframes), both by calling fetch() recursively.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the scheme is not http/https, when the cURL binary
 *              is missing or not executable (https), or when the connection
 *              cannot be opened (http); true otherwise. The outcome of the
 *              request itself is not reflected in the return value.
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise so the
    // lookups below never index into a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // Default every component that is read later, so a URL without a scheme or
    // host falls through to the "Invalid protocol" error without notices.
    foreach (array("query", "path", "scheme", "host") as $part)
    {
        if (empty($URI_PARTS[$part]))
            $URI_PARTS[$part] = '';
    }

    $scheme = strtolower($URI_PARTS["scheme"]);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external cURL binary; bail out if unusable
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // without a proxy only the path (plus query string) is sent
    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    if ($scheme === "http")
    {
        if (!$this->_connect($fp))
            return false;
        // NOTE(review): _isproxy is deliberately read only after _connect(),
        // as in the original code; presumably _connect() sets it - confirm.
        // When using a proxy the entire URI is sent.
        $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // preg_quote() is given the "|" delimiter so a host containing it
        // cannot break the pattern.
        // NOTE(review): the on-site test only recognises http:// targets.
        $onsite = preg_match("|^http://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the recursive fetch() calls may queue further frames
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces each(), which was removed in PHP 8.0
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
267
|
|
|
|
268
|
|
|
/*======================================================================*\ |
269
|
|
|
Function: submit |
270
|
|
|
Purpose: submit an http form |
271
|
|
|
Input: $URI the location to post the data |
272
|
|
|
$formvars the formvars to use. |
273
|
|
|
format: $formvars["var"] = "val"; |
274
|
|
|
$formfiles an array of files to submit |
275
|
|
|
format: $formfiles["var"] = "/dir/filename.ext"; |
276
|
|
|
Output: $this->results the text output from the post |
277
|
|
|
\*======================================================================*/ |
278
|
|
|
|
279
|
|
|
/**
 * Submit an http form over http or https.
 *
 * After the request, a pending redirect is followed (up to $this->maxredirs):
 * a redirect target containing a query string is fetched with fetch(),
 * any other target is re-submitted with the same form data. URLs queued in
 * $this->_frameurls are then fetched (up to $this->maxframes).
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the scheme is not http/https, when the cURL binary
 *              is missing or not executable (https), or when the connection
 *              cannot be opened (http); true otherwise. The outcome of the
 *              request itself is not reflected in the return value.
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise so the
    // lookups below never index into a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // Default every component that is read later, so a URL without a scheme or
    // host falls through to the "Invalid protocol" error without notices.
    foreach (array("query", "path", "scheme", "host") as $part)
    {
        if (empty($URI_PARTS[$part]))
            $URI_PARTS[$part] = '';
    }

    $scheme = strtolower($URI_PARTS["scheme"]);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external cURL binary; bail out if unusable
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // without a proxy only the path (plus query string) is sent
    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    if ($scheme === "http")
    {
        if (!$this->_connect($fp))
            return false;
        // NOTE(review): _isproxy is deliberately read only after _connect(),
        // as in the original code; presumably _connect() sets it - confirm.
        // When using a proxy the entire URI is sent.
        $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // a redirect target that does not start with the request's scheme is
        // expanded against the request's scheme and host
        if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

        // only follow redirect if it's on this site, or offsiteok is true.
        // preg_quote() is given the "|" delimiter so a host containing it
        // cannot break the pattern.
        // NOTE(review): the on-site test only recognises http:// targets.
        $onsite = preg_match("|^http://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the recursive fetch() calls may queue further frames
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces each(), which was removed in PHP 8.0
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
433
|
|
|
|
434
|
|
|
/*======================================================================*\ |
435
|
|
|
Function: fetchlinks |
436
|
|
|
Purpose: fetch the links from a web page |
437
|
|
|
Input: $URI where you are fetching from |
438
|
|
|
Output: $this->results an array of the URLs |
439
|
|
|
\*======================================================================*/ |
440
|
|
|
|
441
|
|
|
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch(): false if it failed
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    // links are expanded relative to the last redirect target, if there was one
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striplinks($this->results);
    } else {
        $idx = 0;
        while ($idx < count($this->results)) {
            $this->results[$idx] = $this->_striplinks($this->results[$idx]);
            $idx++;
        }
    }

    if ($this->expandlinks) {
        $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
462
|
|
|
|
463
|
|
|
/*======================================================================*\ |
464
|
|
|
Function: fetchform |
465
|
|
|
Purpose: fetch the form elements from a web page |
466
|
|
|
Input: $URI where you are fetching from |
467
|
|
|
Output: $this->results the resulting html form |
468
|
|
|
\*======================================================================*/ |
469
|
|
|
|
470
|
|
View Code Duplication |
/**
 * Fetch a page and replace $this->results with its form elements.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch(): false if it failed
 */
function fetchform($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_stripform($this->results);
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }

    return true;
}
489
|
|
|
|
490
|
|
|
|
491
|
|
|
/*======================================================================*\ |
492
|
|
|
Function: fetchtext |
493
|
|
|
Purpose: fetch the text from a web page, stripping the links |
494
|
|
|
Input: $URI where you are fetching from |
495
|
|
|
Output: $this->results the text from the web page |
496
|
|
|
\*======================================================================*/ |
497
|
|
|
|
498
|
|
View Code Duplication |
/**
 * Fetch a page and replace $this->results with its text, markup stripped.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch(): false if it failed
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striptext($this->results);
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }

    return true;
}
514
|
|
|
|
515
|
|
|
/*======================================================================*\ |
516
|
|
|
Function: submitlinks |
517
|
|
|
Purpose: grab links from a form submission |
518
|
|
|
Input: $URI where you are submitting from |
519
|
|
|
Output: $this->results an array of the links from the post |
520
|
|
|
\*======================================================================*/ |
521
|
|
|
|
522
|
|
View Code Duplication |
/**
 * Submit a form and replace $this->results with the links in the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of the underlying submit(): false if it failed
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // links are expanded relative to the last redirect target, if there was one
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        }
        $idx++;
    }

    return true;
}
548
|
|
|
|
549
|
|
|
/*======================================================================*\ |
550
|
|
|
Function: submittext |
551
|
|
|
Purpose: grab text from a form submission |
552
|
|
|
Input: $URI where you are submitting from |
553
|
|
|
Output: $this->results the text from the web page |
554
|
|
|
\*======================================================================*/ |
555
|
|
|
|
556
|
|
View Code Duplication |
/**
 * Submit a form and replace $this->results with the text of the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of the underlying submit(): false if it failed
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // expansion is done relative to the last redirect target, if there was one
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        }
        $idx++;
    }

    return true;
}
582
|
|
|
|
583
|
|
|
|
584
|
|
|
|
585
|
|
|
/*======================================================================*\ |
586
|
|
|
Function: set_submit_multipart |
587
|
|
|
Purpose: Set the form submission content type to |
588
|
|
|
multipart/form-data |
589
|
|
|
\*======================================================================*/ |
590
|
|
|
/**
 * Select multipart/form-data as the content type for form submissions.
 */
function set_submit_multipart()
{
    $type = "multipart/form-data";
    $this->_submit_type = $type;
}
594
|
|
|
|
595
|
|
|
|
596
|
|
|
/*======================================================================*\ |
597
|
|
|
Function: set_submit_normal |
598
|
|
|
Purpose: Set the form submission content type to |
599
|
|
|
application/x-www-form-urlencoded |
600
|
|
|
\*======================================================================*/ |
601
|
|
|
/**
 * Select application/x-www-form-urlencoded as the content type for form
 * submissions.
 */
function set_submit_normal()
{
    $type = "application/x-www-form-urlencoded";
    $this->_submit_type = $type;
}
605
|
|
|
|
606
|
|
|
|
607
|
|
|
|
608
|
|
|
|
609
|
|
|
/*======================================================================*\ |
610
|
|
|
Private functions |
611
|
|
|
\*======================================================================*/ |
612
|
|
|
|
613
|
|
|
|
614
|
|
|
/*======================================================================*\ |
615
|
|
|
Function: _striplinks |
616
|
|
|
Purpose: strip the hyperlinks from an html document |
617
|
|
|
Input: $document document to strip. |
618
|
|
|
Output: $match an array of the links |
619
|
|
|
\*======================================================================*/ |
620
|
|
|
|
621
|
|
|
/**
 * Extract the href targets of the anchor tags in an html document.
 *
 * Quoted href values are returned first, followed by unquoted ones; values
 * that are empty() (including the string "0") are skipped.
 *
 * @param string $document document to scan
 * @return array the links found; an empty array when there are none
 */
function _striplinks($document)
{
    $pattern = "'<\s*a\s.*?href\s*=\s*          # find <a href=
                ([\"\'])?                       # find single or double quote
                (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                'isx";
    preg_match_all($pattern, $document, $links);

    // Always hand back an array, even when the document contains no links.
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones.
    // foreach replaces each(), which was removed in PHP 8.0.
    foreach (array(2, 3) as $group)
    {
        if (empty($links[$group]))
            continue;
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    return $match;
}
647
|
|
|
|
648
|
|
|
/*======================================================================*\ |
649
|
|
|
Function: _stripform |
650
|
|
|
Purpose: strip the form elements from an html document |
651
|
|
|
Input: $document document to strip. |
652
|
|
|
Output: $match the matched form elements, joined by CRLF |
653
|
|
|
\*======================================================================*/ |
654
|
|
|
|
655
|
|
|
/**
 * Extract the form-related tags (form, input, select, textarea, option)
 * from an html document.
 *
 * @param string $document document to scan
 * @return string the matched elements joined with "\r\n"
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
    preg_match_all($pattern, $document, $found);

    // catenate the full matches, one element per line
    return implode("\r\n", $found[0]);
}
665
|
|
|
|
666
|
|
|
|
667
|
|
|
|
668
|
|
|
/*======================================================================*\ |
669
|
|
|
Function: _striptext |
670
|
|
|
Purpose: strip the text from an html document |
671
|
|
|
Input: $document document to strip. |
672
|
|
|
Output: $text the resulting text |
673
|
|
|
\*======================================================================*/ |
674
|
|
|
|
675
|
|
|
/**
 * Reduce an html document to text.
 *
 * Removes script blocks and tags, collapses whitespace that follows a line
 * break, and decodes a fixed list of html entities. Non-ASCII replacements
 * are emitted as single bytes via chr().
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // Entities are listed one by one instead of being evaluated; the two
    // arrays below are parallel, so keep them in the same order.
    $search = array(
        "'<script[^>]*?>.*?</script>'si",   // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
        "'([\r\n])[\s]+'",                  // strip out white space
        "'&(quot|#34|#034|#x22);'i",        // replace html entities
        "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i",
        "'&(gt|#62|#062|#x3e);'i",
        "'&(nbsp|#160|#xa0);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
        "'&(reg|#174);'i",
        "'&(deg|#176);'i",
        "'&(#39|#039|#x27);'",
        "'&(euro|#8364);'i",                // europe
        "'&a(uml|UML);'",                   // german
        "'&o(uml|UML);'",
        "'&u(uml|UML);'",
        "'&A(uml|UML);'",
        "'&O(uml|UML);'",
        "'&U(uml|UML);'",
        // match the entity, like every other entry, rather than a raw
        // multi-byte sharp-s character
        "'&szlig;'i",
    );
    $replace = array(
        "",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        chr(161),
        chr(162),
        chr(163),
        chr(169),
        chr(174),
        chr(176),
        chr(39),
        chr(128),
        chr(0xE4), // a umlaut
        chr(0xF6), // o umlaut
        chr(0xFC), // u umlaut
        chr(0xC4), // A umlaut
        chr(0xD6), // O umlaut
        chr(0xDC), // U umlaut
        chr(0xDF), // sharp s
    );

    return preg_replace($search, $replace, $document);
}
735
|
|
|
|
736
|
|
|
/*======================================================================*\ |
737
|
|
|
Function: _expandlinks |
738
|
|
|
Purpose: expand each link into a fully qualified URL |
739
|
|
|
Input: $links the links to qualify |
740
|
|
|
$URI the full URI to get the base from |
741
|
|
|
Output: $expandedLinks the expanded links |
742
|
|
|
\*======================================================================*/ |
743
|
|
|
|
744
|
|
|
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is derived from $URI: the query string, a trailing file-like
 * segment (name.ext) and a trailing slash are removed. Root-relative links
 * are prefixed with the scheme and host of that base, other links that do
 * not start with http:// or mailto: are prefixed with the base itself, and
 * "/./" and "/dir/../" sequences are collapsed.
 *
 * @param mixed  $links the link or links to qualify
 * @param string $URI   the full URI to get the base from
 * @return mixed the expanded links, as returned by preg_replace()
 */
function _expandlinks($links,$URI)
{
    // everything before the query string
    preg_match("/^[^\?]+/", $URI, $uri_match);

    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uri_match[0]);
    $base = preg_replace("|/$|", "", $base);

    $base_parts = parse_url($base);
    $base_root = $base_parts["scheme"]."://".$base_parts["host"];

    // pattern => replacement, applied in this order
    $rules = array(
        array("|^http://".preg_quote($this->host)."|i", ""),
        array("|^(\/)|i", $base_root."/"),
        array("|^(?!http://)(?!mailto:)|i", $base."/"),
        array("|/\./|", "/"),
        array("|/[^\/]+/\.\./|", "/"),
    );

    $patterns = array();
    $substitutes = array();
    foreach ($rules as $rule) {
        $patterns[] = $rule[0];
        $substitutes[] = $rule[1];
    }

    return preg_replace($patterns, $substitutes, $links);
}
773
|
|
|
|
774
|
|
|
/*======================================================================*\ |
775
|
|
|
Function: _httprequest |
776
|
|
|
Purpose: go get the http data from the server |
777
|
|
|
Input: $url the url to fetch |
778
|
|
|
$fp the current open file pointer |
779
|
|
|
$URI the full URI |
780
|
|
|
$body body contents to send if any (POST) |
781
|
|
|
Output: |
782
|
|
|
\*======================================================================*/ |
783
|
|
|
|
784
|
|
|
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") |
785
|
|
|
{ |
786
|
|
|
$cookie_headers = ''; |
787
|
|
|
if($this->passcookies && $this->_redirectaddr) |
788
|
|
|
$this->setcookies(); |
789
|
|
|
|
790
|
|
|
$URI_PARTS = parse_url($URI); |
791
|
|
|
if(empty($url)) |
792
|
|
|
$url = "/"; |
793
|
|
|
$headers = $http_method." ".$url." ".$this->_httpversion."\r\n"; |
794
|
|
|
if(!empty($this->agent)) |
795
|
|
|
$headers .= "User-Agent: ".$this->agent."\r\n"; |
796
|
|
|
if(!empty($this->host) && !isset($this->rawheaders['Host'])) { |
797
|
|
|
$headers .= "Host: ".$this->host; |
798
|
|
|
if(!empty($this->port) && $this->port != 80) |
799
|
|
|
$headers .= ":".$this->port; |
800
|
|
|
$headers .= "\r\n"; |
801
|
|
|
} |
802
|
|
|
if(!empty($this->accept)) |
803
|
|
|
$headers .= "Accept: ".$this->accept."\r\n"; |
804
|
|
|
if(!empty($this->referer)) |
805
|
|
|
$headers .= "Referer: ".$this->referer."\r\n"; |
806
|
|
|
if(!empty($this->cookies)) |
807
|
|
|
{ |
808
|
|
|
if(!is_array($this->cookies)) |
809
|
|
|
$this->cookies = (array)$this->cookies; |
810
|
|
|
|
811
|
|
|
reset($this->cookies); |
812
|
|
|
if ( count($this->cookies) > 0 ) { |
813
|
|
|
$cookie_headers .= 'Cookie: '; |
814
|
|
View Code Duplication |
foreach ( $this->cookies as $cookieKey => $cookieVal ) { |
815
|
|
|
$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; "; |
816
|
|
|
} |
817
|
|
|
$headers .= substr($cookie_headers,0,-2) . "\r\n"; |
818
|
|
|
} |
819
|
|
|
} |
820
|
|
View Code Duplication |
if(!empty($this->rawheaders)) |
821
|
|
|
{ |
822
|
|
|
if(!is_array($this->rawheaders)) |
823
|
|
|
$this->rawheaders = (array)$this->rawheaders; |
824
|
|
|
while(list($headerKey,$headerVal) = each($this->rawheaders)) |
825
|
|
|
$headers .= $headerKey.": ".$headerVal."\r\n"; |
826
|
|
|
} |
827
|
|
|
if(!empty($content_type)) { |
828
|
|
|
$headers .= "Content-type: $content_type"; |
829
|
|
|
if ($content_type == "multipart/form-data") |
830
|
|
|
$headers .= "; boundary=".$this->_mime_boundary; |
831
|
|
|
$headers .= "\r\n"; |
832
|
|
|
} |
833
|
|
|
if(!empty($body)) |
834
|
|
|
$headers .= "Content-length: ".strlen($body)."\r\n"; |
835
|
|
View Code Duplication |
if(!empty($this->user) || !empty($this->pass)) |
836
|
|
|
$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n"; |
837
|
|
|
|
838
|
|
|
//add proxy auth headers |
839
|
|
View Code Duplication |
if(!empty($this->proxy_user)) |
840
|
|
|
$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n"; |
841
|
|
|
|
842
|
|
|
|
843
|
|
|
$headers .= "\r\n"; |
844
|
|
|
|
845
|
|
|
// set the read timeout if needed |
846
|
|
|
if ($this->read_timeout > 0) |
847
|
|
|
socket_set_timeout($fp, $this->read_timeout); |
848
|
|
|
$this->timed_out = false; |
849
|
|
|
|
850
|
|
|
fwrite($fp,$headers.$body,strlen($headers.$body)); |
851
|
|
|
|
852
|
|
|
$this->_redirectaddr = false; |
853
|
|
|
unset($this->headers); |
854
|
|
|
|
855
|
|
|
while($currentHeader = fgets($fp,$this->_maxlinelen)) |
856
|
|
|
{ |
857
|
|
View Code Duplication |
if ($this->read_timeout > 0 && $this->_check_timeout($fp)) |
858
|
|
|
{ |
859
|
|
|
$this->status=-100; |
860
|
|
|
return false; |
861
|
|
|
} |
862
|
|
|
|
863
|
|
|
if($currentHeader == "\r\n") |
864
|
|
|
break; |
865
|
|
|
|
866
|
|
|
// if a header begins with Location: or URI:, set the redirect |
867
|
|
View Code Duplication |
if(preg_match("/^(Location:|URI:)/i",$currentHeader)) |
868
|
|
|
{ |
869
|
|
|
// get URL portion of the redirect |
870
|
|
|
preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches); |
871
|
|
|
// look for :// in the Location header to see if hostname is included |
872
|
|
|
if(!preg_match("|\:\/\/|",$matches[2])) |
873
|
|
|
{ |
874
|
|
|
// no host in the path, so prepend |
875
|
|
|
$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port; |
|
|
|
|
876
|
|
|
// eliminate double slash |
877
|
|
|
if(!preg_match("|^/|",$matches[2])) |
878
|
|
|
$this->_redirectaddr .= "/".$matches[2]; |
879
|
|
|
else |
880
|
|
|
$this->_redirectaddr .= $matches[2]; |
881
|
|
|
} |
882
|
|
|
else |
883
|
|
|
$this->_redirectaddr = $matches[2]; |
|
|
|
|
884
|
|
|
} |
885
|
|
|
|
886
|
|
|
if(preg_match("|^HTTP/|",$currentHeader)) |
887
|
|
|
{ |
888
|
|
|
if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status)) |
889
|
|
|
{ |
890
|
|
|
$this->status= $status[1]; |
|
|
|
|
891
|
|
|
} |
892
|
|
|
$this->response_code = $currentHeader; |
893
|
|
|
} |
894
|
|
|
|
895
|
|
|
$this->headers[] = $currentHeader; |
896
|
|
|
} |
897
|
|
|
|
898
|
|
|
$results = ''; |
899
|
|
|
do { |
900
|
|
|
$_data = fread($fp, $this->maxlength); |
901
|
|
|
if (strlen($_data) == 0) { |
902
|
|
|
break; |
903
|
|
|
} |
904
|
|
|
$results .= $_data; |
905
|
|
|
} while(true); |
906
|
|
|
|
907
|
|
View Code Duplication |
if ($this->read_timeout > 0 && $this->_check_timeout($fp)) |
908
|
|
|
{ |
909
|
|
|
$this->status=-100; |
910
|
|
|
return false; |
911
|
|
|
} |
912
|
|
|
|
913
|
|
|
// check if there is a redirect meta tag |
914
|
|
|
|
915
|
|
|
if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) |
916
|
|
|
|
917
|
|
|
{ |
918
|
|
|
$this->_redirectaddr = $this->_expandlinks($match[1],$URI); |
919
|
|
|
} |
920
|
|
|
|
921
|
|
|
// have we hit our frame depth and is there frame src to fetch? |
922
|
|
View Code Duplication |
if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) |
923
|
|
|
{ |
924
|
|
|
$this->results[] = $results; |
925
|
|
|
for($x=0; $x<count($match[1]); $x++) |
|
|
|
|
926
|
|
|
$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host); |
927
|
|
|
} |
928
|
|
|
// have we already fetched framed content? |
929
|
|
|
elseif(is_array($this->results)) |
930
|
|
|
$this->results[] = $results; |
931
|
|
|
// no framed content |
932
|
|
|
else |
933
|
|
|
$this->results = $results; |
934
|
|
|
|
935
|
|
|
return true; |
936
|
|
|
} |
937
|
|
|
|
938
|
|
|
/*======================================================================*\
	Function:	_httpsrequest
	Purpose:	go get the https data from the server by running the
				curl binary configured in $this->curl_path
	Input:		$url			not used by this implementation (curl is
								handed $URI); kept for signature parity
				$URI			the full URI
				$http_method	not used by this implementation; no request
								line is built for curl
				$content_type	value for the Content-type header, if any
				$body			body contents to send if any (POST)
	Output:		true on success; $this->results, $this->headers,
				$this->response_code and $this->_redirectaddr are set.
				false on failure; $this->error holds the reason.
\*======================================================================*/

function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$headers = array();

	$URI_PARTS = parse_url($URI);

	// No "GET <url> HTTP/x" request line is assembled here: only header
	// lines are collected and passed to curl with -H.
	if(!empty($this->agent))
		$headers[] = "User-Agent: ".$this->agent;
	if(!empty($this->host))
	{
		if(!empty($this->port))
			$headers[] = "Host: ".$this->host.":".$this->port;
		else
			$headers[] = "Host: ".$this->host;
	}
	if(!empty($this->accept))
		$headers[] = "Accept: ".$this->accept;
	if(!empty($this->referer))
		$headers[] = "Referer: ".$this->referer;
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		if ( count($this->cookies) > 0 ) {
			$cookie_pairs = array();
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
			}
			$headers[] = 'Cookie: ' . implode("; ", $cookie_pairs);
		}
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach instead of each(): each() no longer exists in PHP 8.
		foreach ( $this->rawheaders as $headerKey => $headerVal )
			$headers[] = $headerKey.": ".$headerVal;
	}
	if(!empty($content_type)) {
		if ($content_type == "multipart/form-data")
			$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
		else
			$headers[] = "Content-type: $content_type";
	}
	if(!empty($body))
		$headers[] = "Content-length: ".strlen($body);
	if(!empty($this->user) || !empty($this->pass))
		$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

	// curl dumps the response headers into this file (-D).
	$headerfile = tempnam( $this->temp_dir, "sno" );
	if ( false === $headerfile ) {
		$this->error = "Error: could not create a temporary file for the response headers.";
		return false;
	}

	$cmdline_params = '-k -D ' . escapeshellarg( $headerfile );

	foreach ( $headers as $header ) {
		$cmdline_params .= ' -H ' . escapeshellarg( $header );
	}

	if ( ! empty( $body ) ) {
		$cmdline_params .= ' -d ' . escapeshellarg( $body );
	}

	if ( $this->read_timeout > 0 ) {
		$cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
	}

	exec( $this->curl_path . ' ' . $cmdline_params . ' ' . escapeshellarg( $URI ), $results, $return );

	if($return)
	{
		// Do not leave the temporary header file behind on failure.
		if ( file_exists( $headerfile ) )
			unlink( $headerfile );
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		return false;
	}

	$results = implode("\r\n",$results);

	// Read the header dump and remove the file right away; it is not
	// needed after this point.
	$result_headers = file_exists( $headerfile ) ? file( $headerfile ) : false;
	if ( file_exists( $headerfile ) )
		unlink( $headerfile );
	if ( false === $result_headers )
		$result_headers = array();

	$this->_redirectaddr = false;
	// Reset to an empty array (not unset) so the property stays countable
	// even when curl produced no header lines.
	$this->headers = array();

	foreach ( $result_headers as $currentHeader )
	{
		// if a header begins with Location: or URI:, set the redirect.
		// A single pattern both detects the header and extracts the URL,
		// so $matches[2] is always defined inside the branch.
		if(preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches))
		{
			// look for :// in the Location header to see if hostname is included
			if(!preg_match("|\:\/\/|",$matches[2]))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$matches[2]))
					$this->_redirectaddr .= "/".$matches[2];
				else
					$this->_redirectaddr .= $matches[2];
			}
			else
				$this->_redirectaddr = $matches[2];
		}

		if(preg_match("|^HTTP/|",$currentHeader))
			$this->response_code = $currentHeader;

		$this->headers[] = $currentHeader;
	}

	// check if there is a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		$this->results[] = $results;
		foreach ( $match[1] as $frame_src )
			$this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
1088
|
|
|
|
1089
|
|
|
/*======================================================================*\
	Function:	setcookies()
	Purpose:	set cookies for a redirection: copy every Set-Cookie
				name/value pair found in $this->headers into
				$this->cookies (values are url-decoded)
\*======================================================================*/

function setcookies()
{
	// Nothing to scan when no response headers have been stored; this also
	// avoids calling count()/iterating on an unset property.
	if(empty($this->headers) || !is_array($this->headers))
		return;

	foreach($this->headers as $header)
	{
		// The value stops at ';' or at the line ending: stored header lines
		// may still carry their trailing CRLF, which must not end up in the
		// cookie value.
		if(preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1102
|
|
|
|
1103
|
|
|
|
1104
|
|
|
/*======================================================================*\
	Function:	_check_timeout
	Purpose:	checks whether timeout has occurred
	Input:		$fp	file pointer
	Output:		true (and $this->timed_out set to true) when a read
				timeout is configured and the stream reports timed_out;
				false otherwise
\*======================================================================*/

function _check_timeout($fp)
{
	// Without a positive read timeout the stream is never inspected.
	if (!($this->read_timeout > 0))
		return false;

	$stream_info = socket_get_status($fp);
	if (!$stream_info["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1121
|
|
|
|
1122
|
|
|
/*======================================================================*\
	Function:	_connect
	Purpose:	make a socket connection to $this->host:$this->port, or
				to the proxy when both proxy_host and proxy_port are set
	Input:		$fp	variable (by reference) that receives the
					file pointer returned by fsockopen()
	Output:		true when the socket was opened; false otherwise, with
				$this->status set to the fsockopen error number and
				$this->error set to a description
\*======================================================================*/

function _connect(&$fp)
{
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen(
				$host,
				$port,
				$errno,
				$errstr,
				$this->_fp_timeout
				);
	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// Each case needs its own break; without it every message fell
		// through and was overwritten by the default one.
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1175
|
|
|
/*======================================================================*\
	Function:	_disconnect
	Purpose:	disconnect a socket connection
	Input:		$fp	file pointer
	Output:		the result of fclose() on $fp
\*======================================================================*/

function _disconnect($fp)
{
	$closed = fclose($fp);
	return $closed;
}
1185
|
|
|
|
1186
|
|
|
|
1187
|
|
|
/*======================================================================*\
	Function:	_prepare_post_body
	Purpose:	Prepare post body according to encoding type
				($this->_submit_type)
	Input:		$formvars  - form variables (name => value, or
							 name => array/object of values)
				$formfiles - form upload files (field name => file
							 path or array of file paths)
	Output:		post body as a string; nothing (null) when both
				inputs are empty. For multipart/form-data a fresh
				$this->_mime_boundary is generated as a side effect.
\*======================================================================*/

function _prepare_post_body($formvars, $formfiles)
{
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			// foreach instead of each(): each() no longer exists in PHP 8.
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// {$key}[] yields name="key[]"; the former "$key\[\]"
						// put literal backslashes into the field name.
						$postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// file_get_contents() also copes with empty files, where
					// fread() with a length of 0 fails.
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1257
|
|
|
} |
1258
|
|
|
endif; |
1259
|
|
|
?> |
|
|
|
|
1260
|
|
|
|
This check looks for assignments to scalar types that may be of the wrong type.
To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.