Completed
Pull Request — master (#2)
by Stephen
09:27
created

Snoopy::fetch()   F

Complexity

Conditions 33
Paths 3536

Size

Total Lines 138
Code Lines 77

Duplication

Lines 84
Ratio 60.87 %

Importance

Changes 0
Metric Value
cc 33
eloc 77
nc 3536
nop 1
dl 84
loc 138
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * Deprecated. Use WP_HTTP (http.php) instead.
5
 */
6
_deprecated_file( basename( __FILE__ ), '3.0.0', WPINC . '/http.php' );
7
8
if ( ! class_exists( 'Snoopy', false ) ) :
9
/*************************************************
10
11
Snoopy - the PHP net client
12
Author: Monte Ohrt <monte@ohrt.com>
13
Copyright (c): 1999-2008 New Digital Group, all rights reserved
14
Version: 1.2.4
15
16
 * This library is free software; you can redistribute it and/or
17
 * modify it under the terms of the GNU Lesser General Public
18
 * License as published by the Free Software Foundation; either
19
 * version 2.1 of the License, or (at your option) any later version.
20
 *
21
 * This library is distributed in the hope that it will be useful,
22
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
 * Lesser General Public License for more details.
25
 *
26
 * You should have received a copy of the GNU Lesser General Public
27
 * License along with this library; if not, write to the Free Software
28
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29
30
You may contact the author of Snoopy by e-mail at:
31
monte@ohrt.com
32
33
The latest version of Snoopy can be obtained from:
34
http://snoopy.sourceforge.net/
35
36
*************************************************/
37
38
class Snoopy
39
{
40
	/**** Public variables ****/
41
42
	/* user definable vars */
43
44
	var $host			=	"www.php.net";		// host name we are connecting to
45
	var $port			=	80;					// port we are connecting to
46
	var $proxy_host		=	"";					// proxy host to use
47
	var $proxy_port		=	"";					// proxy port to use
48
	var $proxy_user		=	"";					// proxy user to use
49
	var $proxy_pass		=	"";					// proxy password to use
50
51
	var $agent			=	"Snoopy v1.2.4";	// agent we masquerade as
52
	var	$referer		=	"";					// referer info to pass
53
	var $cookies		=	array();			// array of cookies to pass
54
												// $cookies["username"]="joe";
55
	var	$rawheaders		=	array();			// array of raw headers to send
56
												// $rawheaders["Content-type"]="text/html";
57
58
	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
59
	var $lastredirectaddr	=	"";				// contains address of last redirected address
60
	var	$offsiteok		=	true;				// allows redirection off-site
61
	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
62
	var $expandlinks	=	true;				// expand links to fully qualified URLs.
63
												// this only applies to fetchlinks()
64
												// submitlinks(), and submittext()
65
	var $passcookies	=	true;				// pass set cookies back through redirects
66
												// NOTE: this currently does not respect
67
												// dates, domains or paths.
68
69
	var	$user			=	"";					// user for http authentication
70
	var	$pass			=	"";					// password for http authentication
71
72
	// http accept types
73
	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
74
75
	var $results		=	"";					// where the content is put
76
77
	var $error			=	"";					// error messages sent here
78
	var	$response_code	=	"";					// response code returned from server
79
	var	$headers		=	array();			// headers returned from server sent here
80
	var	$maxlength		=	500000;				// max return data length (body)
81
	var $read_timeout	=	0;					// timeout on read operations, in seconds
82
												// supported only since PHP 4 Beta 4
83
												// set to 0 to disallow timeouts
84
	var $timed_out		=	false;				// if a read operation timed out
85
	var	$status			=	0;					// http request status
86
87
	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
88
												// has permission to write to.
89
												// under Windows, this should be C:\temp
90
91
	var	$curl_path		=	"/usr/local/bin/curl";
92
												// Snoopy will use cURL for fetching
93
												// SSL content if a full system path to
94
												// the cURL binary is supplied here.
95
												// set to false if you do not have
96
												// cURL installed. See http://curl.haxx.se
97
												// for details on installing cURL.
98
												// Snoopy does *not* use the cURL
99
												// library functions built into php,
100
												// as these functions are not stable
101
												// as of this Snoopy release.
102
103
	/**** Private variables ****/
104
105
	var	$_maxlinelen	=	4096;				// max line length (headers)
106
107
	var $_httpmethod	=	"GET";				// default http request method
108
	var $_httpversion	=	"HTTP/1.0";			// default http request version
109
	var $_submit_method	=	"POST";				// default submit method
110
	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
111
	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
112
	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
113
	var $_redirectdepth	=	0;					// increments on an http redirect
114
	var $_frameurls		= 	array();			// frame src urls
115
	var $_framedepth	=	0;					// increments on frame depth
116
117
	var $_isproxy		=	false;				// set if using a proxy server
118
	var $_fp_timeout	=	30;					// timeout for socket connection
119
120
/*======================================================================*\
121
	Function:	fetch
122
	Purpose:	fetch the contents of a web page
123
				(and possibly other protocols in the
124
				future like ftp, nntp, gopher, etc.)
125
	Input:		$URI	the location of the page to fetch
126
	Output:		$this->results	the output text from the fetch
127
\*======================================================================*/
128
129
	/**
	 * Fetch the contents of a web page over http or https.
	 *
	 * The response is stored by the request helpers (_httprequest() /
	 * _httpsrequest()); this method only orchestrates the request, follows
	 * a redirect when allowed, and fetches any queued frame URLs.
	 *
	 * Note: as before, the return values of _httprequest() and
	 * _httpsrequest() are not inspected, so true is returned once the
	 * request has been issued.
	 *
	 * @param string $URI the location of the page to fetch
	 * @return bool false on an unsupported/malformed URI, on a failed
	 *              connect, or (https) when the curl binary is not usable;
	 *              true otherwise
	 */
	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);
		// parse_url() returns false on seriously malformed URIs; treat that
		// like a URI without a scheme so it is rejected below.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		// normalise the optional parts so later reads never hit an undefined index
		foreach (array("query", "path", "scheme", "host") as $part)
		{
			if (empty($URI_PARTS[$part]))
				$URI_PARTS[$part] = '';
		}

		$scheme = strtolower($URI_PARTS["scheme"]);
		if ($scheme != "http" && $scheme != "https")
		{
			// not a valid protocol
			$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
			return false;
		}

		if ($scheme == "https")
		{
			// https is delegated to an external curl binary
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
		}

		$this->host = $URI_PARTS["host"];
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		// without a proxy only the path (plus query string) is sent
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		if ($scheme == "http")
		{
			if (!$this->_connect($fp))
				return false;

			// _isproxy is deliberately read after _connect(), exactly as the
			// original code did (_connect() is not visible here and may update it).
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
		}
		else
		{
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// On-site means: http or https, followed by exactly this host
			// (the look-ahead stops "host.evil.com" from passing as "host").
			$onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?=[:/?#]|$)~i', $this->_redirectaddr);

			// only follow redirect if it's on this site, or offsiteok is true
			if ($onsite || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls may queue new frame URLs
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
267
268
/*======================================================================*\
269
	Function:	submit
270
	Purpose:	submit an http form
271
	Input:		$URI	the location to post the data
272
				$formvars	the formvars to use.
273
					format: $formvars["var"] = "val";
274
				$formfiles  an array of files to submit
275
					format: $formfiles["var"] = "/dir/filename.ext";
276
	Output:		$this->results	the text output from the post
277
\*======================================================================*/
278
279
	/**
	 * Submit an http form.
	 *
	 * Builds the request body with _prepare_post_body(), sends it with the
	 * configured submit method/type over http or https, then follows a
	 * redirect when allowed and fetches any queued frame URLs.
	 *
	 * Note: as before, the return values of _httprequest() and
	 * _httpsrequest() are not inspected, so true is returned once the
	 * request has been issued.
	 *
	 * @param string $URI       the location to post the data
	 * @param mixed  $formvars  the form variables, $formvars["var"] = "val"
	 * @param mixed  $formfiles files to submit, $formfiles["var"] = "/dir/filename.ext"
	 * @return bool false on an unsupported/malformed URI, on a failed
	 *              connect, or (https) when the curl binary is not usable;
	 *              true otherwise
	 */
	function submit($URI, $formvars="", $formfiles="")
	{
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		$URI_PARTS = parse_url($URI);
		// parse_url() returns false on seriously malformed URIs; treat that
		// like a URI without a scheme so it is rejected below.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		// normalise the optional parts so later reads never hit an undefined index
		foreach (array("query", "path", "scheme", "host") as $part)
		{
			if (empty($URI_PARTS[$part]))
				$URI_PARTS[$part] = '';
		}

		$scheme = strtolower($URI_PARTS["scheme"]);
		if ($scheme != "http" && $scheme != "https")
		{
			// not a valid protocol
			$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
			return false;
		}

		if ($scheme == "https")
		{
			// https is delegated to an external curl binary
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
		}

		$this->host = $URI_PARTS["host"];
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		// without a proxy only the path (plus query string) is sent
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		if ($scheme == "http")
		{
			if (!$this->_connect($fp))
				return false;

			// _isproxy is deliberately read after _connect(), exactly as the
			// original code did (_connect() is not visible here and may update it).
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
		}
		else
		{
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// qualify a redirect target that does not start with our scheme
			if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

			// On-site means: http or https, followed by exactly this host
			// (the look-ahead stops "host.evil.com" from passing as "host").
			$onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?=[:/?#]|$)~i', $this->_redirectaddr);

			// only follow redirect if it's on this site, or offsiteok is true
			if ($onsite || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				if (strpos($this->_redirectaddr, "?") > 0)
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr, $formvars, $formfiles);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls may queue new frame URLs
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
433
434
/*======================================================================*\
435
	Function:	fetchlinks
436
	Purpose:	fetch the links from a web page
437
	Input:		$URI	where you are fetching from
438
	Output:		$this->results	an array of the URLs
439
\*======================================================================*/
440
441
	/**
	 * Fetch a page and replace $this->results with the links found in it.
	 *
	 * When the fetch was redirected, links are expanded relative to the
	 * last redirect address instead of the requested URI.
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true when the fetch succeeded, false otherwise
	 */
	function fetchlinks($URI)
	{
		if (!$this->fetch($URI))
			return false;

		// links are relative to wherever we finally ended up
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$this->results = $this->_striplinks($this->results);
		}
		else
		{
			// one entry per fetched document (framed content)
			for ($i = 0; $i < count($this->results); ++$i)
				$this->results[$i] = $this->_striplinks($this->results[$i]);
		}

		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $base);

		return true;
	}
462
463
/*======================================================================*\
464
	Function:	fetchform
465
	Purpose:	fetch the form elements from a web page
466
	Input:		$URI	where you are fetching from
467
	Output:		$this->results	the resulting html form
468
\*======================================================================*/
469
470
	/**
	 * Fetch a page and replace $this->results with its form elements.
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true when the fetch succeeded, false otherwise
	 */
	function fetchform($URI)
	{
		if (!$this->fetch($URI))
			return false;

		// single document: reduce it in place and finish
		if (!is_array($this->results))
		{
			$this->results = $this->_stripform($this->results);
			return true;
		}

		// several documents (framed content): reduce each one
		for ($i = 0; $i < count($this->results); ++$i)
			$this->results[$i] = $this->_stripform($this->results[$i]);

		return true;
	}
489
490
491
/*======================================================================*\
492
	Function:	fetchtext
493
	Purpose:	fetch the text from a web page, stripping the links
494
	Input:		$URI	where you are fetching from
495
	Output:		$this->results	the text from the web page
496
\*======================================================================*/
497
498
	/**
	 * Fetch a page and replace $this->results with its plain text.
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true when the fetch succeeded, false otherwise
	 */
	function fetchtext($URI)
	{
		if (!$this->fetch($URI))
			return false;

		// single document: convert it in place and finish
		if (!is_array($this->results))
		{
			$this->results = $this->_striptext($this->results);
			return true;
		}

		// several documents (framed content): convert each one
		for ($i = 0; $i < count($this->results); ++$i)
			$this->results[$i] = $this->_striptext($this->results[$i]);

		return true;
	}
514
515
/*======================================================================*\
516
	Function:	submitlinks
517
	Purpose:	grab links from a form submission
518
	Input:		$URI	where you are submitting from
519
	Output:		$this->results	an array of the links from the post
520
\*======================================================================*/
521
522
	/**
	 * Submit a form and replace $this->results with the links found in
	 * the response.
	 *
	 * When the submission was redirected, links are expanded relative to
	 * the last redirect address instead of the requested URI.
	 *
	 * @param string $URI       where you are submitting to
	 * @param mixed  $formvars  form variables, passed through to submit()
	 * @param mixed  $formfiles form files, passed through to submit()
	 * @return bool true when the submission succeeded, false otherwise
	 */
	function submitlinks($URI, $formvars="", $formfiles="")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		// links are relative to wherever we finally ended up
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$this->results = $this->_striplinks($this->results);
			if ($this->expandlinks)
				$this->results = $this->_expandlinks($this->results, $base);
			return true;
		}

		// several documents (framed content): process each one
		for ($i = 0; $i < count($this->results); ++$i)
		{
			$this->results[$i] = $this->_striplinks($this->results[$i]);
			if ($this->expandlinks)
				$this->results[$i] = $this->_expandlinks($this->results[$i], $base);
		}

		return true;
	}
548
549
/*======================================================================*\
550
	Function:	submittext
551
	Purpose:	grab text from a form submission
552
	Input:		$URI	where you are submitting from
553
	Output:		$this->results	the text from the web page
554
\*======================================================================*/
555
556
	/**
	 * Submit a form and replace $this->results with the plain text of
	 * the response.
	 *
	 * When link expansion is enabled, the stripped text is additionally
	 * passed through _expandlinks(), relative to the last redirect
	 * address if there was one, otherwise to the requested URI.
	 *
	 * @param string $URI       where you are submitting to
	 * @param mixed  $formvars  form variables, passed through to submit()
	 * @param mixed  $formfiles form files, passed through to submit()
	 * @return bool true when the submission succeeded, false otherwise
	 */
	function submittext($URI, $formvars = "", $formfiles = "")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$this->results = $this->_striptext($this->results);
			if ($this->expandlinks)
				$this->results = $this->_expandlinks($this->results, $base);
			return true;
		}

		// several documents (framed content): process each one
		for ($i = 0; $i < count($this->results); ++$i)
		{
			$this->results[$i] = $this->_striptext($this->results[$i]);
			if ($this->expandlinks)
				$this->results[$i] = $this->_expandlinks($this->results[$i], $base);
		}

		return true;
	}
582
583
584
585
/*======================================================================*\
586
	Function:	set_submit_multipart
587
	Purpose:	Set the form submission content type to
588
				multipart/form-data
589
\*======================================================================*/
590
	/**
	 * Switch the content type used for form submissions to
	 * multipart/form-data.
	 */
	function set_submit_multipart()
	{
		$multipart_type = "multipart/form-data";
		$this->_submit_type = $multipart_type;
	}
594
595
596
/*======================================================================*\
597
	Function:	set_submit_normal
598
	Purpose:	Set the form submission content type to
599
				application/x-www-form-urlencoded
600
\*======================================================================*/
601
	/**
	 * Switch the content type used for form submissions back to
	 * application/x-www-form-urlencoded.
	 */
	function set_submit_normal()
	{
		$urlencoded_type = "application/x-www-form-urlencoded";
		$this->_submit_type = $urlencoded_type;
	}
605
606
607
608
609
/*======================================================================*\
610
	Private functions
611
\*======================================================================*/
612
613
614
/*======================================================================*\
615
	Function:	_striplinks
616
	Purpose:	strip the hyperlinks from an html document
617
	Input:		$document	document to strip.
618
	Output:		$match		an array of the links
619
\*======================================================================*/
620
621
	/**
	 * Extract the href targets of the <a> tags in an html document.
	 *
	 * Quoted href values are returned first (in document order), followed
	 * by the unquoted ones; empty values are skipped.
	 *
	 * @param string $document document to scan
	 * @return array the link targets; an empty array when there are none
	 */
	function _striplinks($document)
	{
		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// always return an array, also when the document has no links
		$match = array();

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted values, group 3 unquoted ones
		foreach (array(2, 3) as $group)
		{
			foreach ($links[$group] as $val)
			{
				if (!empty($val))
					$match[] = $val;
			}
		}

		// return the links
		return $match;
	}
647
648
/*======================================================================*\
649
	Function:	_stripform
650
	Purpose:	strip the form elements from an html document
651
	Input:		$document	document to strip.
652
	Output:		$match		an array of the links
653
\*======================================================================*/
654
655
	/**
	 * Reduce an html document to its form-related tags.
	 *
	 * Collects the FORM, INPUT, SELECT, TEXTAREA and OPTION tags matched
	 * by the pattern below and joins them with CRLF.
	 *
	 * @param string $document document to reduce
	 * @return string the matched elements, one per line
	 */
	function _stripform($document)
	{
		$form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

		preg_match_all($form_pattern, $document, $elements);

		// index 0 holds the full text of every match
		return implode("\r\n", $elements[0]);
	}
665
666
667
668
/*======================================================================*\
669
	Function:	_striptext
670
	Purpose:	strip the text from an html document
671
	Input:		$document	document to strip.
672
	Output:		$text		the resulting text
673
\*======================================================================*/
674
675
	/**
	 * Reduce an html document to text.
	 *
	 * Removes script blocks and tags, collapses whitespace after line
	 * breaks, and decodes a fixed list of common html entities.
	 *
	 * @param string $document document to convert
	 * @return string the resulting text
	 */
	function _striptext($document)
	{
		// Ordered pattern => replacement table. preg_replace() applies the
		// pairs one after another, so tags are removed before entities are
		// decoded. Entities are listed one by one on purpose.
		$rules = array(
			"'<script[^>]*?>.*?</script>'si"	=> "",			// strip out javascript
			"'<[\/\!]*?[^<>]*?>'si"				=> "",			// strip out html tags
			"'([\r\n])[\s]+'"					=> "\\1",		// strip out white space
			"'&(quot|#34|#034|#x22);'i"			=> "\"",		// replace html entities
			"'&(amp|#38|#038|#x26);'i"			=> "&",			// (including hexadecimal forms)
			"'&(lt|#60|#060|#x3c);'i"			=> "<",
			"'&(gt|#62|#062|#x3e);'i"			=> ">",
			"'&(nbsp|#160|#xa0);'i"				=> " ",
			"'&(iexcl|#161);'i"					=> chr(161),
			"'&(cent|#162);'i"					=> chr(162),
			"'&(pound|#163);'i"					=> chr(163),
			"'&(copy|#169);'i"					=> chr(169),
			"'&(reg|#174);'i"					=> chr(174),
			"'&(deg|#176);'i"					=> chr(176),
			"'&(#39|#039|#x27);'"				=> chr(39),
			"'&(euro|#8364);'i"					=> chr(128),	// europe
			"'&a(uml|UML);'"					=> chr(0xE4),	// german: ANSI &auml;
			"'&o(uml|UML);'"					=> chr(0xF6),	// ANSI &ouml;
			"'&u(uml|UML);'"					=> chr(0xFC),	// ANSI &uuml;
			"'&A(uml|UML);'"					=> chr(0xC4),	// ANSI &Auml;
			"'&O(uml|UML);'"					=> chr(0xD6),	// ANSI &Ouml;
			"'&U(uml|UML);'"					=> chr(0xDC),	// ANSI &Uuml;
			"'&szlig;'i"						=> chr(0xDF),	// ANSI &szlig;
		);

		return preg_replace(array_keys($rules), array_values($rules), $document);
	}
735
736
/*======================================================================*\
737
	Function:	_expandlinks
738
	Purpose:	expand each link into a fully qualified URL
739
	Input:		$links			the links to qualify
740
				$URI			the full URI to get the base from
741
	Output:		$expandedLinks	the expanded links
742
\*======================================================================*/
743
744
	/**
	 * Expand each link into a fully qualified URL.
	 *
	 * The base is derived from $URI: the query string is dropped, then a
	 * trailing "/name.ext" segment and a trailing slash are removed.
	 * Root-relative links ("/x") are prefixed with scheme://host of the
	 * base, other relative links with the whole base. Links that already
	 * start with http://, https:// or mailto: are left qualified as they
	 * are, and "/./" and "/dir/../" sequences are collapsed.
	 *
	 * @param mixed  $links the link (string) or links (array) to qualify
	 * @param string $URI   the full URI to get the base from
	 * @return mixed the expanded link(s), same shape as $links
	 */
	function _expandlinks($links,$URI)
	{
		// drop the query string; an empty URI simply yields an empty base
		preg_match("/^[^\?]+/",$URI,$match);
		$base = isset($match[0]) ? $match[0] : "";

		$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$base);
		$base = preg_replace("|/$|","",$base);

		$base_parts = parse_url($base);
		if (!is_array($base_parts))
			$base_parts = array();
		$base_root = (isset($base_parts["scheme"]) ? $base_parts["scheme"] : "")
					."://"
					.(isset($base_parts["host"]) ? $base_parts["host"] : "");

		$search = array(
			// strip "http://<this host>" only when the host really ends there,
			// so "host.example.org" or "host:8080" are not cut in the middle
			'~^http://'.preg_quote($this->host, '~').'(?=/|$)~i',
			"|^(\/)|i",
			// https links (and links just rooted on an https base) are
			// already absolute and must not get the base prepended again
			"|^(?!https?://)(?!mailto:)|i",
			"|/\./|",
			"|/[^\/]+/\.\./|"
		);

		$replace = array(
			"",
			$base_root."/",
			$base."/",
			"/",
			"/"
		);

		return preg_replace($search,$replace,$links);
	}
773
774
/*======================================================================*\
775
	Function:	_httprequest
776
	Purpose:	go get the http data from the server
777
	Input:		$url		the url to fetch
778
				$fp			the current open file pointer
779
				$URI		the full URI
780
				$body		body contents to send if any (POST)
781
	Output:
782
\*======================================================================*/
783
784
	/**
	 * Send an http request over an open connection and read the response.
	 *
	 * Builds the request line and headers from the object's settings,
	 * writes them (plus the optional body) to $fp, then reads the response
	 * headers into $this->headers and the body into $this->results.
	 * A Location:/URI: header or a meta refresh tag sets the redirect
	 * address; frame src URLs are queued while the frame depth allows it.
	 *
	 * @param string   $url          the url (or path) to put on the request line
	 * @param resource $fp           the current open file pointer
	 * @param string   $URI          the full URI
	 * @param string   $http_method  the request method, e.g. "GET"
	 * @param string   $content_type value for the Content-type header, if any
	 * @param string   $body         body contents to send if any (POST)
	 * @return bool false when a read timed out (status is set to -100),
	 *              true otherwise
	 */
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		$cookie_headers = '';
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		if(empty($url))
			$url = "/";
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		// a Host entry in rawheaders takes precedence over the generated one
		if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
			$headers .= "Host: ".$this->host;
			if(!empty($this->port) && $this->port != 80)
				$headers .= ":".$this->port;
			$headers .= "\r\n";
		}
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";
		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			if ( count($this->cookies) > 0 ) {
				$cookie_headers .= 'Cookie: ';
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
				}
				// cut the trailing "; "
				$headers .= substr($cookie_headers,0,-2) . "\r\n";
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw headers
			// are also sent on every follow-up request (redirects, frames)
			foreach ($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if(!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

		//add proxy auth headers
		if(!empty($this->proxy_user))
			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

		// blank line terminates the header section
		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			stream_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		fwrite($fp,$headers.$body,strlen($headers.$body));

		$this->_redirectaddr = false;
		// reset to an empty list so the property stays defined even when
		// the server sends no headers at all
		$this->headers = array();

		while($currentHeader = fgets($fp,$this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// an empty line ends the response headers
			if($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect.
			// Whitespace after the colon is optional.
			if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",rtrim($currentHeader),$matches))
			{
				$location = $matches[2];
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$location))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$location))
						$this->_redirectaddr .= "/".$location;
					else
						$this->_redirectaddr .= $location;
				}
				else
					$this->_redirectaddr = $location;
			}

			// status line: keep the numeric code and the raw line
			if(preg_match("|^HTTP/|",$currentHeader))
			{
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
				}
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body until the stream delivers nothing more
		$results = '';
		do {
			$_data = fread($fp, $this->maxlength);
			if ($_data === false || strlen($_data) == 0) {
				break;
			}
			$results .= $_data;
		} while(true);

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			// results starts out as a string; appending with [] to a string
			// is a fatal error on current PHP, so switch to a list first
			if(!is_array($this->results))
				$this->results = array();
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
937
938
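/*======================================================================*\
	Function:	_httpsrequest
	Purpose:	go get the https data from the server using curl
	Input:		$url			the url to fetch (not sent; curl is given $URI)
				$URI			the full URI
				$http_method	the request method (not used by the curl call)
				$content_type	value for the Content-type header, if any
				$body			body contents to send if any (POST)
	Output:		true on success, false if curl returned an error
				($this->error is set)
\*======================================================================*/
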
	/**
	 * Fetch an HTTPS document by running the external curl binary.
	 *
	 * Builds the request headers from the object's settings, runs curl with the
	 * response headers dumped to a temporary file, then stores the response
	 * headers, status, redirect target and body on the object.
	 *
	 * @param string $url          Request path. Not used: curl is given $URI directly.
	 * @param string $URI          The full URI passed to curl.
	 * @param string $http_method  Request method. Not used by this method.
	 * @param string $content_type Optional value for the Content-type header.
	 * @param string $body         Optional request body, sent with curl's -d option.
	 * @return bool True on success; false if the temporary file could not be
	 *              created or curl exited with an error ($this->error is set).
	 */
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
	{
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$headers = array();

		// parse_url() may return false or omit the scheme; fall back to an
		// empty scheme instead of indexing a non-array.
		$URI_PARTS = parse_url($URI);
		$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";

		// No request line is built here: curl derives it from $URI.
		if(!empty($this->agent))
			$headers[] = "User-Agent: ".$this->agent;
		if(!empty($this->host))
		{
			if(!empty($this->port))
				$headers[] = "Host: ".$this->host.":".$this->port;
			else
				$headers[] = "Host: ".$this->host;
		}
		if(!empty($this->accept))
			$headers[] = "Accept: ".$this->accept;
		if(!empty($this->referer))
			$headers[] = "Referer: ".$this->referer;
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			if ( count($this->cookies) > 0 ) {
				$cookie_str = 'Cookie: ';
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
				}
				// drop the trailing "; "
				$headers[] = substr($cookie_str,0,-2);
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach instead of each(), which was removed in PHP 8
			foreach ( $this->rawheaders as $headerKey => $headerVal )
				$headers[] = $headerKey.": ".$headerVal;
		}
		if(!empty($content_type)) {
			if ($content_type == "multipart/form-data")
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
			else
				$headers[] = "Content-type: $content_type";
		}
		if(!empty($body))
			$headers[] = "Content-length: ".strlen($body);
		if(!empty($this->user) || !empty($this->pass))
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

		// curl writes the response headers to this file (-D)
		$headerfile = tempnam( $this->temp_dir, "sno" );
		if ( false === $headerfile ) {
			$this->error = "Error: could not create a temporary file for the cURL response headers.";
			return false;
		}

		// NOTE(review): -k makes curl skip TLS certificate verification.
		// Left unchanged because callers may rely on it; confirm it is wanted.
		$cmdline_params = '-k -D ' . escapeshellarg( $headerfile );

		foreach ( $headers as $header ) {
			$cmdline_params .= ' -H ' . escapeshellarg( $header );
		}

		if ( ! empty( $body ) ) {
			$cmdline_params .= ' -d ' . escapeshellarg( $body );
		}

		if ( $this->read_timeout > 0 ) {
			$cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
		}

		exec( $this->curl_path . ' ' . $cmdline_params . ' ' . escapeshellarg( $URI ), $results, $return );

		if($return)
		{
			// do not leave the temporary header file behind on failure
			if ( file_exists( $headerfile ) )
				unlink( $headerfile );
			$this->error = "Error: cURL could not retrieve the document, error $return.";
			return false;
		}

		// exec() returns the output split into lines; rebuild the body
		$results = implode("\r\n",$results);

		// read the headers into memory and remove the file straight away
		$result_headers = file( $headerfile );
		if ( ! is_array( $result_headers ) )
			$result_headers = array();
		if ( file_exists( $headerfile ) )
			unlink( $headerfile );

		$this->_redirectaddr = false;
		// reset to an empty array so later count()/foreach on it stay valid
		$this->headers = array();

		foreach ( $result_headers as $currentHeader )
		{
			// if a header begins with Location: or URI:, set the redirect
			if(preg_match("/^(?:Location|URI):[ \t]*(.+)$/i",chop($currentHeader),$matches))
			{
				$target = $matches[1];
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$target))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$target))
						$this->_redirectaddr .= "/".$target;
					else
						$this->_redirectaddr .= $target;
				}
				else
					$this->_redirectaddr = $target;
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
				// record the numeric status the same way the plain HTTP path does
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader,$status))
					$this->status = $status[1];
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		// check if there is a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			foreach ( $match[1] as $frameurl )
				$this->_frameurls[] = $this->_expandlinks($frameurl,$scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
1088
1089
/*======================================================================*\
	Function:	setcookies()
	Purpose:	set cookies for a redirection, taken from the Set-Cookie
				lines of the stored response headers
\*======================================================================*/

	/**
	 * Copy cookies from the stored response headers into $this->cookies.
	 *
	 * Each "Set-Cookie: name=value" header line adds or overwrites the entry
	 * $this->cookies[name] with the URL-decoded value; attributes after the
	 * first ";" are ignored.
	 *
	 * @return void
	 */
	function setcookies()
	{
		// The request methods unset() $this->headers before reading a response,
		// so it may not be an array here; count() on it would fail on PHP 8.
		if(empty($this->headers) || !is_array($this->headers))
			return;

		foreach($this->headers as $header)
		{
			if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header,$match))
				$this->cookies[$match[1]] = urldecode($match[2]);
		}
	}
1102
1103
1104
/*======================================================================*\
	Function:	_check_timeout
	Purpose:	checks whether timeout has occurred
	Input:		$fp	file pointer
	Output:		true if the stream reports a timeout (also sets
				$this->timed_out), false otherwise
\*======================================================================*/

	/**
	 * Report whether the given stream has timed out.
	 *
	 * The stream is only inspected when a positive read timeout is configured.
	 * When a timeout is detected, $this->timed_out is set to true.
	 *
	 * @param resource $fp Stream to inspect.
	 * @return bool True if the stream reports a timeout, false otherwise.
	 */
	function _check_timeout($fp)
	{
		// without a read timeout there is nothing to check
		if (!($this->read_timeout > 0))
			return false;

		$stream_info = socket_get_status($fp);
		if (!$stream_info["timed_out"])
			return false;

		$this->timed_out = true;
		return true;
	}
1121
1122
/*======================================================================*\
	Function:	_connect
	Purpose:	make a socket connection
	Input:		$fp	file pointer (by reference; receives the socket)
	Output:		true on success, false on failure ($this->status and
				$this->error are set)
\*======================================================================*/

	/**
	 * Open the socket for a request, via the proxy when one is configured.
	 *
	 * $this->status is reset to 0 before connecting. On failure it is set to
	 * the errno reported by fsockopen() and $this->error to a matching message.
	 *
	 * @param resource $fp Receives the opened socket (passed by reference).
	 * @return bool True if the connection was opened, false otherwise.
	 */
	function _connect(&$fp)
	{
		if(!empty($this->proxy_host) && !empty($this->proxy_port))
		{
			// connect to the proxy instead of the target host
			$this->_isproxy = true;

			$host = $this->proxy_host;
			$port = $this->proxy_port;
		}
		else
		{
			$host = $this->host;
			$port = $this->port;
		}

		$this->status = 0;

		$fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
		if($fp)
		{
			// socket connection succeeded
			return true;
		}

		// socket connection failed
		$this->status = $errno;
		switch($errno)
		{
			// each case needs its own break; without it every error fell
			// through to the default message
			case -3:
				$this->error="socket creation failed (-3)";
				break;
			case -4:
				$this->error="dns lookup failure (-4)";
				break;
			case -5:
				$this->error="connection refused or timed out (-5)";
				break;
			default:
				$this->error="connection failed (".$errno.")";
				break;
		}
		return false;
	}
1175
/*======================================================================*\
	Function:	_disconnect
	Purpose:	disconnect a socket connection
	Input:		$fp	file pointer
	Output:		the result of fclose() on the file pointer
\*======================================================================*/

	/**
	 * Close a connection opened for a request.
	 *
	 * @param resource $fp Stream to close.
	 * @return bool The result of fclose() on the stream.
	 */
	function _disconnect($fp)
	{
		$closed = fclose($fp);

		return $closed;
	}
1185
1186
1187
/*======================================================================*\
	Function:	_prepare_post_body
	Purpose:	Prepare post body according to encoding type
	Input:		$formvars  - form variables
				$formfiles - form upload files
	Output:		post body (nothing is returned when both inputs are empty)
\*======================================================================*/

	/**
	 * Build the request body for a form submission.
	 *
	 * The encoding is chosen by $this->_submit_type:
	 *  - "application/x-www-form-urlencoded": key=value pairs joined with "&";
	 *    array/object values are sent as key[]=value.
	 *  - "multipart/form-data": one MIME part per value and per readable file,
	 *    using a freshly generated boundary stored in $this->_mime_boundary.
	 * Any other submit type yields an empty string.
	 *
	 * @param mixed $formvars  Form variables (name => value or list of values).
	 * @param mixed $formfiles Upload files (field name => path or list of paths).
	 * @return string|null The encoded body, or null when both inputs are empty.
	 */
	function _prepare_post_body($formvars, $formfiles)
	{
		settype($formvars, "array");
		settype($formfiles, "array");
		$postdata = '';

		if (count($formvars) == 0 && count($formfiles) == 0)
			return;

		switch ($this->_submit_type) {
			case "application/x-www-form-urlencoded":
				// foreach instead of each(), which was removed in PHP 8
				foreach ($formvars as $key => $val) {
					if (is_array($val) || is_object($val)) {
						foreach ($val as $cur_val) {
							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
						}
					} else
						$postdata .= urlencode($key)."=".urlencode($val)."&";
				}
				break;

			case "multipart/form-data":
				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

				foreach ($formvars as $key => $val) {
					if (is_array($val) || is_object($val)) {
						foreach ($val as $cur_val) {
							$postdata .= "--".$this->_mime_boundary."\r\n";
							// concatenate so the name is sent as key[]; "\[" inside
							// double quotes would emit a literal backslash
							$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
							$postdata .= "$cur_val\r\n";
						}
					} else {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
						$postdata .= "$val\r\n";
					}
				}

				foreach ($formfiles as $field_name => $file_names) {
					settype($file_names, "array");
					foreach ($file_names as $file_name) {
						if (!is_readable($file_name)) continue;

						// file_get_contents() is binary-safe and copes with empty
						// files, where fread($fp, 0) fails
						$file_content = file_get_contents($file_name);
						if ($file_content === false) continue;

						$base_name = basename($file_name);

						$postdata .= "--".$this->_mime_boundary."\r\n";
						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
						$postdata .= "$file_content\r\n";
					}
				}
				// closing boundary
				$postdata .= "--".$this->_mime_boundary."--\r\n";
				break;
		}

		return $postdata;
	}
1257
}
1258
endif;
1259
?>
1260