Completed
Pull Request — 1.10.x (#1154)
by
unknown
45:16
created

Snoopy::_connect()   C

Complexity

Conditions 7
Paths 10

Size

Total Lines 46
Code Lines 28

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 7
eloc 28
nc 10
nop 1
dl 0
loc 46
rs 6.7272
1
<?php
2
3
/*************************************************
4
5
Snoopy - the PHP net client
6
Author: Monte Ohrt <[email protected]>
7
Copyright (c): 1999-2000 ispi, all rights reserved
8
Version: 1.0
9
10
 * This library is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * This library is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with this library; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24
You may contact the author of Snoopy by e-mail at:
25
[email protected]
26
27
Or, write to:
28
Monte Ohrt
29
CTO, ispi
30
237 S. 70th suite 220
31
Lincoln, NE 68510
32
33
The latest version of Snoopy can be obtained from:
34
http://snoopy.sourceforge.com
35
36
*************************************************/
37
38
class Snoopy {
39
	/**** Public variables ****/
40
	
41
	/* user definable vars */
42
43
	public $host			=	"www.php.net";		// host name we are connecting to
44
	public $port			=	80;					// port we are connecting to
45
	public $proxy_host		=	"";					// proxy host to use
46
	public $proxy_port		=	"";					// proxy port to use
47
	public $agent			=	"Snoopy v1.0";		// agent we masquerade as
48
	public $referer		=	"";					// referer info to pass
49
	public $cookies		=	array();			// array of cookies to pass
50
												// $cookies["username"]="joe";
51
	public	$rawheaders		=	array();			// array of raw headers to send
52
												// $rawheaders["Content-type"]="text/html";
53
54
	public $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
55
	public $lastredirectaddr	=	"";				// contains address of last redirected address
56
	public	$offsiteok		=	true;				// allows redirection off-site
57
	public $maxframes		=	0;					// frame content depth maximum. 0 = disallow
58
	public $expandlinks	=	true;				// expand links to fully qualified URLs.
59
												// this only applies to fetchlinks()
60
												// or submitlinks()
61
	public $passcookies	=	true;				// pass set cookies back through redirects
62
												// NOTE: this currently does not respect
63
												// dates, domains or paths.
64
	
65
	public	$user			=	"";					// user for http authentication
66
	public	$pass			=	"";					// password for http authentication
67
	
68
	// http accept types
69
	public $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
70
	
71
	public $results		=	"";					// where the content is put
72
		
73
	public $error			=	"";					// error messages sent here
74
	public	$response_code	=	"";					// response code returned from server
75
	public	$headers		=	array();			// headers returned from server sent here
76
	public	$maxlength		=	500000;				// max return data length (body)
77
	public $read_timeout	=	0;					// timeout on read operations, in seconds
78
												// supported only since PHP 4 Beta 4
79
												// set to 0 to disallow timeouts
80
	public $timed_out		=	false;				// if a read operation timed out
81
	public	$status			=	0;					// http request status
82
	
83
	public	$curl_path		=	"/usr/bin/curl";
84
												// Snoopy will use cURL for fetching
85
												// SSL content if a full system path to
86
												// the cURL binary is supplied here.
87
												// set to false if you do not have
88
												// cURL installed. See http://curl.haxx.se
89
												// for details on installing cURL.
90
												// Snoopy does *not* use the cURL
91
												// library functions built into php,
92
												// as these functions are not stable
93
												// as of this Snoopy release.
94
	
95
	// send Accept-encoding: gzip?
96
	public $use_gzip		= true;	
97
	
98
	/**** Private variables ****/	
99
	
100
	private	$_maxlinelen	=	4096;				// max line length (headers)
101
	
102
	private $_httpmethod	=	"GET";				// default http request method
103
	private $_httpversion	=	"HTTP/1.0";			// default http request version
104
	private $_submit_method	=	"POST";				// default submit method
105
	private $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
106
	private $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
107
	private $_redirectaddr	=	false;				// will be set if page fetched is a redirect
108
	private $_redirectdepth	=	0;					// increments on an http redirect
109
	private $_frameurls		= 	array();			// frame src urls
110
	private $_framedepth	=	0;					// increments on frame depth
111
	
112
	private $_isproxy		=	false;				// set if using a proxy server
113
	private $_fp_timeout	=	30;					// timeout for socket connection
114
115
/*======================================================================*\
116
	Function:	fetch
117
	Purpose:	fetch the contents of a web page
118
				(and possibly other protocols in the
119
				future like ftp, nntp, gopher, etc.)
120
	Input:		$URI	the location of the page to fetch
121
	Output:		$this->results	the output text from the fetch
122
\*======================================================================*/
123
124
	public function fetch($URI) {
		// Fetches $URI and leaves the outcome in $this->results / $this->error.
		//   http  : opens a connection with _connect() and sends the request
		//           with _httprequest()
		//   https : hands the request to _httpsrequest() (external curl binary)
		// Afterwards a recorded redirect is followed and recorded frame URLs are
		// fetched, both by calling fetch() again.
		// Returns false when the scheme is not http/https, when curl_path is not
		// executable (https) or when _connect() fails (http); true otherwise.
		// The return value of the request helper itself is not inspected.

		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URIs
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";

		// request target used when no proxy is involved: path plus optional query.
		// A URI such as http://host?x=1 has no path component; use "/" so the
		// request line does not start with "?".
		$path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
		if (isset($URI_PARTS["query"]))
			$path = ($path === "" ? "/" : $path)."?".$URI_PARTS["query"];

		switch ($scheme) {
			case "http":
				$this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				$fp = false;
				if (!$this->_connect($fp))
					return false;

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
				$this->_disconnect($fp);
				break;

			case "https":
				if (!$this->curl_path || !is_executable($this->curl_path)) {
					$this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
					return false;
				}
				$this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
				break;

			default:
				// not a valid protocol
				$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
				return false;
		}

		$this->_follow_redirect();
		$this->_fetch_frames();

		return true;
	}

/*======================================================================*\
	Function:	_follow_redirect
	Purpose:	follow the redirect recorded in $this->_redirectaddr by the
				last request, if there is one and it is allowed
	Input:		none (reads $this->_redirectaddr, maxredirs, offsiteok, host)
	Output:		none (calls fetch() again for the redirect target)
\*======================================================================*/

	private function _follow_redirect()
	{
		if (!$this->_redirectaddr)
			return;

		// url was redirected, check if we've hit the max depth
		if ($this->maxredirs <= $this->_redirectdepth)
			return;

		// only follow redirect if it's on this site, or offsiteok is true
		// NOTE(review): the on-site test only recognises http:// targets, also
		// for https fetches - kept as in the original code.
		$onsite = preg_match("|^http://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
		if ($onsite || $this->offsiteok) {
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch($this->_redirectaddr);
		}
	}

/*======================================================================*\
	Function:	_fetch_frames
	Purpose:	fetch the frame URLs collected in $this->_frameurls while
				$this->_framedepth is below $this->maxframes
	Input:		none
	Output:		none (calls fetch() once per frame URL)
\*======================================================================*/

	private function _fetch_frames()
	{
		if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
			return;

		// work on a copy and clear the list first: the nested fetch() calls
		// may collect frame URLs of their own
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl) {
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}
254
255
256
257
/*======================================================================*\
258
	Private functions
259
\*======================================================================*/
260
	
261
	
262
/*======================================================================*\
263
	Function:	_striplinks
264
	Purpose:	strip the hyperlinks from an html document
265
	Input:		$document	document to strip.
266
	Output:		$match		an array of the links
267
\*======================================================================*/
268
269
	private function _striplinks($document)
	{
		// Returns the href values of the <a> tags found in $document.
		// Quoted values (capture group 2) are listed first, then unquoted
		// values (capture group 3); empty values are skipped. An empty array
		// is returned when nothing matches.
		// NOTE(review): static analysis reported this method as unused within
		// the analysed code - confirm before removing it.

		// ".*?" must stay lazy: with the /s modifier a greedy ".*" runs to the
		// LAST href in the document, so only one link would ever be returned.
		preg_match_all("'<\s*a\s+.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		$match = array();

		// catenate the non-empty matches from the conditional subpattern
		foreach (array(2, 3) as $group)
		{
			foreach ($links[$group] as $val)
			{
				if(!empty($val))
					$match[] = $val;
			}
		}

		// return the links
		return $match;
	}
295
296
/*======================================================================*\
297
	Function:	_stripform
298
	Purpose:	strip the form elements from an html document
299
	Input:		$document	document to strip.
300
	Output:		$match		the matched form tags, joined with CRLF
301
\*======================================================================*/
302
303
	private function _stripform($document)
	{
		// Collects every FORM, INPUT, SELECT, TEXTAREA and OPTION tag (opening
		// or closing, case-insensitive) found in $document; for OPTION tags the
		// text up to the next option/select tag is part of the match.
		// The matches are returned as one string, joined with CRLF.
		// NOTE(review): static analysis reported this method as unused within
		// the analysed code - confirm before removing it.
		$formTagPattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

		$found = array();
		preg_match_all($formTagPattern, $document, $found);

		// index 0 holds the full-pattern matches
		return implode("\r\n", $found[0]);
	}
313
314
	
315
	
316
/*======================================================================*\
317
	Function:	_striptext
318
	Purpose:	strip the text from an html document
319
	Input:		$document	document to strip.
320
	Output:		$text		the resulting text
321
\*======================================================================*/
322
323
	private function _striptext($document)
	{
		// Returns $document with <script> blocks and all tags removed, each
		// line break plus the whitespace following it collapsed to the line
		// break alone, and a fixed list of HTML entities decoded.
		// The replacements run in the order listed, so "&amp;lt;" ends up as "<".
		// NOTE(review): static analysis reported this method as unused within
		// the analysed code - confirm before removing it.

		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
		// so, list your entities one by one here. I included some of the
		// more common ones.

		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
						"'([\r\n])[\s]+'",					// strip out white space
						"'&(quote?|#34);'i",				// replace html entities; the entity is
															// "&quot;" - the misspelt "&quote;"
															// the old pattern matched is still accepted
						"'&(amp|#38);'i",
						"'&(lt|#60);'i",
						"'&(gt|#62);'i",
						"'&(nbsp|#160);'i",
						"'&(iexcl|#161);'i",
						"'&(cent|#162);'i",
						"'&(pound|#163);'i",
						"'&(copy|#169);'i"
						);
		// chr() yields single bytes, i.e. the last four entities come out as
		// single-byte characters with those code numbers
		$replace = array(	"",
							"",
							"\\1",
							"\"",
							"&",
							"<",
							">",
							" ",
							chr(161),
							chr(162),
							chr(163),
							chr(169));

		$text = preg_replace($search,$replace,$document);

		return $text;
	}
360
361
/*======================================================================*\
362
	Function:	_expandlinks
363
	Purpose:	expand each link into a fully qualified URL
364
	Input:		$links			the links to qualify
365
				$URI			the full URI to get the base from
366
	Output:		$expandedLinks	the expanded links
367
\*======================================================================*/
368
369
	private function _expandlinks($links,$URI)
	{
		// Rewrites $links (a string or an array of strings) against a base
		// derived from $URI and returns the result of preg_replace().

		// base: $URI up to the first "?", with a trailing "/name.ext" removed
		$uriWithoutQuery = array();
		preg_match("/^[^\?]+/",$URI,$uriWithoutQuery);
		$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$uriWithoutQuery[0]);

		// pattern => replacement; preg_replace() applies them in this order
		$rules = array(
			// drop a leading http://<current host>
			"|^http://".preg_quote($this->host)."|i"	=> "",
			// prefix the base (replacing one leading slash, if present) to
			// anything not starting with http:// or mailto:
			"|^(?!http://)(\/)?(?!mailto:)|i"			=> $base."/",
			// "/./" becomes "/"
			"|/\./|"									=> "/",
			// "/segment/../" becomes "/"
			"|/[^\/]+/\.\./|"							=> "/"
		);

		return preg_replace(array_keys($rules),array_values($rules),$links);
	}
392
393
/*======================================================================*\
394
	Function:	_httprequest
395
	Purpose:	go get the http data from the server
396
	Input:		$url		the url to fetch
397
				$fp			the current open file pointer
398
				$URI		the full URI
399
				$body		body contents to send if any (POST)
400
	Output:		
401
\*======================================================================*/
402
	
403
	private function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		// Sends one HTTP request over the open stream $fp and reads the reply.
		//   $url           request target for the request line ("/" if empty)
		//   $fp            open stream to write the request to / read from
		//   $URI           full URI; its scheme is used to complete relative
		//                  redirects and frame URLs
		//   $http_method   method for the request line
		//   $content_type  value for the Content-type header, if any
		//   $body          request body, if any
		// Sets $this->headers, status, response_code, results and, when the
		// reply asks for it, _redirectaddr and _frameurls.
		// Returns false (status -100) when a read timed out, true otherwise.

		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		// this method only speaks plain HTTP, so fall back to "http" when the
		// URI carries no usable scheme
		$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";

		if(empty($url))
			$url = "/";
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		if(!empty($this->host) && !isset($this->rawheaders['Host']))
			$headers .= "Host: ".$this->host."\r\n";
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";

		if($this->use_gzip) {
			// make sure PHP was built with --with-zlib
			// and we can handle gzipp'ed data
			// (the function name must be a string; a bare word is an
			// undefined constant and an Error on PHP 8)
			if ( function_exists('gzinflate') ) {
			   $headers .= "Accept-encoding: gzip\r\n";
			}
			else {
			   trigger_error(
			   	"use_gzip is on, but PHP was built without zlib support.".
				"  Requesting file(s) without gzip encoding.",
				E_USER_NOTICE);
			}
		}

		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			$cookie_pairs = array();
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
			}
			$headers .= "Cookie: ".implode("; ",$cookie_pairs)."\r\n";
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw headers
			// are also sent on follow-up requests (redirects, frames)
			foreach ( $this->rawheaders as $headerKey => $headerVal )
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		// strlen() instead of empty(): a body of "0" still needs its length sent
		if(strlen((string)$body) > 0)
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			stream_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		fwrite($fp,$headers.$body,strlen($headers.$body));

		$this->_redirectaddr = false;
		// start with an empty list rather than unsetting the property, so
		// $this->headers is still defined when the reply has no headers
		$this->headers = array();

		// content was returned gzip encoded?
		$is_gzipped = false;

		// compare against false: a line such as "0" must not end the loop
		while(($currentHeader = fgets($fp,$this->_maxlinelen)) !== false)
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// a blank line ends the header section
			if(preg_match("/^\r?\n$/", $currentHeader) )
			      break;

			// if a header begins with Location: or URI:, set the redirect.
			// One case-insensitive pattern both detects the header and
			// extracts the URL, so "location: /x" is handled as well.
			if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
			{
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
							$this->_redirectaddr .= "/".$matches[2];
					else
							$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
                }
				$this->response_code = $currentHeader;
			}

			// header names are case-insensitive
			if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader) ) {
				$is_gzipped = true;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body in chunks; stop at end of data or once more than
		// maxlength bytes have been collected
		$results = "";
		while ( ($data = fread($fp, $this->maxlength)) !== false && $data !== "" ) {
		    $results .= $data;
		    if ( strlen($results) > $this->maxlength ) {
		        break;
		    }
		}

		// gunzip
		if ( $is_gzipped ) {
			// per http://www.php.net/manual/en/function.gzencode.php:
			// skip the 10-byte gzip header and inflate the rest
			$inflated = function_exists('gzinflate') ? gzinflate(substr($results, 10)) : false;
			if ($inflated !== false)
				$results = $inflated;
			else
				// keep the raw body instead of replacing it with false
				$this->error = "Could not inflate gzip-encoded response body.\n";
		}

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a a redirect meta tag

		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			// $this->results starts out as a string; "[]" on a string is a
			// fatal error on PHP >= 7.1, so switch to an array first
			if(!is_array($this->results))
				$this->results = ($this->results === "" || $this->results === null) ? array() : array($this->results);
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
576
577
/*======================================================================*\
578
	Function:	_httpsrequest
579
	Purpose:	go get the https data from the server using curl
580
	Input:		$url		the url to fetch
581
				$URI		the full URI
582
				$body		body contents to send if any (POST)
583
	Output:		
584
\*======================================================================*/
585
	
586
	private function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
587
	{
588
		if($this->passcookies && $this->_redirectaddr)
589
			$this->setcookies();
590
591
		$headers = array();		
592
					
593
		$URI_PARTS = parse_url($URI);
594
		if(empty($url))
595
			$url = "/";
596
		// GET ... header not needed for curl
597
		//$headers[] = $http_method." ".$url." ".$this->_httpversion;		
598
		if(!empty($this->agent))
599
			$headers[] = "User-Agent: ".$this->agent;
600
		if(!empty($this->host))
601
			$headers[] = "Host: ".$this->host;
602
		if(!empty($this->accept))
603
			$headers[] = "Accept: ".$this->accept;
604
		if(!empty($this->referer))
605
			$headers[] = "Referer: ".$this->referer;
606
		if(!empty($this->cookies))
607
		{			
608
			if(!is_array($this->cookies))
609
				$this->cookies = (array)$this->cookies;
610
	
611
			reset($this->cookies);
612
			if ( count($this->cookies) > 0 ) {
613
				$cookie_str = 'Cookie: ';
614 View Code Duplication
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
615
				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
616
				}
617
				$headers[] = substr($cookie_str,0,-2);
618
			}
619
		}
620 View Code Duplication
		if(!empty($this->rawheaders))
621
		{
622
			if(!is_array($this->rawheaders))
623
				$this->rawheaders = (array)$this->rawheaders;
624
			while(list($headerKey,$headerVal) = each($this->rawheaders))
625
				$headers[] = $headerKey.": ".$headerVal;
626
		}
627
		if(!empty($content_type)) {
628
			if ($content_type == "multipart/form-data")
629
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
630
			else
631
				$headers[] = "Content-type: $content_type";
632
		}
633
		if(!empty($body))	
634
			$headers[] = "Content-length: ".strlen($body);
635 View Code Duplication
		if(!empty($this->user) || !empty($this->pass))	
636
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
637
			
638
		for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
639
			$cmdline_params .= " -H \"".$headers[$curr_header]."\"";
640
		}
641
			  	                         
642
		if(!empty($body))
643
			$cmdline_params .= " -d \"$body\"";
644
		
645
		if($this->read_timeout > 0)
646
			$cmdline_params .= " -m ".$this->read_timeout;
647
		
648
		$headerfile = uniqid(time());
649
		
650
		# accept self-signed certs
651
		$cmdline_params .= " -k";
652
        $results = array();
653
        $return = 0;
654
		exec($this->curl_path." -D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return);
655
		
656
		if($return)
0 ignored issues
show
Bug Best Practice introduced by
The expression $return of type integer|null is loosely compared to true; this is ambiguous if the integer can be zero. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
657
		{
658
			$this->error = "Error: cURL could not retrieve the document, error $return.";
659
			return false;
660
		}
661
			
662
			
663
		$results = implode("\r\n",$results);
664
		
665
		$result_headers = file("/tmp/$headerfile");
666
						
667
		$this->_redirectaddr = false;
668
		unset($this->headers);
669
						
670
		for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
671
		{
672
			
673
			// if a header begins with Location: or URI:, set the redirect
674 View Code Duplication
			if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
675
			{
676
				// get URL portion of the redirect
677
				preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches);
678
				// look for :// in the Location header to see if hostname is included
679
				if(!preg_match("|\:\/\/|",$matches[2]))
680
				{
681
					// no host in the path, so prepend
682
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
0 ignored issues
show
Documentation Bug introduced by
The property $_redirectaddr was declared of type boolean, but $URI_PARTS['scheme'] . '...ost . ':' . $this->port is of type string. Maybe add a type cast?

This check looks for assignments to scalar types that may be of the wrong type.

To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.

$answer = 42;

$correct = false;

$correct = (bool) $answer;
Loading history...
683
					// eliminate double slash
684
					if(!preg_match("|^/|",$matches[2]))
685
							$this->_redirectaddr .= "/".$matches[2];
686
					else
687
							$this->_redirectaddr .= $matches[2];
688
				}
689
				else
690
					$this->_redirectaddr = $matches[2];
0 ignored issues
show
Documentation Bug introduced by
The property $_redirectaddr was declared of type boolean, but $matches[2] is of type string. Maybe add a type cast?

This check looks for assignments to scalar types that may be of the wrong type.

To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.

$answer = 42;

$correct = false;

$correct = (bool) $answer;
Loading history...
691
			}
692
		
693 View Code Duplication
			if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
694
			{
695
			    $this->response_code = $result_headers[$currentHeader];
696
			    if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
697
			    {
698
				$this->status= $match[1];
0 ignored issues
show
Documentation Bug introduced by
The property $status was declared of type integer, but $match[1] is of type string. Maybe add a type cast?

This check looks for assignments to scalar types that may be of the wrong type.

To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.

$answer = 42;

$correct = false;

$correct = (bool) $answer;
Loading history...
699
                	    }
700
			}
701
			$this->headers[] = $result_headers[$currentHeader];
702
		}
703
704
		// check if there is a redirect meta tag
705
		
706
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
707
		{
708
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);	
709
		}
710
711
		// have we hit our frame depth and is there frame src to fetch?
712 View Code Duplication
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
713
		{
714
			$this->results[] = $results;
715
			for($x=0; $x<count($match[1]); $x++)
716
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
717
		}
718
		// have we already fetched framed content?
719
		elseif(is_array($this->results))
720
			$this->results[] = $results;
721
		// no framed content
722
		else
723
			$this->results = $results;
724
725
		unlink("/tmp/$headerfile");
726
		
727
		return true;
728
	}
729
730
/*======================================================================*\
731
	Function:	setcookies()
732
	Purpose:	set cookies for a redirection
733
\*======================================================================*/
734
	
735
	/**
	 * Copy cookies announced by the last response into $this->cookies.
	 *
	 * Every entry of $this->headers that starts with "Set-Cookie:" (matched
	 * case-insensitively) contributes one name/value pair: the name is the
	 * text up to the first "=", the value is the text up to the first ";".
	 * Cookie attributes following the ";" are ignored.
	 *
	 * @return void
	 */
	public function setcookies()
	{
		// The headers property may have been unset by a request that read no
		// header lines; calling count() on it would then fail, so bail out.
		if(empty($this->headers) || !is_array($this->headers))
			return;

		foreach($this->headers as $header)
		{
			if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $header, $match))
				$this->cookies[$match[1]] = $match[2];
		}
	}
743
744
	
745
/*======================================================================*\
746
	Function:	_check_timeout
747
	Purpose:	checks whether timeout has occurred
748
	Input:		$fp	file pointer
749
\*======================================================================*/
750
751
	/**
	 * Tell whether the given stream has timed out.
	 *
	 * The check is only performed when $this->read_timeout is greater than
	 * zero. When the stream reports a timeout, $this->timed_out is set to
	 * true as a side effect.
	 *
	 * @param resource $fp file pointer to inspect
	 * @return bool true if a timeout was detected, false otherwise
	 */
	private function _check_timeout($fp)
	{
		// No positive read timeout configured: nothing to check.
		if (!($this->read_timeout > 0)) {
			return false;
		}

		$stream_info = socket_get_status($fp);
		if (!$stream_info["timed_out"]) {
			return false;
		}

		$this->timed_out = true;
		return true;
	}
762
763
/*======================================================================*\
764
	Function:	_connect
765
	Purpose:	make a socket connection
766
	Input:		$fp	file pointer
767
\*======================================================================*/
768
	
769
	/**
	 * Open a socket connection to the target host, or to the proxy if one
	 * is configured.
	 *
	 * When both $this->proxy_host and $this->proxy_port are non-empty the
	 * connection goes to the proxy and $this->_isproxy is set to true;
	 * otherwise $this->host / $this->port are used. $this->status is reset
	 * to 0 before connecting. On failure $this->status receives the error
	 * number reported by fsockopen() and $this->error a matching message.
	 *
	 * @param resource|false $fp receives the fsockopen() result (by reference)
	 * @return bool true when the socket was opened, false otherwise
	 */
	private function _connect(&$fp)
	{
		if(!empty($this->proxy_host) && !empty($this->proxy_port))
		{
			$this->_isproxy = true;
			$host = $this->proxy_host;
			$port = $this->proxy_port;
		}
		else
		{
			$host = $this->host;
			$port = $this->port;
		}

		$this->status = 0;

		$fp = fsockopen(
					$host,
					$port,
					$errno,
					$errstr,
					$this->_fp_timeout
					);

		if($fp)
		{
			// socket connection succeeded
			return true;
		}

		// socket connection failed
		$this->status = $errno;
		switch($errno)
		{
			// Each case must end with break; without it every failure falls
			// through to the default branch and the specific message is lost.
			case -3:
				$this->error="socket creation failed (-3)";
				break;
			case -4:
				$this->error="dns lookup failure (-4)";
				break;
			case -5:
				$this->error="connection refused or timed out (-5)";
				break;
			default:
				$this->error="connection failed (".$errno.")";
				break;
		}
		return false;
	}
815
/*======================================================================*\
816
	Function:	_disconnect
817
	Purpose:	disconnect a socket connection
818
	Input:		$fp	file pointer
819
\*======================================================================*/
820
	
821
	/**
	 * Close a previously opened socket connection.
	 *
	 * @param resource $fp file pointer to close
	 * @return bool the result of fclose()
	 */
	private function _disconnect($fp)
	{
		$closed = fclose($fp);

		return $closed;
	}
825
826
	
827
/*======================================================================*\
828
	Function:	_prepare_post_body
829
	Purpose:	Prepare post body according to encoding type
830
	Input:		$formvars  - form variables
831
				$formfiles - form upload files
832
	Output:		post body
833
\*======================================================================*/
834
	
835
	/**
	 * Build the request body for a form submission.
	 *
	 * The encoding is selected by $this->_submit_type:
	 *  - "application/x-www-form-urlencoded": key=value pairs joined by "&"
	 *    (a trailing "&" is left in place); array/object values are emitted
	 *    as repeated "key[]=value" pairs.
	 *  - "multipart/form-data": one MIME part per variable and per readable
	 *    upload file, delimited by a freshly generated boundary that is
	 *    stored in $this->_mime_boundary.
	 * Any other submit type yields an empty string.
	 *
	 * @param mixed $formvars  form variables (name => value or list of values)
	 * @param mixed $formfiles upload files (field name => path or list of paths)
	 * @return string|null the post body, or null when both inputs are empty
	 */
	private function _prepare_post_body($formvars, $formfiles)
	{
		settype($formvars, "array");
		settype($formfiles, "array");

		if (count($formvars) == 0 && count($formfiles) == 0)
			return;

		// Start from an empty string so the ".=" appends below never touch an
		// undefined variable.
		$postdata = "";

		switch ($this->_submit_type) {
			case "application/x-www-form-urlencoded":
				// foreach replaces the each() loops; each() no longer exists in PHP 8.
				foreach ($formvars as $key => $val) {
					if (is_array($val) || is_object($val)) {
						foreach ($val as $cur_val) {
							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
						}
					} else
						$postdata .= urlencode($key)."=".urlencode($val)."&";
				}
				break;

			case "multipart/form-data":
				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

				foreach ($formvars as $key => $val) {
					if (is_array($val) || is_object($val)) {
						foreach ($val as $cur_val) {
							$postdata .= "--".$this->_mime_boundary."\r\n";
							// {$key}[] yields name="key[]"; the former "\[\]" form put
							// literal backslashes into the field name.
							$postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
							$postdata .= "$cur_val\r\n";
						}
					} else {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
						$postdata .= "$val\r\n";
					}
				}

				foreach ($formfiles as $field_name => $file_names) {
					foreach ((array) $file_names as $file_name) {
						if (!is_readable($file_name)) continue;

						// file_get_contents() is binary-safe and copes with empty
						// files, unlike fread() with a zero length.
						$file_content = file_get_contents($file_name);
						if ($file_content === false) continue;

						$base_name = basename($file_name);

						$postdata .= "--".$this->_mime_boundary."\r\n";
						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
						$postdata .= "$file_content\r\n";
					}
				}
				// closing boundary terminates the multipart body
				$postdata .= "--".$this->_mime_boundary."--\r\n";
				break;
		}

		return $postdata;
	}
896
}