Issues (1686)

sources/ElkArte/Http/FsockFetchWebdata.php (3 issues)

Labels
Severity
1
<?php
2
3
/**
4
 * This will fetch a web resource http/https and return the headers and page data.  It is capable of following
5
 * redirects and interpreting chunked data.  It will work with allow_url_fopen off.
6
 *
7
 * @package   ElkArte Forum
8
 * @copyright ElkArte Forum contributors
9
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
10
 *
11
 * @version 2.0 dev
12
 *
13
 */
14
15
namespace ElkArte\Http;
16
17
use Exception;
18
19
/**
20
 * Class FsockFetchWebdata
21
 *
22
 * @package ElkArte
23
 */
24
class FsockFetchWebdata
25
{
26
	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int Holds the current redirect count for the request */
	private $_current_redirect = 0;

	/** @var null|string Host of the kept connection, used on redirect when keep alive is true */
	private $_keep_alive_host;

	/** @var null|resource the fp resource to reuse */
	private $_keep_alive_fp;

	/** @var int how much we will read, taken from the max_length option, 0 when not set */
	private $_content_length = 0;

	/** @var array the parsed url with host_raw, host, port and path */
	private $_url = [];

	/** @var null|resource the fsockopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds any data that will be posted to a form, as an encoded request body */
	private $_post_data = '';

	/** @var array Holds the response to the request: url, code, error, redirects, size, headers, body */
	private $_response = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var array Holds the last header response to the request */
	private $_headers = [];

	/** @var string the first line of the server response, carrying the HTTP code 200/404/302 etc */
	private $_server_response;

	/** @var bool if the response body is transfer encoded chunked */
	private $_chunked = false;
67
68
	/**
	 * Stores the settings that are applied to the requests made with this instance.
	 *
	 * @param array $options user options for the request
	 * @param int $max_redirect upper limit for the redirect counter
	 * @param bool $keep_alive true to ask for a kept alive connection
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Keep what we were given, the redirect limit is always held as an integer
		$this->_keep_alive = $keep_alive;
		$this->_user_options = $options;
		$this->_max_redirect = (int) $max_redirect;
	}
82
83
	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * What it does:
	 *
	 * - Array post data is encoded with http_build_query
	 * - String post data is taken to be an already encoded request body and is only trimmed
	 * - State left over from an earlier call on this instance is cleared first
	 *
	 * @param string $url
	 * @param string|string[] $post_data
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Start clean, so an earlier call can not leak its post data or its result into this one
		$this->_post_data = '';
		$this->_response = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

		// Prepare any given post data
		if (!empty($post_data))
		{
			// A string is sent as given, wrapping it in an array would post it as the value of field "0"
			$this->_post_data = is_array($post_data)
				? http_build_query($post_data, '', '&')
				: trim($post_data);
		}

		// Set the options and get it
		$this->_current_redirect = 0;
		$this->_fopenRequest($url);
	}
109
110
	/**
	 * Main processing loop, connects, parses responses, redirects, fetches body
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool true when a final (non redirect) response was read, including one reached via redirects
	 */
	private function _fopenRequest($url)
	{
		// We do have a url I hope
		$this->_setOptions($url);
		if (empty($this->_url))
		{
			return false;
		}

		// Reuse the socket if this is a keep alive to the same host
		if ($this->_keep_alive && $this->_url['host'] === $this->_keep_alive_host)
		{
			// NOTE(review): the body of a redirect response is not read before the socket is reused
			$this->_fp = $this->_keep_alive_fp;
		}
		elseif (is_resource($this->_fp))
		{
			// A socket left open for another host must not receive this request
			fclose($this->_fp);
			$this->_fp = null;
		}

		// Open a connection to the host & port
		if (!$this->_sockOpen())
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		$this->_makeRequest();

		// Is it where we thought?
		$this->_readHeaders();
		$location = $this->_checkRedirect();
		if (!empty($location))
		{
			// To the new location we go, and report how that request turned out
			return $this->_fopenRequest($location);
		}

		// '???' is reported when the status line holds no three digit code
		preg_match('~^HTTP/\S+\s+(\d{3})~i', (string) $this->_server_response, $code);
		$this->_response['code'] = isset($code[1]) ? (int) $code[1] : '???';

		// Make sure we ended up with a 200 OK.
		if (in_array($this->_response['code'], [200, 201, 206], true))
		{
			// Provide a common valid 200 return code to the caller
			$this->_response['code'] = 200;
		}

		$this->_fetchData();
		if (is_resource($this->_fp))
		{
			fclose($this->_fp);
		}

		return true;
	}
167
168
	/**
	 * Parses a url into the components we need
	 *
	 * What it does:
	 *
	 * - Sets host_raw, host (prefixed with ssl:// for https), port and path in $this->_url
	 * - Leaves $this->_url empty when the url is not valid or has no scheme / host
	 * - Uses the port given in the url, falling back to 443 for https and 80 otherwise
	 *
	 * @param string $url
	 */
	private function _setOptions($url)
	{
		$this->_url = [];
		$this->_response['url'] = $url;
		$this->_content_length = empty($this->_user_options['max_length']) ? 0 : (int) $this->_user_options['max_length'];

		// Make sure its valid before we parse it out
		if (!filter_var($url, FILTER_VALIDATE_URL))
		{
			return;
		}

		// Get the elements for this url, a url can validate and still have no host (mailto: for one)
		$url_parse = parse_url($url);
		if (empty($url_parse['scheme']) || empty($url_parse['host']))
		{
			return;
		}

		// Handle SSL connections
		$secure = strtolower($url_parse['scheme']) === 'https';
		$this->_url['host_raw'] = $url_parse['host'];
		$this->_url['host'] = ($secure ? 'ssl://' : '') . $url_parse['host'];

		// The port comes from the url we just parsed, the scheme only supplies the default
		$this->_url['port'] = empty($url_parse['port']) ? ($secure ? 443 : 80) : (int) $url_parse['port'];

		// Fix/Finalize the data path
		$this->_url['path'] = ($url_parse['path'] ?? '/') . (isset($url_parse['query']) ? '?' . $url_parse['query'] : '');
	}
202
203
	/**
	 * Makes sure there is an open socket to the requested host/port
	 *
	 * - An existing socket resource is kept as is
	 * - Any connection error text is stored in the response error area
	 *
	 * @return bool true when a socket resource is available
	 */
	private function _sockOpen()
	{
		// Already holding a socket, then there is nothing to open
		if (is_resource($this->_fp))
		{
			return true;
		}

		// Warnings raised while connecting are silenced for the duration of the attempt
		set_error_handler(static function () { /* ignore errors */ });
		try
		{
			$this->_fp = fsockopen($this->_url['host'], $this->_url['port'], $errno, $errstr, 5);

			$failed = !empty($errstr);
			$this->_response['error'] = $failed ? $errno . ' :: ' . $errstr : false;
		}
		catch (Exception)
		{
			return false;
		}
		finally
		{
			restore_error_handler();
		}

		return is_resource($this->_fp);
	}
231
232 6
	/**
	 * Make the request to the host, either get or post, and get the initial response.
	 *
	 * What it does:
	 *
	 * - Builds the request line and headers, a POST also carries the form content type, length and body
	 * - Adds a Range header when a maximum length was requested
	 * - Writes the request and stores the first response line in $this->_server_response
	 */
	private function _makeRequest()
	{
		$is_post = !empty($this->_post_data);

		// The Host header names the port when it is not the default one for the scheme
		$default_port = strncmp($this->_url['host'], 'ssl://', 6) === 0 ? 443 : 80;
		$host = $this->_url['host_raw'] . ((int) $this->_url['port'] === $default_port ? '' : ':' . $this->_url['port']);

		$request = ($is_post ? 'POST ' : 'GET ') . $this->_url['path'] . ' HTTP/1.1' . "\r\n";
		$request .= 'Host: ' . $host . "\r\n";
		$request .= $this->_keepAlive();
		$request .= 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931' . "\r\n";

		if (!empty($this->_content_length))
		{
			$request .= 'Range: bytes=0-' . ($this->_content_length - 1) . "\r\n";
		}

		// Only a request with a body describes one
		if ($is_post)
		{
			$request .= 'Content-Type: application/x-www-form-urlencoded' . "\r\n";
			$request .= 'Content-Length: ' . strlen($this->_post_data) . "\r\n";
		}

		// One empty line ends the headers, anything more would be sent as stray data
		$request .= "\r\n";
		if ($is_post)
		{
			$request .= $this->_post_data;
		}

		// Make the request and read the first line of the server response, ending at the first CRLF
		fwrite($this->_fp, $request);
		$status_line = fgets($this->_fp);

		// No answer at all is kept as an empty string so later checks work with a string
		$this->_server_response = $status_line === false ? '' : $status_line;
	}
262 6
263
	/**
	 * Supplies the Connection header line for the request
	 *
	 * When keep alive is enabled the current socket and host are also stored for reuse.
	 *
	 * @return string the header line, ending with CRLF
	 */
	private function _keepAlive()
	{
		// Not keeping the connection, simply say so
		if (!$this->_keep_alive)
		{
			return 'Connection: close' . "\r\n";
		}

		// Remember where we are connected to and with what
		$this->_keep_alive_host = $this->_url['host'];
		$this->_keep_alive_fp = $this->_fp;

		return 'Connection: Keep-Alive' . "\r\n";
	}
281 6
282
	/**
	 * Reads the stream until the end of the headers section and then parses those headers
	 *
	 * What it does:
	 *
	 * - Stops at the first blank line, at the end of the stream or on a failed read
	 * - Stores each header in $this->_headers under its lower cased name
	 * - A header that appears more than once is stored as an array of its values
	 * - Lines that are not a name: value pair are skipped
	 */
	private function _readHeaders()
	{
		$this->_headers = [];

		// Read / request more data, Looking for a blank line which separates headers from body
		while (!feof($this->_fp))
		{
			$line = fgets($this->_fp);
			if ($line === false || trim($line) === '')
			{
				break;
			}

			// Continuation lines and lines without a separator are not something we can store
			if ($line[0] === ' ' || $line[0] === "\t" || strpos($line, ':') === false)
			{
				continue;
			}

			// Get name and value, working per line copes with both CRLF and bare LF endings
			[$name, $value] = explode(':', $line, 2);

			// Normalize / clean
			$name = strtolower(trim($name));
			$value = trim($value);
			if ($name === '')
			{
				continue;
			}

			// If its already there, then add to it as an array
			if (isset($this->_headers[$name]))
			{
				if (is_string($this->_headers[$name]))
				{
					$this->_headers[$name] = [$this->_headers[$name]];
				}

				$this->_headers[$name][] = $value;
			}
			else
			{
				$this->_headers[$name] = $value;
			}
		}
	}
324
325
	/**
	 * Looks at the server response and header array to determine if we are redirecting
	 *
	 * What it does:
	 *
	 * - Only acts on a 301, 302 or 307 response while the redirect limit has not been reached
	 * - Records the code, redirect count and headers in the response
	 * - Closes the socket unless keep alive is in use
	 * - A relative Location is made absolute against the url of the current request
	 *
	 * @return string the url to request next, or an empty string when there is nothing to follow
	 */
	private function _checkRedirect()
	{
		// Redirect in case this location is permanently or temporarily moved (301, 302, 307)
		if ($this->_current_redirect >= $this->_max_redirect
			|| preg_match('~^HTTP/\S+\s+(30[127])~i', (string) $this->_server_response, $code) !== 1)
		{
			return '';
		}

		// Maintain our status responses
		$this->_response['code'] = (int) $code[1];
		$this->_response['redirects'] = ++$this->_current_redirect;
		$this->_response['headers'] = $this->_headers;

		// A repeated location header is stored as an array, the first one given is used
		$location = $this->_headers['location'] ?? '';
		if (is_array($location))
		{
			$location = (string) reset($location);
		}

		// redirection with no location, just like working in a corporation
		if (empty($location))
		{
			return '';
		}

		// Use the same connection or new?
		if (!$this->_keep_alive)
		{
			fclose($this->_fp);
		}

		return $this->_absoluteLocation($location);
	}

	/**
	 * Turns a Location header value into an absolute url, using the current request url as the base
	 *
	 * @param string $location a non empty Location value
	 *
	 * @return string
	 */
	private function _absoluteLocation($location)
	{
		// Has a scheme, so it is already absolute
		if (preg_match('~^[a-z][a-z\d+.-]*://~i', $location) === 1)
		{
			return $location;
		}

		// The host of an https request carries the ssl:// prefix
		$secure = strncmp($this->_url['host'], 'ssl://', 6) === 0;
		$scheme = $secure ? 'https' : 'http';

		// Same scheme, other host: //host/path
		if (strncmp($location, '//', 2) === 0)
		{
			return $scheme . ':' . $location;
		}

		$port = (int) $this->_url['port'];
		$base = $scheme . '://' . $this->_url['host_raw'] . ($port === ($secure ? 443 : 80) ? '' : ':' . $port);

		// Same host, path from the root: /path
		if ($location[0] === '/')
		{
			return $base . $location;
		}

		// The stored path includes the query string, which is not part of the base path
		$path = explode('?', $this->_url['path'], 2)[0];

		// Only a new query string, the path stays
		if ($location[0] === '?')
		{
			return $base . $path . $location;
		}

		// Anything else is relative to the directory of the current path
		$slash = strrpos($path, '/');
		$directory = $slash === false ? '/' : substr($path, 0, $slash + 1);

		return $base . $directory . $location;
	}
357
358
	/**
	 * Fetch the data for the selected site.
	 *
	 * What it does:
	 *
	 * - Processes the response headers first
	 * - Reads at most the requested maximum length when one is set, otherwise the rest of the stream
	 * - Stores the body, decoded when chunked, and its size in bytes in the response
	 */
	private function _fetchData()
	{
		// Respect the headers
		$this->_processHeaders();

		// Now the body of the response
		if (!empty($this->_content_length))
		{
			$response = stream_get_contents($this->_fp, $this->_content_length);
		}
		else
		{
			$response = stream_get_contents($this->_fp);
		}

		// A failed read gives false, the body is always reported as a string
		if ($response === false)
		{
			$response = '';
		}

		$this->_response['body'] = $this->_unChunk($response);
		$this->_response['size'] = strlen($this->_response['body']);
	}
381
382
	/**
	 * Acts on the response headers that change how the connection and the body are handled
	 *
	 * What it does:
	 *
	 * - Connection: close turns keep alive off and forgets the kept host
	 * - Transfer-Encoding containing chunked flags the body for decoding, anything else clears the flag
	 * - Copies the headers into the response
	 */
	private function _processHeaders()
	{
		// If told to close the connection, do so (header values are not case sensitive)
		$connection = $this->_headers['connection'] ?? '';
		if (is_string($connection) && strcasecmp(trim($connection), 'close') === 0)
		{
			$this->_keep_alive_host = null;
			$this->_keep_alive = false;
		}

		// If its chunked we need to decode the body, a repeated header is looked at as one list
		$encoding = $this->_headers['transfer-encoding'] ?? '';
		if (is_array($encoding))
		{
			$encoding = implode(',', $encoding);
		}

		// Always assigned, so a chunked response does not mark the ones that follow it
		$this->_chunked = stripos((string) $encoding, 'chunked') !== false;

		$this->_response['headers'] = $this->_headers;
	}
402
403
	/**
	 * Decodes the response body if its transfer-encoded as chunked
	 *
	 * What it does:
	 *
	 * - Returns the body untouched when the response was not flagged as chunked
	 * - Reads a hex chunk size line, takes that many bytes, skips the CRLF after them, and repeats
	 * - Stops at a zero size chunk or when nothing but whitespace is left
	 * - A body that does not start with a chunk size line is returned as it was received
	 *
	 * @param string $body
	 * @return string
	 */
	private function _unChunk($body)
	{
		if (!$this->_chunked)
		{
			return $body;
		}

		$body = (string) $body;
		$decoded_body = '';
		while (true)
		{
			// Nothing left worth reading
			$remaining = trim($body);
			if ($remaining === '' || $remaining === '0')
			{
				break;
			}

			// The size line must be the very next thing, so the pattern is tied to the start of the body
			if (!preg_match('~^([\da-fA-F]+)[^\r\n]*\r\n~', $body, $match))
			{
				// It only claimed to be chunked, but its not.  Damage part way keeps what was decoded
				if ($decoded_body === '')
				{
					$decoded_body = $body;
				}

				break;
			}

			// hexdec gives a float for a value beyond the integer range, which is no usable size
			$length = hexdec($match[1]);
			if (!is_int($length) || $length === 0)
			{
				break;
			}

			$cut = strlen($match[0]);
			$decoded_body .= substr($body, $cut, $length);

			// Move past the size line, the chunk data and the CRLF that closes the chunk
			$body = (string) substr($body, $cut + $length + 2);
		}

		return $decoded_body;
	}
440
441
	/**
	 * Hands the outcome of the request back to the calling program
	 *
	 * What it does:
	 *
	 * - ->result() supplies the complete response array
	 * - ->result('body') supplies just that area, here the page source
	 * - An area that is not set in the response supplies the complete array as well
	 *
	 * @param string $area the area to return such as body, headers, error
	 *
	 * @return mixed the requested area or the complete response array
	 */
	public function result($area = '')
	{
		$everything = $this->_response;

		// A named area that we do hold is returned on its own
		if (trim($area) !== '' && isset($everything[$area]))
		{
			return $everything[$area];
		}

		return $everything;
	}
463
}