Issues (1686)

sources/ElkArte/Http/StreamFetchWebdata.php (1 issue)

Labels
Severity
1
<?php
2
3
/**
4
 * This will fetch a web resource http/https and return the headers and page data.  It is capable of following
5
 * redirects and interpreting chunked data, etc.  It will NOT work with ini allow_url_fopen off.
6
 *
7
 * @package   ElkArte Forum
8
 * @copyright ElkArte Forum contributors
9
 * @license   BSD https://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
10
 *
11
 * @version 2.0 dev
12
 *
13
 */
14
15
namespace ElkArte\Http;
16
17
use Exception;
18
19
/**
20
 * Class StreamFetchWebdata
21
 *
22
 * @package ElkArte
23
 */
24
class StreamFetchWebdata
{
	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int how much of the body we will read, 0 reads everything */
	private $_content_length = 0;

	/** @var array the parsed url with host, port, path, etc. Empty when the url was rejected */
	private $_url = [];

	/** @var null|resource the fopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url encoded data that will be posted to a form */
	private $_post_data = '';

	/** @var array Holds the response to the request, headers, data, code */
	private $_response = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var array the context options for the stream */
	private $_options = [];

	/**
	 * StreamFetchWebdata constructor.
	 *
	 * @param array $options user options, only max_length (bytes of the body to read) is used
	 * @param int $max_redirect passed to the stream context as max_redirects
	 * @param bool $keep_alive send "Connection: Keep-Alive" instead of "Connection: close"
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * Nothing is returned, use result() to read the outcome of the request.
	 *
	 * @param string $url
	 * @param string|string[] $post_data when not empty the request is sent as a POST
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Start clean, otherwise data posted by an earlier call on this instance would be sent again
		$this->_post_data = '';

		// Prepare any given post data
		if (!empty($post_data))
		{
			// NOTE(review): a string is wrapped in a one element array, so it goes out as "0=<encoded string>".
			// Kept as is since receivers may rely on it, confirm this is intended.
			$fields = is_array($post_data) ? $post_data : [trim($post_data)];
			$this->_post_data = http_build_query($fields, '', '&');
		}

		// Set the options and get it
		$this->_openRequest($url);
	}

	/**
	 * Makes the actual data call
	 *
	 * What it does
	 * - Calls setOptions to build the stream context array
	 * - Makes the data request and parses the results
	 *
	 * @param string $url site to fetch
	 *
	 * @return string|bool the fetched body, or false when the url was rejected or could not be opened
	 */
	private function _openRequest($url)
	{
		// Build the stream options array
		$this->_setOptions($url);

		// We do have a url I hope
		if (empty($this->_url))
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		if (!$this->_makeRequest())
		{
			return false;
		}

		$this->_parseRequest();
		$this->_processHeaders();

		return $this->_fetchData();
	}

	/**
	 * Prepares the options needed from this request
	 *
	 * What it does
	 * - Validates the url, only http and https urls with a host are accepted
	 * - Stores the parsed url and the url that will be opened
	 * - Builds the ssl / http stream context options, adding the Range and POST details when needed
	 *
	 * @param string $url
	 */
	private function _setOptions($url)
	{
		$this->_url = [];

		// Ensure the url is valid
		if (filter_var($url, FILTER_VALIDATE_URL))
		{
			// Get the elements for the url
			$parsed = parse_url($url);

			// FILTER_VALIDATE_URL also lets through things like file:// or mailto:, those are not web
			// resources and have no business being handed to fopen by this class
			if (is_array($parsed)
				&& isset($parsed['scheme'], $parsed['host'])
				&& in_array(strtolower($parsed['scheme']), ['http', 'https'], true))
			{
				$this->_url = $parsed;
				$this->_url['path'] = ($parsed['path'] ?? '/') . (isset($parsed['query']) ? '?' . $parsed['query'] : '');

				// Keep an explicit port, without it host:8080 would be requested on the default port
				$port = isset($parsed['port']) ? ':' . $parsed['port'] : '';
				$this->_response['url'] = $parsed['scheme'] . '://' . $parsed['host'] . $port . $this->_url['path'];
			}
		}

		// Build out the options for our context stream
		$this->_options = [
			'ssl' => [
				'verify_peer' => false,
				// The option name is verify_peer_name, an unknown key such as verify_peername is silently ignored
				'verify_peer_name' => false
			],
			'http' =>
				[
					'method' => 'GET',
					'max_redirects' => $this->_max_redirect,
					'ignore_errors' => true,
					'protocol_version' => 1.1,
					'follow_location' => 1,
					'timeout' => 10,
					'header' => [
						'Connection: ' . ($this->_keep_alive ? 'Keep-Alive' : 'close'),
						'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931',
						'Content-Type: application/x-www-form-urlencoded',
					],
				]
		];

		// Try to limit the body of the response?
		if (!empty($this->_user_options['max_length']))
		{
			$this->_content_length = (int) $this->_user_options['max_length'];
			$this->_options['http']['header'][] = 'Range: bytes=0-' . ($this->_content_length - 1);
		}

		// Anything to post switches the request from GET to POST
		if (!empty($this->_post_data))
		{
			$this->_options['http']['method'] = 'POST';
			$this->_options['http']['header'][] = 'Content-Length: ' . strlen($this->_post_data);
			$this->_options['http']['content'] = $this->_post_data;
		}
	}

	/**
	 * Connect to the host/port with the stream options defined
	 *
	 * On failure the reason is stored in the error area of the response.
	 *
	 * @return bool true when the stream was opened
	 */
	private function _makeRequest()
	{
		// Forget older errors so a failure below is not explained with a stale message
		error_clear_last();

		try
		{
			$context = stream_context_create($this->_options);
			$this->_fp = fopen($this->_response['url'], 'rb', false, $context);
		}
		catch (Exception $exception)
		{
			$this->_response['error'] = $exception->getMessage();

			return false;
		}

		if (is_resource($this->_fp))
		{
			return true;
		}

		// fopen reports a failure with a warning and false, not an exception, so pick up the reason here
		$last_error = error_get_last();
		$this->_response['error'] = $last_error['message'] ?? 'Unable to open ' . $this->_response['url'];

		return false;
	}

	/**
	 * Fetch the headers and parse the meta data into the results we need
	 *
	 * Fills the headers (keyed by lowercase name), redirects and code areas of the response.
	 */
	private function _parseRequest()
	{
		// header information as well as meta data
		$meta = stream_get_meta_data($this->_fp);
		$this->_response['headers'] = [];
		$this->_response['redirects'] = 0;
		$this->_response['code'] = '???';

		// Loop and process the headers
		foreach ($meta['wrapper_data'] ?? [] as $header)
		{
			// Create the final header array
			$temp = explode(':', $header, 2);

			// Normalize / clean
			$name = isset($temp[0]) ? strtolower($temp[0]) : '';
			$value = isset($temp[1]) ? trim($temp[1]) : null;

			// How many redirects
			if ($name === 'location')
			{
				$this->_response['redirects']++;
			}

			// Server response is mixed in with the real headers, it is the line without a colon
			if ($value === null)
			{
				$this->_response['headers']['status'] = $name;
			}
			// If its already there overwrite with the new value, unless its a cookie
			elseif (isset($this->_response['headers'][$name]) && $name === 'set-cookie')
			{
				// Second cookie seen, turn the single value into a list
				if (is_string($this->_response['headers'][$name]))
				{
					$this->_response['headers'][$name] = [$this->_response['headers'][$name]];
				}

				$this->_response['headers'][$name][] = $value;
			}
			else
			{
				$this->_response['headers'][$name] = $value;
			}
		}
	}

	/**
	 * Works out the final url and status code from the parsed headers
	 */
	private function _processHeaders()
	{
		// Were we redirected, if so lets find out where
		if (!empty($this->_response['headers']['location']))
		{
			// update $url with where we were ultimately redirected to
			$this->_response['url'] = $this->_response['headers']['location'];
		}

		// What about our status code?
		if (!empty($this->_response['headers']['status']))
		{
			// Update with last status code found, its for this final navigated point.
			// The offset skips a 9 character prefix such as "http/1.1 "
			$this->_response['code'] = substr($this->_response['headers']['status'], 9, 3);
		}

		// Provide a common "valid" return code to the caller, the parsed code is a string so compare as int
		if (in_array((int) $this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code_orig'] = $this->_response['code'];
			$this->_response['code'] = 200;
		}
	}

	/**
	 * Fetch the body for the selected site.
	 *
	 * Reads at most max_length bytes when that option was supplied, closes the stream and
	 * records the body and its size in the response.
	 *
	 * @return string the body that was read, empty when the read failed
	 */
	private function _fetchData()
	{
		// Get the contents of the url
		$body = empty($this->_content_length)
			? stream_get_contents($this->_fp)
			: stream_get_contents($this->_fp, $this->_content_length);

		fclose($this->_fp);
		$this->_fp = null;

		// A failed read gives false, keep the body a string for the caller
		$this->_response['body'] = $body === false ? '' : $body;
		$this->_response['size'] = strlen($this->_response['body']);

		return $this->_response['body'];
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - Called with an area that does not exist will return the full final array
	 *
	 * @param string $area used to return an area such as body, header, error
	 *
	 * @return string|string[]
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if ($area === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}