Issues (1061)

Sources/Class-CurlFetchWeb.php (1 issue)

1
<?php
2
/**
3
 * Simple Machines Forum (SMF)
4
 *
5
 * @package SMF
6
 * @author Simple Machines https://www.simplemachines.org
7
 * @copyright 2020 Simple Machines and individual contributors
8
 * @license https://www.simplemachines.org/about/smf/license.php BSD
9
 *
10
 * @version 2.1 RC2
11
 */
12
13
// This file is only meant to be included from code that has defined the SMF constant;
// stop right away when it is missing.
if (!defined('SMF'))
{
	die('No direct access...');
}
15
16
/**
17
 * Class curl_fetch_web_data
18
 * Simple cURL class to fetch a web page
19
 * Properly redirects even with safe mode and basedir restrictions
20
 * Can provide simple post options to a page
21
 *
22
 * ### Load class
23
 * Initiate as
24
 * ```
25
 * $fetch_data = new cURL_fetch_web_data();
26
 * ```
27
 * Optionally pass an array of cURL options and redirect count
28
 * ```
29
 * $fetch_data = new cURL_fetch_web_data(array(CURLOPT_SSL_VERIFYPEER => 1), 5);
30
 * ```
31
 *
32
 * ### Make the call
33
 * Fetch a page
34
 * ```
35
 * $fetch_data->get_url_data('https://www.simplemachines.org');
36
 * ```
37
 * Post to a page providing an array
38
 * ```
39
 * $fetch_data->get_url_data('https://www.simplemachines.org', array('user' => 'name', 'password' => 'password'));
40
 * ```
41
 * Post to a page providing a string
42
 * ```
43
 * $fetch_data->get_url_data('https://www.simplemachines.org', 'parameter1&parameter2&parameter3');
44
 * ```
45
 *
46
 * ### Get the data
47
 * Just the page content
48
 * ```
49
 * $fetch_data->result('body');
50
 * ```
51
 * An array of results, body, header, http result codes
52
 * ```
53
 * $fetch_data->result();
54
 * ```
55
 * Show all results of all calls (in the event of a redirect)
56
 * ```
57
 * $fetch_data->result_raw();
58
 * ```
59
 * Show the results of a specific call (in the event of a redirect)
60
 * ```
61
 * $fetch_data->result_raw(0);
62
 * ```
63
 */
64
class curl_fetch_web_data
65
{
66
	/**
67
	 * Set the default items for this class
68
	 *
69
	 * @var array $default_options
70
	 */
71
	private $default_options = array(
72
		CURLOPT_RETURNTRANSFER	=> 1, // Get returned value as a string (don't output it)
73
		CURLOPT_HEADER			=> 1, // We need the headers to do our own redirect
74
		CURLOPT_FOLLOWLOCATION	=> 0, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
75
		CURLOPT_USERAGENT		=> SMF_USER_AGENT, // set a normal looking useragent
76
		CURLOPT_CONNECTTIMEOUT	=> 15, // Don't wait forever on a connection
77
		CURLOPT_TIMEOUT			=> 90, // A page should load in this amount of time
78
		CURLOPT_MAXREDIRS		=> 5, // stop after this many redirects
79
		CURLOPT_ENCODING		=> 'gzip,deflate', // accept gzip and decode it
80
		CURLOPT_SSL_VERIFYPEER	=> 0, // stop cURL from verifying the peer's certificate
81
		CURLOPT_SSL_VERIFYHOST	=> 0, // stop cURL from verifying the peer's host
82
		CURLOPT_POST			=> 0, // no post data unless its passed
83
	);
84
85
	/**
86
	 * @var int Maximum number of redirects
87
	 */
88
	public $max_redirect;
89
90
	/**
91
	 * @var array An array of cURL options
92
	 */
93
	public $user_options = array();
94
95
	/**
96
	 * @var string The post data to send, as a single name=value&name2=value2 string
97
	 */
98
	public $post_data;
99
100
	/**
101
	 * @var array An array of cURL options
102
	 */
103
	public $options;
104
105
	/**
106
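	 * @var int Counter for the current request in a redirect chain (set to 1 for the initial request, incremented before each redirect is followed)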
107
	 */
108
	public $current_redirect;
109
110
	/**
111
	 * @var array Stores responses (url, code, error, headers, body) in the response array
112
	 */
113
	public $response = array();
114
115
	/**
116
	 * @var array Response headers collected by header_callback(), keyed by lowercased header name
117
	 */
118
	public $headers;
119
120
	/**
	 * Set up a new fetcher
	 * - nothing is requested yet; the values are only stored for later calls
	 * - the supplied options are layered over the class defaults by set_options()
	 *
	 * @param array $options cURL options (CURLOPT_* => value) that override or extend the defaults
	 * @param int $max_redirect Maximum number of redirects to follow
	 */
	public function __construct($options = array(), $max_redirect = 3)
	{
		// Keep the caller's overrides exactly as given
		$this->user_options = $options;

		// The redirect limit is always handled as an integer
		$this->max_redirect = (int) $max_redirect;
	}
133
134
	/**
	 * Main calling function,
	 *  - will request the page data from a given $url
	 *  - optionally will post data to the page form if post data is supplied
	 *  - passed arrays will be converted to a post string joined with &'s
	 *  - post data from an earlier call on the same object is discarded first
	 *  - calls set_options to set the curl opts array values based on the defaults and user input
	 *
	 * @param string $url the site we are going to fetch
	 * @param array|string $post_data any post data, either as form name => value pairs or as a ready-made string
	 * @return curl_fetch_web_data This instance, so result() can be chained
	 */
	public function get_url_data($url, $post_data = array())
	{
		// Start clean: set_options() turns the request into a POST whenever post_data is set,
		// so data left over from a previous call must not survive into this one.
		$this->post_data = null;

		// POSTing some data perhaps?
		if (!empty($post_data))
			$this->post_data = is_array($post_data) ? $this->build_post_data($post_data) : trim($post_data);

		// set the options and get it (spaces are not valid in a URL, so encode them)
		$this->set_options();
		$this->curl_request(str_replace(' ', '%20', $url));

		return $this;
	}
159
160
	/**
	 * Makes the actual cURL call
	 *  - stores responses (url, code, error, headers, body, size) in the response array
	 *  - headers are collected per request, so each stored response only carries its own
	 *  - detects 301, 302, 307 codes and will redirect to the given response header location
	 *
	 * @param string $url The site to fetch
	 * @param bool $redirect Whether or not this was a redirect request
	 * @return void|bool Sets various properties of the class or returns false if the URL isn't specified
	 */
	private function curl_request($url, $redirect = false)
	{
		// we do have a url I hope
		if ($url == '')
			return false;

		$this->options[CURLOPT_URL] = $url;

		// if we have not already been redirected, set it up so we can if needed
		if (!$redirect)
		{
			$this->current_redirect = 1;
			$this->response = array();
		}

		// The header callback fills this while the request runs. Empty it first, otherwise headers
		// of an earlier request (a stale location in particular) would be mistaken for this one's.
		$this->headers = array();

		// Initialize the curl object and make the call
		$cr = curl_init();
		curl_setopt_array($cr, $this->options);
		curl_exec($cr);

		// Get what was returned
		$curl_info = curl_getinfo($cr);
		$curl_content = curl_multi_getcontent($cr);
		$curl_error = curl_error($cr);

		// close this request
		curl_close($cr);

		$url = $curl_info['url']; // Last effective URL
		$http_code = $curl_info['http_code']; // Last HTTP code

		// The headers are part of the returned content (CURLOPT_HEADER), so cut them off the front of the body
		$body = $curl_error ? false : substr($curl_content, $curl_info['header_size']);
		$error = $curl_error ? $curl_error : false;

		// store this 'loops' data, someone may want all of these :O
		$this->response[] = array(
			'url' => $url,
			'code' => $http_code,
			'error' => $error,
			'headers' => !empty($this->headers) ? $this->headers : false,
			'body' => $body,
			'size' => $curl_info['download_content_length'],
		);

		// If this a redirect with a location header and we have not given up, then do it again
		$is_redirect = in_array((int) $http_code, array(301, 302, 307), true);
		$has_location = isset($this->headers['location']) && $this->headers['location'] != '';

		if ($is_redirect && $has_location && $this->current_redirect <= $this->max_redirect)
		{
			$this->current_redirect++;
			$header_location = $this->get_redirect_url($url, $this->headers['location']);
			$this->redirect($header_location, $url);
		}
	}
218
219
	/**
	 * Used if being redirected to ensure we have a fully qualified address
	 *  - scheme, host and port are taken from the previous URL when the redirect does not name them
	 *  - a path that does not start with / is resolved against the directory of the previous URL
	 *  - a redirect that names a host but no path goes to that host's root
	 *  - user info and fragments are not carried over
	 *
	 * @param string $last_url The URL we went to
	 * @param string $new_url The URL we were redirected to
	 * @return string The new URL that was in the HTTP header
	 */
	private function get_redirect_url($last_url = '', $new_url = '')
	{
		// Get the elements for these urls, parse_url() returns false for seriously malformed ones
		$last = parse_url($last_url);
		$new = parse_url($new_url);
		$last = is_array($last) ? $last : array();
		$new = is_array($new) ? $new : array();

		// redirect headers are often incomplete or relative so we need to make sure they are fully qualified
		$scheme = isset($new['scheme']) ? $new['scheme'] : (isset($last['scheme']) ? $last['scheme'] : '');

		if (isset($new['host']))
		{
			// The redirect names its own server, so port and path belong to that server and are not inherited
			$host = $new['host'];
			$port = isset($new['port']) ? $new['port'] : null;
			$path = isset($new['path']) && $new['path'] !== '' ? $new['path'] : '/';
		}
		else
		{
			// Same server as before, port included
			$host = isset($last['host']) ? $last['host'] : '';
			$port = isset($last['port']) ? $last['port'] : null;
			$base_path = isset($last['path']) && $last['path'] !== '' ? $last['path'] : '/';

			if (!isset($new['path']) || $new['path'] === '')
				$path = $base_path;
			elseif (substr($new['path'], 0, 1) === '/')
				$path = $new['path'];
			else
			{
				// Relative path: it replaces everything after the last / of the previous path
				$last_slash = strrpos($base_path, '/');
				$path = ($last_slash === false ? '/' : substr($base_path, 0, $last_slash + 1)) . $new['path'];
			}
		}

		$query = isset($new['query']) ? $new['query'] : '';

		// Build the new URL that was in the http header
		return $scheme . '://' . $host . ($port !== null ? ':' . $port : '') . $path . ($query !== '' ? '?' . $query : '');
	}
241
242
	/**
	 * Used to return the results to the calling program
	 *  - called as ->result() will return the full final array
	 *  - called as ->result('body') to just return the page source of the result
	 *  - an area that is missing (or null) in the final response returns the full final array instead
	 *  - returns null when no response has been recorded at all
	 *
	 * @param string $area Used to return an area such as body, headers, error
	 * @return mixed The requested area of the final response, the full final response array, or null if there is none
	 */
	public function result($area = '')
	{
		// Nothing was fetched (e.g. an empty URL was given), so there is no final response to look into
		if (empty($this->response))
			return null;

		$final = $this->response[count($this->response) - 1];

		// just return a specifed area or the entire result?
		if ($area == '' || !isset($final[$area]))
			return $final;

		return $final[$area];
	}
260
261
	/**
	 * Will return all results from all loops (redirects)
	 *  - Can be called as ->result_raw(x) where x is a specific loop results.
	 *  - Call as ->result_raw() for everything.
	 *  - x is limited to the responses that exist: too high gives the last one, below zero the first one
	 *
	 * @param int|string $response_number Which response we want to get, anything non-numeric means all of them
	 * @return array|null The entire response array, just the specified response, or null if a specific response is asked for but none exist
	 */
	public function result_raw($response_number = '')
	{
		if (!is_numeric($response_number))
			return $this->response;

		// A specific response was asked for, but there are none
		if (empty($this->response))
			return null;

		// Keep the index inside 0 .. last so we never read an offset that is not there
		$last = count($this->response) - 1;
		$index = max(0, min((int) $response_number, $last));

		return $this->response[$index];
	}
279
280
	/**
	 * Takes supplied POST data and url encodes it
	 *  - forms the data (for post) in to a string var=xyz&var2=abc&var3=123
	 *  - blanks the value of vars starting with @ since we don't support sending files (uploading)
	 *  - only the values are url encoded, the names are used as given
	 *
	 * @param array|string $post_data The raw POST data
	 * @return string A string of post data, anything that is not an array is returned untouched
	 */
	private function build_post_data($post_data)
	{
		if (!is_array($post_data))
			return $post_data;

		$postvars = array();

		foreach ($post_data as $name => $value)
		{
			// A leading @ can be used to send files, we don't support that so the value is emptied.
			// Only a non-empty string can start with one, which also keeps us from indexing into '' or a number.
			$is_file_reference = is_string($value) && $value !== '' && $value[0] === '@';

			$postvars[] = $name . '=' . urlencode($is_file_reference ? '' : $value);
		}

		return implode('&', $postvars);
	}
303
304
	/**
	 * Sets the final cURL options for the current call
	 *  - overwrites our default values with user supplied ones or appends new user ones to what we have
	 *  - sets the callback function now that $this is existing
	 *  - switches the call to a POST when post data is waiting, user options can't override that
	 *
	 * @return void
	 */
	private function set_options()
	{
		// Callback to parse the returned headers, if any
		$this->default_options[CURLOPT_HEADERFUNCTION] = array($this, 'header_callback');

		// Any user options to account for
		// array_replace() matches on the key, so the integer CURLOPT_* keys stay intact (array_merge()
		// would renumber them) and, given two arrays, the result is always an array.
		if (is_array($this->user_options))
			$this->options = array_replace($this->default_options, $this->user_options);
		else
			$this->options = $this->default_options;

		// POST data options, here we don't allow any overide
		if (isset($this->post_data))
		{
			$this->options[CURLOPT_POST] = 1;
			$this->options[CURLOPT_POSTFIELDS] = $this->post_data;
		}
	}
333
334
	/**
	 * Follows a redirect announced by a 301, 302 or 307 response
	 *  - rebuilds the cURL options for this pass of the loop
	 *  - passes the page we are coming from along as the referer
	 *
	 * @param string $target_url The URL we want to redirect to
	 * @param string $referer_url The URL that we're redirecting from
	 */
	private function redirect($target_url, $referer_url)
	{
		// Fresh options first, the referer is added on top of them
		$this->set_options();
		$this->options[CURLOPT_REFERER] = $referer_url;

		// Ask again, flagged as a redirect so the counter and the stored responses are kept
		$this->curl_request($target_url, true);
	}
348
349
	/**
	 * Callback function to parse returned headers
	 *  - called by cURL once for every header line of a response
	 *  - lowercases names and values to make them consistent
	 *  - the location value keeps its case, as the redirect is made to exactly that URL
	 *  - lines that are not name: value pairs (status line, closing blank line) are skipped
	 *
	 * @param resource|object $cr The cURL handle the header belongs to (not used)
	 * @param string $header The header line as received, line ending included
	 * @return int The length of the header
	 */
	private function header_callback($cr, $header)
	{
		// The space after the colon is optional, so split on the colon alone and trim both sides
		$parts = explode(':', $header, 2);

		// set proper headers only
		if (isset($parts[1]))
		{
			$name = strtolower(trim($parts[0]));
			$value = trim($parts[1]);

			// A header name never contains a space, this keeps odd status lines out
			if ($name !== '' && strpos($name, ' ') === false)
				$this->headers[$name] = $name === 'location' ? $value : strtolower($value);
		}

		// return the length of what was passed unless you want a Failed writing header error ;)
		return strlen($header);
	}
369
}
370
371
?>