curl_fetch_web_data::curl_request()   B
last analyzed

Complexity

Conditions 9
Paths 33

Size

Total Lines 47
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 28
nop 2
dl 0
loc 47
rs 8.0555
c 0
b 0
f 0
nc 33
<?php

/**
 * Simple Machines Forum (SMF)
 *
 * @package SMF
 * @author Simple Machines http://www.simplemachines.org
 * @copyright 2019 Simple Machines and individual contributors
 * @license http://www.simplemachines.org/about/smf/license.php BSD
 *
 * @version 2.1 RC2
 */

// Stop right here unless the SMF constant has been defined by whatever loaded this file.
if (!defined('SMF'))
{
	die('No direct access...');
}

/**
 * Class curl_fetch_web_data
 * Simple cURL class to fetch a web page
 * Properly redirects even with safe mode and basedir restrictions
 * Can provide simple post options to a page
 *
 * Load class
 * Initiate as
 *  - $fetch_data = new curl_fetch_web_data();
 *	- optionally pass an array of cURL options and redirect count
 *	- curl_fetch_web_data(cURL options array, Max redirects);
 *  - $fetch_data = new curl_fetch_web_data(array(CURLOPT_SSL_VERIFYPEER => 1), 5);
 *
 * Make the call
 *  - $fetch_data->get_url_data('https://www.simplemachines.org'); // fetch a page
 *  - $fetch_data->get_url_data('https://www.simplemachines.org', array('user' => 'name', 'password' => 'password')); // post to a page
 *  - $fetch_data->get_url_data('https://www.simplemachines.org', 'parameter1&parameter2&parameter3'); // post to a page
 *
 * Get the data
 *  - $fetch_data->result('body'); // just the page content
 *  - $fetch_data->result(); // an array of results, body, header, http result codes
 *  - $fetch_data->result_raw(); // show all results of all calls (in the event of a redirect)
 *  - $fetch_data->result_raw(0); // show all results of call x
 */
class curl_fetch_web_data
{
	/**
	 * Set the default items for this class
	 *
	 * NOTE(review): peer and host certificate verification are switched off by default,
	 * so HTTPS responses are not authenticated unless the caller passes
	 * CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST in the user options.
	 *
	 * @var array $default_options
	 */
	private $default_options = array(
		CURLOPT_RETURNTRANSFER	=> 1, // Get returned value as a string (don't output it)
		CURLOPT_HEADER			=> 1, // We need the headers to do our own redirect
		CURLOPT_FOLLOWLOCATION	=> 0, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
		CURLOPT_USERAGENT		=> 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko Firefox/11.0', // set a normal looking useragent
		CURLOPT_CONNECTTIMEOUT	=> 15, // Don't wait forever on a connection
		CURLOPT_TIMEOUT			=> 90, // A page should load in this amount of time
		CURLOPT_MAXREDIRS		=> 5, // stop after this many redirects
		CURLOPT_ENCODING		=> 'gzip,deflate', // accept gzip and decode it
		CURLOPT_SSL_VERIFYPEER	=> 0, // stop cURL from verifying the peer's certificate
		CURLOPT_SSL_VERIFYHOST	=> 0, // stop cURL from verifying the peer's host
		CURLOPT_POST			=> 0, // no post data unless its passed
	);

	/**
	 * @var int Maximum number of redirects
	 */
	public $max_redirect;

	/**
	 * @var array An array of cURL options supplied by the caller, these win over the defaults
	 */
	public $user_options = array();

	/**
	 * @var string Any post data, already encoded as a name=value&name2=value2 string
	 */
	public $post_data;

	/**
	 * @var array The cURL options (defaults merged with the user options) used for the current call
	 */
	public $options;

	/**
	 * @var int Number of the request being made in the current chain, 1 for the initial request
	 */
	public $current_redirect;

	/**
	 * @var array Stores responses (url, code, error, headers, body) in the response array
	 */
	public $response = array();

	/**
	 * @var array Headers of the most recent request, keyed by lowercased header name
	 */
	public $headers;

	/**
	 * Start the curl object
	 * - allow for user override values
	 *
	 * @param array $options An array of cURL options
	 * @param int $max_redirect Maximum number of redirects
	 */
	public function __construct($options = array(), $max_redirect = 3)
	{
		// Initialize class variables
		$this->max_redirect = intval($max_redirect);
		$this->user_options = $options;
	}

	/**
	 * Main calling function,
	 *  - will request the page data from a given $url
	 *  - optionally will post data to the page form if post data is supplied
	 *  - passed arrays will be converted to a post string joined with &'s
	 *  - calls set_options to set the curl opts array values based on the defaults and user input
	 *  - post data stored by an earlier call stays in $this->post_data when none is passed here
	 *
	 * @param string $url the site we are going to fetch
	 * @param array|string $post_data any post data as form name => value, or an already built string
	 * @return object An instance of the curl_fetch_web_data class
	 */
	public function get_url_data($url, $post_data = array())
	{
		// POSTing some data perhaps?
		if (!empty($post_data) && is_array($post_data))
			$this->post_data = $this->build_post_data($post_data);
		elseif (!empty($post_data))
			$this->post_data = trim($post_data);

		// set the options and get it
		$this->set_options();
		$this->curl_request(str_replace(' ', '%20', $url));

		return $this;
	}

	/**
	 * Makes the actual cURL call
	 *  - stores responses (url, code, error, headers, body, size) in the response array
	 *  - detects 301, 302, 307 codes and will redirect to the given response header location
	 *
	 * @param string $url The site to fetch
	 * @param bool $redirect Whether or not this was a redirect request
	 * @return void|bool Sets various properties of the class or returns false if the URL isn't specified or cURL could not be started
	 */
	private function curl_request($url, $redirect = false)
	{
		// we do have a url I hope
		if ($url == '')
			return false;

		$this->options[CURLOPT_URL] = $url;

		// if we have not already been redirected, set it up so we can if needed
		if (!$redirect)
		{
			$this->current_redirect = 1;
			$this->response = array();
		}

		// Every request starts with a clean slate, a location header left over
		// from an earlier hop must never be mistaken for one sent by this hop.
		$this->headers = array();

		// Initialize the curl object, this can fail
		$cr = curl_init();
		if ($cr === false)
		{
			$this->response[] = array(
				'url' => $url,
				'code' => 0,
				'error' => 'Unable to initialize cURL',
				'headers' => false,
				'body' => false,
				'size' => 0,
			);

			return false;
		}

		// Make the call, the header callback fills $this->headers while it runs
		curl_setopt_array($cr, $this->options);
		curl_exec($cr);

		// Get what was returned
		$curl_info = curl_getinfo($cr);
		$curl_content = curl_multi_getcontent($cr);
		$curl_error = curl_error($cr);

		// close this request
		curl_close($cr);

		$url = $curl_info['url']; // Last effective URL
		$http_code = $curl_info['http_code']; // Last HTTP code
		$error = !empty($curl_error) ? $curl_error : false;

		// The headers are part of the content (CURLOPT_HEADER), skip over them to get the body
		$body = empty($curl_error) ? substr($curl_content, $curl_info['header_size']) : false;

		// store this 'loops' data, someone may want all of these :O
		$this->response[] = array(
			'url' => $url,
			'code' => $http_code,
			'error' => $error,
			'headers' => !empty($this->headers) ? $this->headers : false,
			'body' => $body,
			'size' => $curl_info['download_content_length'],
		);

		// If this a redirect with a location header and we have not given up, then do it again
		$is_redirect = in_array((int) $http_code, array(301, 302, 307), true);
		$has_location = isset($this->headers['location']) && $this->headers['location'] != '';

		if ($is_redirect && $has_location && $this->current_redirect <= $this->max_redirect)
		{
			$this->current_redirect++;
			$header_location = $this->get_redirect_url($url, $this->headers['location']);
			$this->redirect($header_location, $url);
		}
	}

	/**
	 * Used if being redirected to ensure we have a fully qualified address
	 *  - a location with its own host is used as given, scheme defaults to the one we last used
	 *  - a location without a host inherits scheme, host and port from the last URL
	 *  - a path without a leading slash is taken relative to the directory of the last URL
	 *  - dot segments (./ and ../) are passed along untouched
	 *
	 * @param string $last_url The URL we went to
	 * @param string $new_url The URL we were redirected to
	 * @return string The new URL that was in the HTTP header
	 */
	private function get_redirect_url($last_url = '', $new_url = '')
	{
		// Get the elements for these urls, parse_url gives false on seriously malformed input
		$last_url_parse = parse_url($last_url);
		$new_url_parse = parse_url($new_url);

		if (!is_array($last_url_parse))
			$last_url_parse = array();
		if (!is_array($new_url_parse))
			$new_url_parse = array();

		// redirect headers are often incomplete or relative so we need to make sure they are fully qualified
		$has_own_host = isset($new_url_parse['host']);

		if (isset($new_url_parse['scheme']))
			$scheme = $new_url_parse['scheme'];
		else
			$scheme = isset($last_url_parse['scheme']) ? $last_url_parse['scheme'] : 'http';

		if ($has_own_host)
			$host = $new_url_parse['host'];
		else
			$host = isset($last_url_parse['host']) ? $last_url_parse['host'] : '';

		// The port travels with the host it belongs to
		$port_source = $has_own_host ? $new_url_parse : $last_url_parse;
		$port = isset($port_source['port']) ? ':' . $port_source['port'] : '';

		$last_path = isset($last_url_parse['path']) && $last_url_parse['path'] !== '' ? $last_url_parse['path'] : '/';

		if (!isset($new_url_parse['path']) || $new_url_parse['path'] === '')
			$path = $has_own_host ? '' : $last_path;
		elseif ($has_own_host || $new_url_parse['path'][0] === '/')
			$path = $new_url_parse['path'];
		else
		{
			// Relative to the "directory" of the page that redirected us
			$slash = strrpos($last_path, '/');
			$path = ($slash === false ? '/' : substr($last_path, 0, $slash + 1)) . $new_url_parse['path'];
		}

		$query = !empty($new_url_parse['query']) ? '?' . $new_url_parse['query'] : '';

		// Build the new URL that was in the http header
		return $scheme . '://' . $host . $port . $path . $query;
	}

	/**
	 * Used to return the results to the calling program
	 *  - called as ->result() will return the full final array
	 *  - called as ->result('body') to just return the page source of the result
	 *  - an unknown area returns the full final array as well
	 *
	 * @param string $area Used to return an area such as body, header, error
	 * @return mixed The requested area, the full final response array, or null if no request was made yet
	 */
	public function result($area = '')
	{
		// Nothing fetched yet, nothing to give back
		if (empty($this->response))
			return null;

		$responses = $this->response;
		$final = end($responses);

		// just return a specifed area or the entire result?
		if ($area == '' || !isset($final[$area]))
			return $final;

		return $final[$area];
	}

	/**
	 * Will return all results from all loops (redirects)
	 *  - Can be called as ->result_raw(x) where x is a specific loop results.
	 *  - Call as ->result_raw() for everything.
	 *  - x is kept within the range of the stored responses
	 *
	 * @param string $response_number Which response we want to get
	 * @return array|null The entire response array, just the specified response, or null when one is asked for but none exist
	 */
	public function result_raw($response_number = '')
	{
		if (!is_numeric($response_number))
			return $this->response;

		if (empty($this->response))
			return null;

		// Keep the index inside of what we actually have
		$index = max(0, min((int) $response_number, count($this->response) - 1));

		return $this->response[$index];
	}

	/**
	 * Takes supplied POST data and url encodes it
	 *  - forms the date (for post) in to a string var=xyz&var2=abc&var3=123
	 *  - drops vars with @ since we don't support sending files (uploading)
	 *  - array values are expanded in to name[key]=value pairs by http_build_query
	 *
	 * @param array|string $post_data The raw POST data
	 * @return string A string of post data
	 */
	private function build_post_data($post_data)
	{
		if (!is_array($post_data))
			return $post_data;

		$postvars = array();

		foreach ($post_data as $name => $value)
		{
			// Nested data, let PHP build the name[key]=value pairs for us
			if (is_array($value))
			{
				$nested = http_build_query(array($name => $value), '', '&');
				if ($nested !== '')
					$postvars[] = $nested;

				continue;
			}

			// drop ones with leading @'s since those can be used to send files, we don't support that.
			$value = (string) $value;
			$postvars[] = $name . '=' . urlencode(strpos($value, '@') === 0 ? '' : $value);
		}

		return implode('&', $postvars);
	}

	/**
	 * Sets the final cURL options for the current call
	 *  - overwrites our default values with user supplied ones or appends new user ones to what we have
	 *  - sets the callback function now that $this is existing
	 *
	 * @return void
	 */
	private function set_options()
	{
		// Callback to parse the returned headers, if any
		$this->default_options[CURLOPT_HEADERFUNCTION] = array($this, 'header_callback');

		// Any user options to account for, array_replace keeps the (integer) option keys intact
		if (is_array($this->user_options))
			$this->options = array_replace($this->default_options, $this->user_options);
		else
			$this->options = $this->default_options;

		// POST data options, here we don't allow any overide
		if (isset($this->post_data))
		{
			$this->options[CURLOPT_POST] = 1;
			$this->options[CURLOPT_POSTFIELDS] = $this->post_data;
		}
	}

	/**
	 * Called to initiate a redirect from a 301, 302 or 307 header
	 *  - resets the cURL options for the loop, sets the referrer flag
	 *
	 * @param string $target_url The URL we want to redirect to
	 * @param string $referer_url The URL that we're redirecting from
	 */
	private function redirect($target_url, $referer_url)
	{
		// no no I last saw that over there ... really, 301, 302, 307
		$this->set_options();
		$this->options[CURLOPT_REFERER] = $referer_url;
		$this->curl_request($target_url, true);
	}

	/**
	 * Callback function to parse returned headers
	 *  - lowercases header names and values to make them consistent
	 *  - the location value keeps its case, paths can be case sensitive
	 *
	 * @param resource $cr The cURL handle, handed to us by cURL and not used here
	 * @param string $header The header
	 * @return int The length of the header
	 */
	private function header_callback($cr, $header)
	{
		// The space after the colon is optional, so split on the colon alone
		$parts = explode(':', trim($header), 2);

		// set proper headers only (status lines and the blank separator line have no name: value form)
		if (isset($parts[1]))
		{
			$name = strtolower(trim($parts[0]));
			$value = trim($parts[1]);

			if ($name !== '')
				$this->headers[$name] = $name === 'location' ? $value : strtolower($value);
		}

		// return the length of what was passed unless you want a Failed writing header error ;)
		return strlen($header);
	}
}

?>