1 | <?php |
||||
2 | /** |
||||
3 | * Simple Machines Forum (SMF) |
||||
4 | * |
||||
5 | * @package SMF |
||||
6 | * @author Simple Machines https://www.simplemachines.org |
||||
7 | * @copyright 2020 Simple Machines and individual contributors |
||||
8 | * @license https://www.simplemachines.org/about/smf/license.php BSD |
||||
9 | * |
||||
10 | * @version 2.1 RC2 |
||||
11 | */ |
||||
12 | |||||
// This file only makes sense when loaded by SMF itself: stop right away
// if the SMF constant has not been defined by the including script.
defined('SMF') or die('No direct access...');
||||
15 | |||||
/**
 * Class curl_fetch_web_data
 * Simple cURL class to fetch a web page
 * Properly redirects even with safe mode and basedir restrictions
 * Can provide simple post options to a page
 *
 * ### Load class
 * Initiate as
 * ```
 * $fetch_data = new curl_fetch_web_data();
 * ```
 * Optionally pass an array of cURL options and redirect count
 * ```
 * $fetch_data = new curl_fetch_web_data(array(CURLOPT_SSL_VERIFYPEER => 1), 5);
 * ```
 *
 * ### Make the call
 * Fetch a page
 * ```
 * $fetch_data->get_url_data('https://www.simplemachines.org');
 * ```
 * Post to a page providing an array
 * ```
 * $fetch_data->get_url_data('https://www.simplemachines.org', array('user' => 'name', 'password' => 'password'));
 * ```
 * Post to a page providing a string
 * ```
 * $fetch_data->get_url_data('https://www.simplemachines.org', 'parameter1&parameter2&parameter3');
 * ```
 *
 * ### Get the data
 * Just the page content
 * ```
 * $fetch_data->result('body');
 * ```
 * An array of results, body, header, http result codes
 * ```
 * $fetch_data->result();
 * ```
 * Show all results of all calls (in the event of a redirect)
 * ```
 * $fetch_data->result_raw();
 * ```
 * Show the results of a specific call (in the event of a redirect)
 * ```
 * $fetch_data->result_raw(0);
 * ```
 */
class curl_fetch_web_data
{
	/**
	 * Set the default items for this class
	 *
	 * NOTE(review): peer and host certificate verification are switched off
	 * by default, which leaves HTTPS requests open to man-in-the-middle
	 * attacks. Callers that care should pass CURLOPT_SSL_VERIFYPEER => 1 and
	 * CURLOPT_SSL_VERIFYHOST => 2 to the constructor.
	 *
	 * @var array $default_options
	 */
	private $default_options = array(
		CURLOPT_RETURNTRANSFER => 1, // Get returned value as a string (don't output it)
		CURLOPT_HEADER => 1, // We need the headers to do our own redirect
		CURLOPT_FOLLOWLOCATION => 0, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
		CURLOPT_USERAGENT => SMF_USER_AGENT, // set a normal looking useragent
		CURLOPT_CONNECTTIMEOUT => 15, // Don't wait forever on a connection
		CURLOPT_TIMEOUT => 90, // A page should load in this amount of time
		CURLOPT_MAXREDIRS => 5, // stop after this many redirects
		CURLOPT_ENCODING => 'gzip,deflate', // accept gzip and decode it
		CURLOPT_SSL_VERIFYPEER => 0, // stop cURL from verifying the peer's certificate
		CURLOPT_SSL_VERIFYHOST => 0, // stop cURL from verifying the peer's host
		CURLOPT_POST => 0, // no post data unless its passed
	);

	/**
	 * @var int Maximum number of redirects
	 */
	public $max_redirect;

	/**
	 * @var array An array of cURL options supplied by the caller
	 */
	public $user_options = array();

	/**
	 * @var string Any post data, already encoded as name=value pairs joined with &'s
	 */
	public $post_data;

	/**
	 * @var array The final cURL options (defaults overridden by the user options)
	 */
	public $options;

	/**
	 * @var int Position in the current redirect chain, starts at 1 for the initial request
	 */
	public $current_redirect;

	/**
	 * @var array Stores responses (url, code, error, headers, body) in the response array
	 */
	public $response = array();

	/**
	 * @var array The headers of the most recent response as lowercased name => value
	 */
	public $headers = array();

	/**
	 * Start the curl object
	 * - allow for user override values
	 *
	 * @param array $options An array of cURL options
	 * @param int $max_redirect Maximum number of redirects
	 */
	public function __construct($options = array(), $max_redirect = 3)
	{
		// Initialize class variables
		$this->max_redirect = intval($max_redirect);
		$this->user_options = $options;
	}

	/**
	 * Main calling function,
	 * - will request the page data from a given $url
	 * - optionally will post data to the page form if post data is supplied
	 * - passed arrays will be converted to a post string joined with &'s
	 * - calls set_options to set the curl opts array values based on the defaults and user input
	 * - note that post data stored by an earlier call stays in effect when a
	 *   later call on the same object supplies none
	 *
	 * @param string $url the site we are going to fetch
	 * @param array|string $post_data any post data as form name => value, or a ready made post string
	 * @return object An instance of the curl_fetch_web_data class
	 */
	public function get_url_data($url, $post_data = array())
	{
		// POSTing some data perhaps?
		if (!empty($post_data) && is_array($post_data))
			$this->post_data = $this->build_post_data($post_data);
		elseif (!empty($post_data))
			$this->post_data = trim($post_data);

		// set the options and get it
		$this->set_options();
		$this->curl_request(str_replace(' ', '%20', $url));

		return $this;
	}

	/**
	 * Makes the actual cURL call
	 * - stores responses (url, code, error, headers, body) in the response array
	 * - detects 301, 302, 307 codes and will redirect to the given response header location
	 *
	 * @param string $url The site to fetch
	 * @param bool $redirect Whether or not this was a redirect request
	 * @return void|bool Sets various properties of the class or returns false if the URL isn't specified or cURL could not be started
	 */
	private function curl_request($url, $redirect = false)
	{
		// we do have a url I hope
		if ($url == '')
			return false;

		$this->options[CURLOPT_URL] = $url;

		// if we have not already been redirected, set it up so we can if needed
		if (!$redirect)
		{
			$this->current_redirect = 1;
			$this->response = array();
		}

		// Each response gets its own headers, otherwise a Location header from
		// an earlier response could be mistaken for one sent by this response.
		$this->headers = array();

		// Initialize the curl object
		$cr = curl_init();
		if ($cr === false)
		{
			// Without a handle none of the curl_* calls below can work, so just log the failure
			$this->response[] = array(
				'url' => $url,
				'code' => 0,
				'error' => 'Unable to initialize cURL',
				'headers' => false,
				'body' => false,
				'size' => 0,
			);

			return false;
		}

		// ... and make the call
		curl_setopt_array($cr, $this->options);
		$curl_content = curl_exec($cr);

		// Get what was returned
		$curl_info = curl_getinfo($cr);
		$curl_error = curl_error($cr);
		$url = $curl_info['url']; // Last effective URL
		$http_code = $curl_info['http_code']; // Last HTTP code

		// CURLOPT_HEADER puts the headers in front of the content, so cut them off to get the body
		$body = ($curl_error === '' && is_string($curl_content)) ? substr($curl_content, $curl_info['header_size']) : false;
		$error = $curl_error !== '' ? $curl_error : false;

		// close this request
		curl_close($cr);

		// store this 'loops' data, someone may want all of these :O
		$this->response[] = array(
			'url' => $url,
			'code' => $http_code,
			'error' => $error,
			'headers' => !empty($this->headers) ? $this->headers : false,
			'body' => $body,
			'size' => $curl_info['download_content_length'],
		);

		// If this a redirect with a location header and we have not given up, then do it again
		$is_redirect = in_array((int) $http_code, array(301, 302, 307), true);
		$has_location = isset($this->headers['location']) && $this->headers['location'] !== '';

		if ($is_redirect && $has_location && $this->current_redirect <= $this->max_redirect)
		{
			$this->current_redirect++;
			$header_location = $this->get_redirect_url($url, $this->headers['location']);
			$this->redirect($header_location, $url);
		}
	}

	/**
	 * Used if being redirected to ensure we have a fully qualified address
	 * - a location with its own host is used as is (scheme falls back to the last one)
	 * - a location without a host inherits scheme, host and port from the last URL
	 * - a relative path (no leading slash) is resolved against the directory of the last URL
	 * - dot segments (./ and ../) are not collapsed here
	 * - user info and fragments are never carried over
	 *
	 * @param string $last_url The URL we went to
	 * @param string $new_url The URL we were redirected to
	 * @return string The new URL that was in the HTTP header
	 */
	private function get_redirect_url($last_url = '', $new_url = '')
	{
		// Get the elements for these urls, parse_url() gives false on seriously malformed input
		$last_url_parse = parse_url($last_url);
		$new_url_parse = parse_url($new_url);

		if (!is_array($last_url_parse))
			$last_url_parse = array();

		if (!is_array($new_url_parse))
			$new_url_parse = array();

		// redirect headers are often incomplete or relative so we need to make sure they are fully qualified
		if (isset($new_url_parse['scheme']))
			$scheme = $new_url_parse['scheme'];
		else
			$scheme = isset($last_url_parse['scheme']) ? $last_url_parse['scheme'] : 'http';

		if (isset($new_url_parse['host']))
		{
			// A new host brings its own port and path, nothing of the old location applies
			$host = $new_url_parse['host'];
			$port = isset($new_url_parse['port']) ? ':' . $new_url_parse['port'] : '';
			$path = isset($new_url_parse['path']) ? $new_url_parse['path'] : '';
		}
		else
		{
			// Same server, so stay on the same port as well
			$host = isset($last_url_parse['host']) ? $last_url_parse['host'] : '';
			$port = isset($last_url_parse['port']) ? ':' . $last_url_parse['port'] : '';
			$last_path = isset($last_url_parse['path']) ? $last_url_parse['path'] : '/';

			if (!isset($new_url_parse['path']) || $new_url_parse['path'] === '')
				$path = $last_path;
			elseif ($new_url_parse['path'][0] === '/')
				$path = $new_url_parse['path'];
			else
			{
				// Relative to the directory we were just in
				$last_slash = strrpos($last_path, '/');
				$base_dir = $last_slash === false ? '/' : substr($last_path, 0, $last_slash + 1);
				$path = $base_dir . $new_url_parse['path'];
			}
		}

		$query = (isset($new_url_parse['query']) && $new_url_parse['query'] !== '') ? '?' . $new_url_parse['query'] : '';

		// Build the new URL that was in the http header
		return $scheme . '://' . $host . $port . $path . $query;
	}

	/**
	 * Used to return the results to the calling program
	 * - called as ->result() will return the full final array
	 * - called as ->result('body') to just return the page source of the result
	 * - an unknown area gives the full final array
	 *
	 * @param string $area Used to return an area such as body, header, error
	 * @return array|string|bool|null The response, or null if no request has been stored yet
	 */
	public function result($area = '')
	{
		// Nothing fetched (yet), nothing to give
		if (empty($this->response))
			return null;

		$max_result = count($this->response) - 1;

		// just return a specifed area or the entire result?
		if ($area == '')
			return $this->response[$max_result];
		else
			return isset($this->response[$max_result][$area]) ? $this->response[$max_result][$area] : $this->response[$max_result];
	}

	/**
	 * Will return all results from all loops (redirects)
	 * - Can be called as ->result_raw(x) where x is a specific loop results.
	 * - Call as ->result_raw() for everything.
	 * - x is clamped to the range of stored responses
	 *
	 * @param string $response_number Which response we want to get
	 * @return array The entire response array or just the specified response
	 */
	public function result_raw($response_number = '')
	{
		if (!is_numeric($response_number))
			return $this->response;

		// No responses at all means there is no specific one to pick either
		if (empty($this->response))
			return array();

		$response_number = max(0, min((int) $response_number, count($this->response) - 1));

		return $this->response[$response_number];
	}

	/**
	 * Takes supplied POST data and url encodes it
	 * - forms the date (for post) in to a string var=xyz&var2=abc&var3=123
	 * - drops vars with @ since we don't support sending files (uploading)
	 * - values that are not scalar (arrays, objects, null) are sent as empty
	 * - names are used as given, they are not url encoded
	 *
	 * @param array|string $post_data The raw POST data
	 * @return string A string of post data
	 */
	private function build_post_data($post_data)
	{
		if (!is_array($post_data))
			return $post_data;

		$postvars = array();

		// build the post data, drop ones with leading @'s since those can be used to send files, we don't support that.
		foreach ($post_data as $name => $value)
		{
			$value = is_scalar($value) ? (string) $value : '';

			// Only look at the first character when there is one
			if ($value !== '' && $value[0] === '@')
				$value = '';

			$postvars[] = $name . '=' . urlencode($value);
		}

		return implode('&', $postvars);
	}

	/**
	 * Sets the final cURL options for the current call
	 * - overwrites our default values with user supplied ones or appends new user ones to what we have
	 * - sets the callback function now that $this is existing
	 *
	 * @return void
	 */
	private function set_options()
	{
		// Callback to parse the returned headers, if any
		$this->default_options[CURLOPT_HEADERFUNCTION] = array($this, 'header_callback');

		// Any user options to account for, array_replace keeps the (integer) option keys intact
		if (is_array($this->user_options))
			$this->options = array_replace($this->default_options, $this->user_options);
		else
			$this->options = $this->default_options;

		// POST data options, here we don't allow any overide
		if (isset($this->post_data))
		{
			$this->options[CURLOPT_POST] = 1;
			$this->options[CURLOPT_POSTFIELDS] = $this->post_data;
		}
	}

	/**
	 * Called to initiate a redirect from a 301, 302 or 307 header
	 * - resets the cURL options for the loop, sets the referrer flag
	 *
	 * @param string $target_url The URL we want to redirect to
	 * @param string $referer_url The URL that we're redirecting from
	 */
	private function redirect($target_url, $referer_url)
	{
		// no no I last saw that over there ... really, 301, 302, 307
		$this->set_options();
		$this->options[CURLOPT_REFERER] = $referer_url;
		$this->curl_request($target_url, true);
	}

	/**
	 * Callback function to parse returned headers
	 * - lowercases names and values to make it consistent
	 * - the value of the Location header keeps its case, URLs are case sensitive
	 *
	 * @param mixed $cr The cURL handle of the request (not used)
	 * @param string $header A single raw header line
	 * @return int The length of the header
	 */
	private function header_callback($cr, $header)
	{
		// The space after the colon is optional, so split on the colon alone
		$temp = explode(':', trim($header), 2);

		// set proper headers only
		if (isset($temp[0]) && isset($temp[1]))
		{
			$name = strtolower(trim($temp[0]));
			$value = trim($temp[1]);

			$this->headers[$name] = $name === 'location' ? $value : strtolower($value);
		}

		// return the length of what was passed unless you want a Failed writing header error ;)
		return strlen($header);
	}
}
||||
370 | |||||
371 | ?> |