1 | <?php |
||
2 | /** |
||
3 | * Simple Machines Forum (SMF) |
||
4 | * |
||
5 | * @package SMF |
||
6 | * @author Simple Machines https://www.simplemachines.org |
||
7 | * @copyright 2022 Simple Machines and individual contributors |
||
8 | * @license https://www.simplemachines.org/about/smf/license.php BSD |
||
9 | * |
||
10 | * @version 2.1.0 |
||
11 | */ |
||
12 | |||
13 | if (!defined('SMF')) |
||
14 | die('No direct access...'); |
||
15 | |||
16 | /** |
||
17 | * Class curl_fetch_web_data |
||
18 | * Simple cURL class to fetch a web page |
||
19 | * Properly redirects even with safe mode and basedir restrictions |
||
20 | * Can provide simple post options to a page |
||
21 | * |
||
22 | * ### Load class |
||
23 | * Initiate as |
||
24 | * ``` |
||
25 | * $fetch_data = new cURL_fetch_web_data(); |
||
26 | * ``` |
||
27 | * Optionally pass an array of cURL options and redirect count |
||
28 | * ``` |
||
29 | * $fetch_data = new cURL_fetch_web_data(array(CURLOPT_SSL_VERIFYPEER => 1), 5); |
||
30 | * ``` |
||
31 | * |
||
32 | * ### Make the call |
||
33 | * Fetch a page |
||
34 | * ``` |
||
35 | * $fetch_data->get_url_data('https://www.simplemachines.org'); |
||
36 | * ``` |
||
37 | * Post to a page providing an array |
||
38 | * ``` |
||
39 | * $fetch_data->get_url_data('https://www.simplemachines.org', array('user' => 'name', 'password' => 'password')); |
||
40 | * ``` |
||
41 | * Post to a page providing a string |
||
42 | * ``` |
||
43 | * $fetch_data->get_url_data('https://www.simplemachines.org', 'parameter1&parameter2&parameter3'); |
||
44 | * ``` |
||
45 | * |
||
46 | * ### Get the data |
||
47 | * Just the page content |
||
48 | * ``` |
||
49 | * $fetch_data->result('body'); |
||
50 | * ``` |
||
51 | * An array of results, body, header, http result codes |
||
52 | * ``` |
||
53 | * $fetch_data->result(); |
||
54 | * ``` |
||
55 | * Show all results of all calls (in the event of a redirect) |
||
56 | * ``` |
||
57 | * $fetch_data->result_raw(); |
||
58 | * ``` |
||
59 | * Show the results of a specific call (in the event of a redirect) |
||
60 | * ``` |
||
61 | * $fetch_data->result_raw(0); |
||
62 | * ``` |
||
63 | */ |
||
class curl_fetch_web_data
{
	/**
	 * Default cURL options applied to every request.
	 * Options passed to the constructor override or extend these.
	 *
	 * NOTE(review): peer and host certificate verification are switched off
	 * here, which leaves HTTPS requests open to interception. The defaults are
	 * kept as they are for compatibility; callers that care should pass
	 * CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST to the constructor.
	 *
	 * @var array $default_options
	 */
	private $default_options = array(
		CURLOPT_RETURNTRANSFER => 1, // Get returned value as a string (don't output it)
		CURLOPT_HEADER => 1, // We need the headers to do our own redirect
		CURLOPT_FOLLOWLOCATION => 0, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
		CURLOPT_USERAGENT => SMF_USER_AGENT, // set a normal looking useragent
		CURLOPT_CONNECTTIMEOUT => 15, // Don't wait forever on a connection
		CURLOPT_TIMEOUT => 90, // A page should load in this amount of time
		CURLOPT_MAXREDIRS => 5, // stop after this many redirects
		CURLOPT_ENCODING => 'gzip,deflate', // accept gzip and decode it
		CURLOPT_SSL_VERIFYPEER => 0, // stop cURL from verifying the peer's certificate
		CURLOPT_SSL_VERIFYHOST => 0, // stop cURL from verifying the peer's host
		CURLOPT_POST => 0, // no post data unless its passed
	);

	/**
	 * @var int Maximum number of redirects
	 */
	public $max_redirect;

	/**
	 * @var array An array of cURL options supplied by the caller
	 */
	public $user_options = array();

	/**
	 * @var string|null URL encoded POST body for the current call, null when nothing is posted
	 */
	public $post_data;

	/**
	 * @var array The final cURL options (defaults merged with the user options)
	 */
	public $options;

	/**
	 * @var int Counter of the redirect hop being processed; starts at 1 for every new call
	 */
	public $current_redirect;

	/**
	 * @var array Stores responses (url, code, error, headers, body, size), one entry per request made
	 */
	public $response = array();

	/**
	 * @var array Headers of the most recent response as lowercase name => lowercase value
	 */
	public $headers = array();

	/**
	 * @var string Location header of the most recent response with its original case preserved
	 */
	private $redirect_location = '';

	/**
	 * Start the curl object
	 * - allow for user override values
	 *
	 * @param array $options An array of cURL options
	 * @param int $max_redirect Maximum number of redirects
	 */
	public function __construct($options = array(), $max_redirect = 3)
	{
		// Initialize class variables
		$this->max_redirect = intval($max_redirect);
		$this->user_options = $options;
	}

	/**
	 * Main calling function,
	 * - will request the page data from a given $url
	 * - optionally will post data to the page form if post data is supplied
	 * - passed arrays will be converted to a post string joined with &'s
	 * - calls set_options to set the curl opts array values based on the defaults and user input
	 *
	 * @param string $url the site we are going to fetch
	 * @param array|string $post_data any post data as form name => value, or an already built post string
	 * @return object An instance of the curl_fetch_web_data class
	 */
	public function get_url_data($url, $post_data = array())
	{
		// POSTing some data perhaps?
		if (!empty($post_data) && is_array($post_data))
			$this->post_data = $this->build_post_data($post_data);
		elseif (!empty($post_data))
			$this->post_data = trim($post_data);
		// Nothing to post this time, so forget whatever an earlier call on this object sent.
		else
			$this->post_data = null;

		// set the options and get it
		$this->set_options();
		$this->curl_request(str_replace(' ', '%20', $url));

		return $this;
	}

	/**
	 * Makes the actual cURL call
	 * - stores responses (url, code, error, headers, body, size) in the response array
	 * - detects 301, 302, 307 codes and will redirect to the given response header location
	 * - only http and https redirect targets are followed
	 *
	 * @param string $url The site to fetch
	 * @param bool $redirect Whether or not this was a redirect request
	 * @return void|bool Sets various properties of the class or returns false if the URL isn't specified
	 */
	private function curl_request($url, $redirect = false)
	{
		// we do have a url I hope
		if ($url == '')
			return false;

		$this->options[CURLOPT_URL] = $url;

		// if we have not already been redirected, set it up so we can if needed
		if (!$redirect)
		{
			$this->current_redirect = 1;
			$this->response = array();
		}

		// Headers describe one response only, don't let a previous hop bleed into this one
		$this->headers = array();
		$this->redirect_location = '';

		// Initialize the curl object and make the call
		$cr = curl_init();
		if ($cr === false)
		{
			$this->response[] = array(
				'url' => $url,
				'code' => 0,
				'error' => 'Unable to initialize cURL',
				'headers' => false,
				'body' => false,
				'size' => 0,
			);

			return;
		}

		curl_setopt_array($cr, $this->options);
		$curl_content = curl_exec($cr);

		// Get what was returned
		$curl_info = curl_getinfo($cr);
		$curl_error = curl_error($cr);

		// close this request
		curl_close($cr);

		$url = $curl_info['url']; // Last effective URL
		$http_code = $curl_info['http_code']; // Last HTTP code
		$error = $curl_error ? $curl_error : false;

		// The headers are part of the returned content (CURLOPT_HEADER), so cut them off the front
		$body = ($error === false && is_string($curl_content)) ? substr($curl_content, $curl_info['header_size']) : false;

		// The callback filled these in while curl_exec was running
		$location = $this->redirect_location;

		// store this 'loops' data, someone may want all of these :O
		$this->response[] = array(
			'url' => $url,
			'code' => $http_code,
			'error' => $error,
			'headers' => !empty($this->headers) ? $this->headers : false,
			'body' => $body,
			'size' => $curl_info['download_content_length'],
		);

		// If this a redirect with a location header and we have not given up, then do it again
		$is_redirect = in_array((int) $http_code, array(301, 302, 307), true);
		if ($is_redirect && $location !== '' && $this->current_redirect <= $this->max_redirect)
		{
			$header_location = $this->get_redirect_url($url, $location);

			// An empty target means the location was unusable or not http(s); stop here
			if ($header_location !== '')
			{
				$this->current_redirect++;
				$this->redirect($header_location, $url);
			}
		}
	}

	/**
	 * Used if being redirected to ensure we have a fully qualified address
	 * - missing scheme and host (with port and user info) are taken from the last URL
	 * - relative paths are resolved against the directory of the last URL
	 * - an absolute URL without a path points at the root of that host
	 * - the fragment, if any, is dropped
	 *
	 * @param string $last_url The URL we went to
	 * @param string $new_url The URL we were redirected to
	 * @return string The fully qualified URL to request next, or an empty string if it cannot be parsed or is not http/https
	 */
	private function get_redirect_url($last_url = '', $new_url = '')
	{
		// Get the elements for these urls
		$last_url_parse = parse_url($last_url);
		$new_url_parse = parse_url(trim($new_url));

		// A location we cannot even parse is not worth following
		if (!is_array($new_url_parse))
			return '';

		if (!is_array($last_url_parse))
			$last_url_parse = array();

		// redirect headers are often incomplete or relative so we need to make sure they are fully qualified
		if (isset($new_url_parse['scheme']))
			$scheme = $new_url_parse['scheme'];
		else
			$scheme = isset($last_url_parse['scheme']) ? $last_url_parse['scheme'] : 'http';

		// The location header is supplied by the remote server, never let it steer us to file://, gopher:// and friends
		if (!in_array(strtolower($scheme), array('http', 'https'), true))
			return '';

		// Host, port and user info travel together: either all from the new URL or all from the last one
		$authority_source = isset($new_url_parse['host']) ? $new_url_parse : $last_url_parse;
		if (!isset($authority_source['host']))
			return '';

		$authority = '';
		if (isset($authority_source['user']))
			$authority .= $authority_source['user'] . (isset($authority_source['pass']) ? ':' . $authority_source['pass'] : '') . '@';

		$authority .= $authority_source['host'];
		if (isset($authority_source['port']))
			$authority .= ':' . $authority_source['port'];

		// Work out the path
		$last_path = (isset($last_url_parse['path']) && $last_url_parse['path'] !== '') ? $last_url_parse['path'] : '/';
		$new_path = isset($new_url_parse['path']) ? $new_url_parse['path'] : '';

		if ($new_path !== '' && $new_path[0] === '/')
			$path = $new_path;
		elseif ($new_path !== '' && isset($new_url_parse['host']))
			$path = '/' . $new_path;
		elseif ($new_path !== '')
		{
			// Relative reference, it lives next to the document we just requested
			$last_slash = strrpos($last_path, '/');
			$path = ($last_slash === false ? '/' : substr($last_path, 0, $last_slash + 1)) . $new_path;
		}
		elseif (isset($new_url_parse['host']))
			$path = '/';
		else
			$path = $last_path;

		$query = (isset($new_url_parse['query']) && $new_url_parse['query'] !== '') ? '?' . $new_url_parse['query'] : '';

		// Build the new URL that was in the http header
		return $scheme . '://' . $authority . $path . $query;
	}

	/**
	 * Used to return the results to the calling program
	 * - called as ->result() will return the full final array
	 * - called as ->result('body') to just return the page source of the result
	 * - an unknown area returns the full final array
	 *
	 * @param string $area Used to return an area such as body, headers, error
	 * @return mixed The requested area, the full final response array, or null if no request has been made
	 */
	public function result($area = '')
	{
		// Nothing fetched yet, nothing to hand back
		if (empty($this->response))
			return null;

		$final = $this->response[count($this->response) - 1];

		// just return a specified area or the entire result?
		if ($area == '')
			return $final;

		return isset($final[$area]) ? $final[$area] : $final;
	}

	/**
	 * Will return all results from all loops (redirects)
	 * - Can be called as ->result_raw(x) where x is a specific loop results.
	 * - Call as ->result_raw() for everything.
	 * - x is clamped to the range of responses that exist
	 *
	 * @param int|string $response_number Which response we want to get
	 * @return array|null The entire response array, just the specified response, or null if a specific response was asked for and none exist
	 */
	public function result_raw($response_number = '')
	{
		if (!is_numeric($response_number))
			return $this->response;

		if (empty($this->response))
			return null;

		$index = max(0, min((int) $response_number, count($this->response) - 1));

		return $this->response[$index];
	}

	/**
	 * Takes supplied POST data and url encodes it
	 * - forms the data (for post) in to a string var=xyz&var2=abc&var3=123
	 * - blanks values with a leading @ since we don't support sending files (uploading)
	 * - values that are not scalar are sent empty
	 *
	 * @param array|string $post_data The raw POST data
	 * @return string A string of post data
	 */
	private function build_post_data($post_data)
	{
		if (!is_array($post_data))
			return $post_data;

		$postvars = array();

		// build the post data, blank ones with leading @'s since those can be used to send files, we don't support that.
		foreach ($post_data as $name => $value)
		{
			$value = is_scalar($value) ? (string) $value : '';

			// strncmp copes with an empty string, reading offset 0 directly does not
			if (strncmp($value, '@', 1) === 0)
				$value = '';

			$postvars[] = $name . '=' . urlencode($value);
		}

		return implode('&', $postvars);
	}

	/**
	 * Sets the final cURL options for the current call
	 * - overwrites our default values with user supplied ones or appends new user ones to what we have
	 * - sets the callback function now that $this is existing
	 *
	 * @return void
	 */
	private function set_options()
	{
		// Callback to parse the returned headers, if any
		$this->default_options[CURLOPT_HEADERFUNCTION] = array($this, 'header_callback');

		// Any user options to account for? array_replace keeps the integer CURLOPT_* keys intact.
		if (is_array($this->user_options))
			$this->options = array_replace($this->default_options, $this->user_options);
		else
			$this->options = $this->default_options;

		// POST data options, here we don't allow any override
		if (isset($this->post_data))
		{
			$this->options[CURLOPT_POST] = 1;
			$this->options[CURLOPT_POSTFIELDS] = $this->post_data;
		}
	}

	/**
	 * Called to initiate a redirect from a 301, 302 or 307 header
	 * - resets the cURL options for the loop, sets the referrer flag
	 *
	 * @param string $target_url The URL we want to redirect to
	 * @param string $referer_url The URL that we're redirecting from
	 */
	private function redirect($target_url, $referer_url)
	{
		// no no I last saw that over there ... really, 301, 302, 307
		$this->set_options();
		$this->options[CURLOPT_REFERER] = $referer_url;
		$this->curl_request($target_url, true);
	}

	/**
	 * Callback function to parse returned headers
	 * - stores each header in $this->headers with name and value lowercased to make it consistent
	 * - keeps the Location value in its original case as well, since URL paths are case sensitive
	 *
	 * @param resource|object $cr The cURL handle the header belongs to
	 * @param string $header The raw header line
	 * @return int The length of the header
	 */
	private function header_callback($cr, $header)
	{
		$temp = explode(':', $header, 2);

		// set proper headers only, status lines and the blank separator have no colon
		if (count($temp) === 2)
		{
			$name = strtolower(trim($temp[0]));
			$value = trim($temp[1]);

			if ($name !== '')
			{
				$this->headers[$name] = strtolower($value);

				if ($name === 'location')
					$this->redirect_location = $value;
			}
		}

		// return the length of what was passed unless you want a Failed writing header error ;)
		return strlen($header);
	}
}
||
370 | |||
371 | ?> |