1 | <?php |
||||
2 | |||||
3 | /** |
||||
4 | * Provides a cURL interface for fetching files and submitting requests to sites |
||||
5 | * |
||||
6 | * @package ElkArte Forum |
||||
7 | * @copyright ElkArte Forum contributors |
||||
8 | * @license BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
||||
9 | * |
||||
10 | * @version 2.0 dev |
||||
11 | * |
||||
12 | */ |
||||
13 | |||||
14 | namespace ElkArte\Http; |
||||
15 | |||||
/**
 * Simple cURL class to fetch a web page
 * Properly redirects even with safe mode and basedir restrictions
 * Can provide simple post options to a page
 *
 * Load class
 * Initiate as
 *  - $fetch_data = new CurlFetchWebdata();
 *    - optionally pass an array of cURL options and redirect count
 *  - CurlFetchWebdata(cURL options array, Max redirects);
 *  - $fetch_data = new CurlFetchWebdata(array(CURLOPT_SSL_VERIFYPEER => 1), 5);
 *
 * Make the call
 *  - $fetch_data->get_url_data('http://www.adomain.org'); // fetch a page
 *  - $fetch_data->get_url_data('http://www.adomain.org', array('user' => 'name', 'password' => 'password')); // post to a page
 *  - $fetch_data->get_url_data('http://www.adomain.org', 'parameter1&parameter2&parameter3'); // post to a page
 *
 * Get the data
 *  - $fetch_data->result('body'); // just the page content
 *  - $fetch_data->result(); // an array of results, body, header, http result codes
 *  - $fetch_data->result_raw(); // show all results of all calls (in the event of a redirect)
 *  - $fetch_data->result_raw(x); // show all results of call x
 */
class CurlFetchWebdata
{
    /**
     * Set the default items for this class
     *
     * NOTE(review): certificate and host verification are switched off by default.
     * Callers that need a verified TLS connection must pass CURLOPT_SSL_VERIFYPEER /
     * CURLOPT_SSL_VERIFYHOST in the constructor options.
     *
     * @var array
     */
    private $default_options = [
        CURLOPT_RETURNTRANSFER => true, // Get returned value as a string (don't output it)
        CURLOPT_HEADER => true, // We need the headers to do our own redirect
        CURLOPT_FOLLOWLOCATION => false, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931', // set a normal looking user agent
        CURLOPT_CONNECTTIMEOUT => 10, // Don't wait forever on a connection
        CURLOPT_TIMEOUT => 10, // A page should load in this amount of time
        CURLOPT_MAXREDIRS => 3, // stop after this many redirects
        CURLOPT_ENCODING => 'gzip,deflate', // accept gzip and decode it
        CURLOPT_SSL_VERIFYPEER => false, // stop cURL from verifying the peer's certificate
        CURLOPT_SSL_VERIFYHOST => 0, // stop cURL from verifying the peer's host
        CURLOPT_POST => false, // no post data unless its passed
        CURLOPT_HTTPHEADER => ['Accept-Encoding: gzip,compress,identity'], // default request headers, replaced when the caller supplies CURLOPT_HTTPHEADER
    ];

    /** @var int Holds the passed or default value for redirects */
    private $_max_redirect;

    /** @var int Holds the current redirect count for the request */
    private $_current_redirect = 0;

    /** @var array Holds the passed user options array */
    private $_user_options;

    /** @var string Holds any data that will be posted to a form */
    private $_post_data = '';

    /** @var array[] Holds one entry per request made (url, code, error, size, headers, body) */
    private $_response = [];

    /** @var array Holds the response headers of the request currently being processed */
    private $_headers = [];

    /** @var array Holds the options for this request */
    private $_options = [];

    /**
     * Start the cURL object
     *
     * - Allow for user override values
     *
     * @param array $options cURL options as an array
     * @param int $max_redirect Maximum number of redirects
     */
    public function __construct($options = [], $max_redirect = 3)
    {
        // Initialize class variables
        $this->_max_redirect = (int) $max_redirect;
        $this->_user_options = $options;
    }

    /**
     * Main calling function
     *
     * What it does:
     *
     * - Will request the page data from a given $url
     * - Optionally will post data to the page form if post data is supplied
     * - Passed arrays will be converted to a post string joined with &'s
     * - Calls _setOptions to set the curl opts array values based on the defaults and user input
     *
     * @param string $url the site we are going to fetch
     * @param array|string $post_data data to send in the curl request as post data
     *
     * @return CurlFetchWebdata
     */
    public function get_url_data($url, $post_data = [])
    {
        // Start clean, data POSTed by an earlier call must never be re-sent by this one
        $this->_post_data = '';

        // POSTing some data perhaps?
        if (!empty($post_data))
        {
            $this->_post_data = is_array($post_data)
                ? $this->_buildPostData($post_data)
                : trim((string) $post_data);
        }

        // Set the options and get it.  Trim first so stray outer whitespace is not
        // turned into %20 and a blank url is still recognised as blank.
        $this->_setOptions();
        $this->_curlRequest(str_replace(' ', '%20', trim((string) $url)));

        return $this;
    }

    /**
     * Takes supplied POST data and url encodes it
     *
     * What it does:
     *
     * - Forms the data (for post) in to a string var=xyz&var2=abc&var3=123
     * - Blanks the value of vars with a leading @ since we don't support sending files (uploading)
     * - Values that are not scalar (nested arrays, objects) are sent as an empty value
     * - Anything that is not an array is returned untouched
     *
     * @param array $post_data
     *
     * @return array|string
     */
    private function _buildPostData($post_data)
    {
        if (!is_array($post_data))
        {
            return $post_data;
        }

        $post_vars = [];

        foreach ($post_data as $name => $value)
        {
            // Normalise to a string first so the leading @ test is safe for ints, bools and null
            $value = is_scalar($value) ? (string) $value : '';

            // Leading @'s can be used to send files, we don't support that.
            if ($value !== '' && $value[0] === '@')
            {
                $value = '';
            }

            // The name is encoded as well so a stray & or = in it can not corrupt the string
            $post_vars[] = urlencode((string) $name) . '=' . urlencode($value);
        }

        return implode('&', $post_vars);
    }

    /**
     * Sets the final cURL options for the current call
     *
     * What it does:
     *
     * - Overwrites our default values with user supplied ones or appends new user ones to what we have
     * - Sets the callback function now that $this exists
     *
     * @uses _headerCallback()
     */
    private function _setOptions()
    {
        // Callback to parse the returned headers, if any
        $this->default_options[CURLOPT_HEADERFUNCTION] = fn($cr, $header) => $this->_headerCallback($cr, $header);

        // Any user options to account for.  array_replace keeps the integer CURLOPT_* keys
        // intact (array_merge would renumber them) and lets the user value win.
        $this->_options = is_array($this->_user_options)
            ? array_replace($this->default_options, $this->_user_options)
            : $this->default_options;

        // POST data options, here we don't allow any override
        if ($this->_post_data !== '')
        {
            $this->_options[CURLOPT_POST] = 1;
            $this->_options[CURLOPT_POSTFIELDS] = $this->_post_data;
        }
    }

    /**
     * Callback function to parse returned headers
     *
     * What it does:
     *
     * - Splits each "Name: value" line on the first colon, whitespace around either part is optional
     * - lowercase the header name to make it consistent
     * - Lines without a colon (status line, blank separator) are ignored
     *
     * @param object $cr Not used but passed by the cURL agent
     * @param string $header The header line received
     *
     * @return int
     */
    private function _headerCallback($cr, $header)
    {
        $parts = explode(':', trim($header), 2);

        // Set proper headers only
        if (isset($parts[1]) && trim($parts[0]) !== '')
        {
            $this->_headers[strtolower(trim($parts[0]))] = trim($parts[1]);
        }

        // Return the length of what was *passed* unless you want a Failed writing header error ;)
        return strlen($header);
    }

    /**
     * Makes the actual cURL call
     *
     * What it does
     * - Store responses (url, code, error, headers, body) in the response array
     * - Detects 301, 302, 307 codes and will redirect to the given response header location
     *
     * @param string $url site to fetch
     * @param bool $redirect flag to indicate if this was a redirect request or not
     *
     * @return bool
     */
    private function _curlRequest($url, $redirect = false)
    {
        // If we have not already been redirected, set it up so we can.  Done before the url
        // check so a failed call never reports the results of a previous one.
        if (!$redirect)
        {
            $this->_current_redirect = 1;
            $this->_response = [];
        }

        // We do have a url I hope
        if (trim((string) $url) === '')
        {
            return false;
        }

        $this->_options[CURLOPT_URL] = $url;

        // Headers are collected per request, do not let a previous hop leak in to this one
        $this->_headers = [];

        // Initialize the curl object and make the call
        $cr = curl_init();
        if ($cr === false)
        {
            $this->_response[] = [
                'url' => $url,
                'code' => 0,
                'error' => 'Unable to initialize cURL',
                'size' => 0,
                'headers' => false,
                'body' => false,
            ];

            return false;
        }

        curl_setopt_array($cr, $this->_options);
        curl_exec($cr);

        // Get what was returned
        $curl_info = curl_getinfo($cr);
        $curl_content = curl_multi_getcontent($cr);
        $curl_error = curl_error($cr);

        // Close this request
        curl_close($cr);

        $url = $curl_info['url']; // Last effective URL
        $http_code = $curl_info['http_code']; // Last HTTP code
        $failed = $curl_error !== '';

        // Store this 'loops' data, someone may want all of these :O
        $this->_response[] = [
            'url' => $url,
            'code' => $http_code,
            'error' => $failed ? $curl_error : false,
            'size' => empty($curl_info['download_content_length']) ? 0 : $curl_info['download_content_length'],
            'headers' => empty($this->_headers) ? false : $this->_headers,
            // CURLOPT_HEADER puts the headers in front of the content, strip them off
            'body' => $failed ? false : substr((string) $curl_content, $curl_info['header_size']),
        ];

        // If this a redirect with a location header and we have not given up, then we play it again Sam
        if (!empty($this->_headers['location'])
            && $this->_current_redirect <= $this->_max_redirect
            && in_array((int) $http_code, [301, 302, 307], true))
        {
            $this->_current_redirect++;
            $header_location = $this->_getRedirectURL($url, $this->_headers['location']);
            $this->_redirect($header_location, $url);
        }

        return true;
    }

    /**
     * Used if being redirected to ensure we have a fully qualified address
     *
     * - Returns the new url location for the redirect
     * - Scheme, host and port are taken from the previous url when the location header omits the host
     * - A path that does not start with / is resolved against the directory of the previous path
     *
     * @param string $last_url URL where we went to
     * @param string $new_url URL where we were redirected to
     *
     * @return string
     */
    private function _getRedirectURL($last_url = '', $new_url = '')
    {
        // Get the elements for these urls, parse_url gives false on seriously malformed input
        $last = parse_url((string) $last_url) ?: [];
        $new = parse_url((string) $new_url) ?: [];

        // Redirect headers are often incomplete / relative so we need to make sure they are fully qualified
        $has_host = isset($new['host']);
        $scheme = $new['scheme'] ?? $last['scheme'] ?? 'http';
        $host = $new['host'] ?? $last['host'] ?? '';

        // A port belongs to the host it was supplied with
        $port = $has_host ? ($new['port'] ?? null) : ($last['port'] ?? null);

        $last_path = $last['path'] ?? '';
        if (!isset($new['path']))
        {
            // Only a query (or nothing) was given, stay on the same page unless the host changed
            $path = $has_host ? '' : $last_path;
        }
        elseif ($has_host || $new['path'] === '' || $new['path'][0] === '/')
        {
            $path = $new['path'];
        }
        else
        {
            // Relative reference, resolve it against the directory we were last in
            $slash = strrpos($last_path, '/');
            $path = ($slash === false ? '/' : substr($last_path, 0, $slash + 1)) . $new['path'];
        }

        $query = $new['query'] ?? '';

        // Build the new URL that was in the http header
        return $scheme . '://' . $host
            . ($port === null ? '' : ':' . $port)
            . $path
            . ($query === '' ? '' : '?' . $query);
    }

    /**
     * Called to initiate a redirect from a 301, 302 or 307 header
     *
     * What it does
     * - Resets the cURL options for the loop, sets the referrer flag
     * - Any POST data of the original call is sent again to the new target
     *
     * @param string $target_url The URL of the target
     * @param string $referer_url The URL of the link that referred us to the new target
     */
    private function _redirect($target_url, $referer_url)
    {
        // No I last saw that over there ... really, 301, 302, 307
        $this->_setOptions();
        $this->_options[CURLOPT_REFERER] = $referer_url;
        $this->_curlRequest($target_url, true);
    }

    /**
     * Used to return the results to the calling program
     *
     * What it does:
     *
     * - Called as ->result() will return the full final array
     * - Called as ->result('body') to just return the page source of the result
     * - An unknown area name returns the full final array
     * - When no request has produced a result, returns an empty array for ->result()
     *   and false for a named area
     *
     * @param string $area used to return an area such as body, header, error
     *
     * @return array|string|int|bool
     */
    public function result($area = '')
    {
        $area = trim((string) $area);

        // Nothing fetched (blank url, or never called), there is no final entry to look at
        if (empty($this->_response))
        {
            return $area === '' ? [] : false;
        }

        $final = $this->_response[count($this->_response) - 1];

        // Just return a specified area or the entire result?
        if ($area === '')
        {
            return $final;
        }

        return $final[$area] ?? $final;
    }

    /**
     * Will return all results from all loops (redirects)
     *
     * What it does:
     *
     * - Can be called as ->result_raw(x) where x is a specific loop results.
     * - Call as ->result_raw() for everything.
     * - x is clamped to the available range, an empty array is returned when there are no results
     *
     * @param int|string $response_number
     *
     * @return array
     */
    public function result_raw($response_number = '')
    {
        if (!is_numeric($response_number))
        {
            return $this->_response;
        }

        if (empty($this->_response))
        {
            return [];
        }

        // Keep the index inside 0 .. last so negative or oversized requests still resolve
        $index = max(0, min((int) $response_number, count($this->_response) - 1));

        return $this->_response[$index];
    }
}
||||
382 |