<?php

/**
 * This will fetch a web resource http/https and return the headers and page data. It is capable of following
 * redirects and interpreting chunked data. It will work with allow_url_fopen off.
 *
 * @package   ElkArte Forum
 * @copyright ElkArte Forum contributors
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
 *
 * @version 2.0 dev
 *
 */

namespace ElkArte\Http;

use Exception;

/**
 * Class FsockFetchWebdata
 *
 * Fetches an http/https resource over a stream opened with fsockopen().
 *
 * What it does:
 *
 * - Sends a GET, or a POST when post data is supplied
 * - Follows 301/302/307 redirects up to the configured maximum
 * - Decodes "Transfer-Encoding: chunked" response bodies
 * - Exposes the outcome (url, code, error, redirects, size, headers, body) through result()
 *
 * @package ElkArte
 */
class FsockFetchWebdata
{
	/** @var array Shape of an untouched response, used to reset state before every request */
	private const EMPTY_RESPONSE = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var int Largest redirect body (in bytes) we will read and discard to keep a connection reusable */
	private const MAX_DRAIN = 65536;

	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int Holds the current redirect count for the request */
	private $_current_redirect = 0;

	/** @var null|string "host:port" the open socket is connected to, set when keep alive is true */
	private $_keep_alive_host;

	/** @var int how much we will read */
	private $_content_length = 0;

	/** @var array the parsed url with scheme, host, host_raw, port and path */
	private $_url = [];

	/** @var null|resource the fsockopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url-encoded data that will be posted to a form */
	private $_post_data = '';

	/** @var array Holds the response to the request, headers, data, code */
	private $_response = self::EMPTY_RESPONSE;

	/** @var array Holds the last header response to the request, keyed by lower-cased header name */
	private $_headers = [];

	/** @var string the HTTP status line from the server, e.g. "HTTP/1.1 200 OK" */
	private $_server_response = '';

	/** @var bool if the response body is transfer encoded chunked */
	private $_chunked = false;

	/**
	 * FsockFetchWebdata constructor.
	 *
	 * @param array $options only 'max_length' (maximum number of body bytes to request/read) is used
	 * @param int $max_redirect how many redirects will be followed before giving up
	 * @param bool $keep_alive ask the server to keep the connection open so redirects can reuse it
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * The outcome is not returned here, use result() to read it.
	 *
	 * @param string $url
	 * @param string|string[] $post_data
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Start clean, nothing from an earlier call on this instance may leak into this one
		$this->_post_data = '';
		$this->_chunked = false;
		$this->_headers = [];
		$this->_server_response = '';
		$this->_response = self::EMPTY_RESPONSE;
		$this->_current_redirect = 0;

		// Prepare any given post data
		if (!empty($post_data))
		{
			// NOTE(review): a string is sent as one indexed value ("0=..."), not as a ready made
			// query string. That is the existing contract, confirm with callers before changing.
			$this->_post_data = is_array($post_data)
				? http_build_query($post_data, '', '&')
				: http_build_query([trim($post_data)], '', '&');
		}

		$this->_fopenRequest($url);
	}

	/**
	 * Main processing loop, connects, parses responses, redirects, fetches body
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool true when a final (non redirect) response was read, false when the url was
	 * invalid or the connection could not be opened
	 */
	private function _fopenRequest($url): bool
	{
		// We do have a url I hope
		$this->_setOptions($url);
		if (empty($this->_url))
		{
			// Nothing more will be sent, do not leave a kept alive socket dangling
			$this->_closeConnection();

			return false;
		}

		// Keep the current socket only if it really points at this host & port
		$this->_prepareConnection();

		// Open a connection to the host & port
		if (!$this->_sockOpen())
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		$this->_makeRequest();

		// Is it where we thought?
		$this->_readHeaders();
		$location = $this->_checkRedirect();
		if ($location !== '')
		{
			// To the new location we go, and its outcome is our outcome
			return $this->_fopenRequest($location);
		}

		preg_match('~^HTTP/\S+\s+(\d{3})~i', $this->_server_response, $code);
		$this->_response['code'] = isset($code[1]) ? (int) $code[1] : '???';

		// Provide a common valid 200 return code to the caller for the other success codes
		if (in_array($this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code'] = 200;
		}

		$this->_fetchData();
		$this->_closeConnection();

		return true;
	}

	/**
	 * Parses a url into the components we need
	 *
	 * Leaves $this->_url empty when the url is not valid or has no host.
	 *
	 * @param string $url
	 */
	private function _setOptions($url): void
	{
		$this->_url = [];
		$this->_response['url'] = $url;
		$this->_content_length = empty($this->_user_options['max_length']) ? 0 : (int) $this->_user_options['max_length'];

		// Make sure its valid before we parse it out
		if (!filter_var($url, FILTER_VALIDATE_URL))
		{
			return;
		}

		// Get the elements for this url, a url without a host (mailto: etc.) is of no use here
		$url_parse = parse_url($url);
		if (empty($url_parse['host']) || empty($url_parse['scheme']))
		{
			return;
		}

		// Handle SSL connections, schemes are case-insensitive
		$secure = strtolower($url_parse['scheme']) === 'https';

		$this->_url['scheme'] = $secure ? 'https' : 'http';
		$this->_url['host_raw'] = $url_parse['host'];
		$this->_url['host'] = ($secure ? 'ssl://' : '') . $url_parse['host'];

		// Respect a port given in the url, otherwise the default for the scheme
		$this->_url['port'] = empty($url_parse['port']) ? ($secure ? 443 : 80) : (int) $url_parse['port'];

		// Fix/Finalize the data path
		$this->_url['path'] = ($url_parse['path'] ?? '/') . (isset($url_parse['query']) ? '?' . $url_parse['query'] : '');
	}

	/**
	 * The "host:port" identity of the current url, used to decide if a socket can be reused
	 *
	 * @return string
	 */
	private function _target(): string
	{
		return $this->_url['host'] . ':' . $this->_url['port'];
	}

	/**
	 * Drops the current socket unless keep alive is on and it is connected to the current target
	 *
	 * feof() is intentionally not used to test the socket, on an idle connection it waits
	 * for the stream timeout.
	 */
	private function _prepareConnection(): void
	{
		if (!is_resource($this->_fp))
		{
			$this->_fp = null;

			return;
		}

		if (!$this->_keep_alive || $this->_keep_alive_host !== $this->_target())
		{
			$this->_closeConnection();
		}
	}

	/**
	 * Closes the socket, if open, and forgets what it was connected to
	 */
	private function _closeConnection(): void
	{
		if (is_resource($this->_fp))
		{
			fclose($this->_fp);
		}

		$this->_fp = null;
		$this->_keep_alive_host = null;
	}

	/**
	 * Connect to the host/port as requested
	 *
	 * Any connection error is stored in the response 'error' key as "errno :: message",
	 * the key is false when the connection reported no error.
	 *
	 * @return bool
	 */
	private function _sockOpen(): bool
	{
		// Already have a usable socket, nothing to do
		if (is_resource($this->_fp))
		{
			return true;
		}

		$errno = 0;
		$errstr = '';

		// fsockopen raises warnings on failure, we report through the response instead
		set_error_handler(static function () {
			return true;
		});
		try
		{
			$fp = fsockopen($this->_url['host'], $this->_url['port'], $errno, $errstr, 5);
		}
		catch (Exception $e)
		{
			$this->_response['error'] = $e->getMessage();
			$this->_fp = null;

			return false;
		}
		finally
		{
			restore_error_handler();
		}

		$this->_response['error'] = empty($errstr) ? false : $errno . ' :: ' . $errstr;
		$this->_fp = is_resource($fp) ? $fp : null;

		return $this->_fp !== null;
	}

	/**
	 * Make the request to the host, either get or post, and get the initial response.
	 */
	private function _makeRequest(): void
	{
		$is_post = !empty($this->_post_data);

		// The Host header carries the port whenever it is not the default one for the scheme
		$default_port = $this->_url['scheme'] === 'https' ? 443 : 80;
		$host = $this->_url['host_raw'] . ($this->_url['port'] === $default_port ? '' : ':' . $this->_url['port']);

		$request = ($is_post ? 'POST ' : 'GET ') . $this->_url['path'] . ' HTTP/1.1' . "\r\n";
		$request .= 'Host: ' . $host . "\r\n";
		$request .= $this->_keepAlive();
		$request .= 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931' . "\r\n";

		if (!empty($this->_content_length))
		{
			$request .= 'Range: bytes=0-' . ($this->_content_length - 1) . "\r\n";
		}

		// Entity headers are only meaningful when we send a body
		if ($is_post)
		{
			$request .= 'Content-Type: application/x-www-form-urlencoded' . "\r\n";
			$request .= 'Content-Length: ' . strlen($this->_post_data) . "\r\n";
		}

		// Exactly one blank line ends the headers, every header above already ends in CRLF
		$request .= "\r\n";

		if ($is_post)
		{
			$request .= $this->_post_data;
		}

		// Make the request and read the first line of the server response, ending at the first CRLF
		fwrite($this->_fp, $request);
		$this->_server_response = (string) fgets($this->_fp);
	}

	/**
	 * Returns the proper Connection header and remembers the connected target if keep alive is enabled
	 *
	 * @return string
	 */
	private function _keepAlive(): string
	{
		if ($this->_keep_alive)
		{
			$this->_keep_alive_host = $this->_target();

			return 'Connection: Keep-Alive' . "\r\n";
		}

		return 'Connection: close' . "\r\n";
	}

	/**
	 * Reads the stream until the end of the headers section and parses those headers
	 *
	 * Header names are lower-cased. A header that appears more than once is stored as an
	 * array of its values, in the order received.
	 */
	private function _readHeaders(): void
	{
		$this->_headers = [];

		// A blank line separates headers from body, a failed read means the stream is done
		while (($line = fgets($this->_fp)) !== false && trim($line) !== '')
		{
			// Not a "name: value" line, nothing we can store
			if (!str_contains($line, ':'))
			{
				continue;
			}

			// Get name and value, normalize / clean
			[$name, $value] = explode(':', $line, 2);
			$name = strtolower(trim($name));
			$value = trim($value);

			// If its already there, then add to it as an array
			if (isset($this->_headers[$name]))
			{
				if (is_string($this->_headers[$name]))
				{
					$this->_headers[$name] = [$this->_headers[$name]];
				}

				$this->_headers[$name][] = $value;
			}
			else
			{
				$this->_headers[$name] = $value;
			}
		}
	}

	/**
	 * Looks at the server response and header array to determine if we are redirecting
	 *
	 * @return string the absolute url to request next, or '' when this response is final
	 */
	private function _checkRedirect(): string
	{
		// Redirect in case this location is permanently or temporarily moved (301, 302, 307)
		if ($this->_current_redirect >= $this->_max_redirect
			|| preg_match('~^HTTP/\S+\s+(30[127])~i', $this->_server_response, $code) !== 1)
		{
			return '';
		}

		// Maintain our status responses
		$this->_response['code'] = (int) $code[1];
		$this->_response['redirects'] = ++$this->_current_redirect;
		$this->_response['headers'] = $this->_headers;

		// A repeated Location header arrives as an array, the last one received wins
		$location = $this->_headers['location'] ?? '';
		if (is_array($location))
		{
			$location = (string) end($location);
		}

		// redirection with no location, just like working in a corporation
		if (empty($location))
		{
			return '';
		}

		// Use the same connection or new? It is only reusable once this response's body is off the wire
		if (!$this->_keep_alive || !$this->_drainRedirectBody())
		{
			$this->_closeConnection();
		}

		return $this->_resolveLocation($location);
	}

	/**
	 * Reads and discards the body of a redirect response so the socket can carry the next request
	 *
	 * @return bool true when the connection is clean and may be reused, false when it must be closed
	 */
	private function _drainRedirectBody(): bool
	{
		// The server is closing, or we cannot tell where the body ends without decoding it
		$connection = $this->_headers['connection'] ?? '';
		if (!is_string($connection) || strcasecmp($connection, 'close') === 0 || isset($this->_headers['transfer-encoding']))
		{
			return false;
		}

		$declared = $this->_headers['content-length'] ?? null;
		if (!is_string($declared) || preg_match('~^\d{1,15}$~', $declared) !== 1)
		{
			return false;
		}

		$remaining = (int) $declared;
		if ($remaining === 0)
		{
			return true;
		}

		// A new connection is cheaper than downloading a large body only to throw it away
		if ($remaining > self::MAX_DRAIN)
		{
			return false;
		}

		$discarded = stream_get_contents($this->_fp, $remaining);

		return is_string($discarded) && strlen($discarded) === $remaining;
	}

	/**
	 * Turns a scheme-relative ("//host/path") or path-absolute ("/path") Location into a full url
	 *
	 * Any other value is returned untouched and validated by the next request.
	 *
	 * @param string $location non-empty Location header value
	 *
	 * @return string
	 */
	private function _resolveLocation(string $location): string
	{
		if (str_starts_with($location, '//'))
		{
			return $this->_url['scheme'] . ':' . $location;
		}

		if ($location[0] === '/')
		{
			return $this->_url['scheme'] . '://' . $this->_url['host_raw'] . ':' . $this->_url['port'] . $location;
		}

		return $location;
	}

	/**
	 * Fetch the data for the selected site.
	 *
	 * Stores the (de-chunked) body and its size in the response.
	 */
	private function _fetchData(): void
	{
		// Respect the headers
		$this->_processHeaders();

		// Now the body of the response
		$limit = $this->_bodyReadLimit();
		if ($limit === null)
		{
			$response = stream_get_contents($this->_fp);
		}
		elseif ($limit === 0)
		{
			// The server declared an empty body, there is nothing to wait for
			$response = '';
		}
		else
		{
			$response = stream_get_contents($this->_fp, $limit);
		}

		$this->_response['body'] = $this->_unChunk($response === false ? '' : $response);
		$this->_response['size'] = strlen($this->_response['body']);
	}

	/**
	 * How many bytes of body to read
	 *
	 * The smaller of the user max_length and the Content-Length the server declared. Reading
	 * only what was declared avoids waiting for a persistent connection to time out.
	 *
	 * @return int|null null when there is no known limit and we read until the stream ends
	 */
	private function _bodyReadLimit(): ?int
	{
		$limit = empty($this->_content_length) ? null : (int) $this->_content_length;

		// Content-Length does not describe a chunked body
		$declared = $this->_headers['content-length'] ?? null;
		if (!$this->_chunked && is_string($declared) && preg_match('~^\d{1,15}$~', $declared) === 1)
		{
			$declared = (int) $declared;
			$limit = $limit === null ? $declared : min($limit, $declared);
		}

		return $limit;
	}

	/**
	 * Notes if the body is chunked and publishes the headers to the response
	 *
	 * A "Connection: close" from the server needs no handling here, the socket is always
	 * closed once the final body has been read.
	 */
	private function _processHeaders(): void
	{
		// If its chunked we need to decode the body, set either way so an earlier response can not leak
		$encoding = $this->_headers['transfer-encoding'] ?? '';
		if (is_array($encoding))
		{
			$encoding = implode(',', $encoding);
		}

		$this->_chunked = stripos($encoding, 'chunked') !== false;

		$this->_response['headers'] = $this->_headers;
	}

	/**
	 * Decodes the response body if its transfer-encoded as chunked
	 *
	 * Each chunk is "<hex size>[;extension]CRLF<data>CRLF", a size of zero ends the body.
	 * Data that does not follow that framing is kept as is rather than thrown away.
	 *
	 * @param string $body
	 * @return string
	 */
	private function _unChunk($body)
	{
		if (!$this->_chunked)
		{
			return $body;
		}

		$decoded_body = '';
		while (($rest = trim($body)) !== '' && $rest !== '0')
		{
			// The size line must be the very start of what is left, not some later line
			if (!preg_match('~\A([\da-fA-F]+)[^\r\n]*\r\n~', $body, $match))
			{
				// It only claimed to be chunked, but its not (or no longer). Keep what we have.
				$decoded_body .= $body;
				break;
			}

			// hexdec returns a float for absurd sizes, never ask for more than there is
			$length = (int) min(hexdec($match[1]), strlen($body));

			if ($length === 0)
			{
				break;
			}

			$cut = strlen($match[0]);
			$decoded_body .= substr($body, $cut, $length);

			// Skip the data and the CRLF that closes the chunk
			$body = (string) substr($body, $cut + $length + 2);
		}

		return $decoded_body;
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - An unknown area returns the full array
	 *
	 * @param string $area used to return an area such as body, headers, error
	 *
	 * @return string|string[]
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if (trim($area) === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}