1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* This will fetch a web resource http/https and return the headers and page data. It is capable of following |
5
|
|
|
* redirects and interpreting chunked data. It will work with allow_url_fopen off. |
6
|
|
|
* |
7
|
|
|
* @package ElkArte Forum |
8
|
|
|
* @copyright ElkArte Forum contributors |
9
|
|
|
* @license BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
10
|
|
|
* |
11
|
|
|
* @version 2.0 dev |
12
|
|
|
* |
13
|
|
|
*/ |
14
|
|
|
|
15
|
|
|
namespace ElkArte\Http; |
16
|
|
|
|
17
|
|
|
use Exception; |
18
|
|
|
|
19
|
|
|
/**
 * Class FsockFetchWebdata
 *
 * Fetches an http/https resource over a socket opened with fsockopen() and exposes the
 * response code, headers and body.  It follows redirects (up to a configurable limit)
 * and decodes bodies sent with chunked transfer encoding.
 *
 * Typical use:
 *  - $fetch = new FsockFetchWebdata(['max_length' => 4096]);
 *  - $fetch->get_url_data('https://example.com/');
 *  - $body = $fetch->result('body');
 *
 * @package ElkArte
 */
class FsockFetchWebdata
{
	/** @var array Pristine response structure, used to reset state at the start of every request */
	private const EMPTY_RESPONSE = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int Holds the current redirect count for the request */
	private $_current_redirect = 0;

	/** @var null|string "host:port" of the socket kept for reuse when keep alive is true */
	private $_keep_alive_host;

	/** @var null|resource the fp resource to reuse */
	private $_keep_alive_fp;

	/** @var int how much we will read, 0 means no limit */
	private $_content_length = 0;

	/** @var array the parsed url with scheme, host, host_raw, port and path (empty when the url was rejected) */
	private $_url = [];

	/** @var null|resource the fsockopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url-encoded data that will be posted, empty string for a GET */
	private $_post_data = '';

	/** @var array Holds the response to the request: url, code, error, redirects, size, headers, body */
	private $_response = self::EMPTY_RESPONSE;

	/** @var array Holds the last header response, keyed by lowercase name, value is string or string[] when repeated */
	private $_headers = [];

	/** @var string the HTTP status line from the server, e.g. "HTTP/1.1 200 OK" */
	private $_server_response = '';

	/** @var bool if the response body is transfer encoded chunked */
	private $_chunked = false;

	/**
	 * FsockFetchWebdata constructor.
	 *
	 * @param array $options only 'max_length' (maximum number of body bytes to read) is used
	 * @param int $max_redirect maximum number of redirects that will be followed
	 * @param bool $keep_alive ask the server to keep the connection open so redirects can reuse it
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * The outcome is not returned, use result() to read it.
	 *
	 * @param string $url
	 * @param string|string[] $post_data an array is url-encoded for you, a string is
	 * assumed to be already encoded and is sent as given (trimmed)
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Start clean so nothing from an earlier request on this object leaks into this one
		$this->_post_data = '';
		$this->_response = self::EMPTY_RESPONSE;
		$this->_current_redirect = 0;

		// Prepare any given post data
		if (!empty($post_data))
		{
			$this->_post_data = is_array($post_data)
				? http_build_query($post_data, '', '&')
				: trim($post_data);
		}

		$this->_fopenRequest($url);
	}

	/**
	 * Main processing loop, connects, parses responses, redirects, fetches body
	 *
	 * Calls itself for each redirect hop, _checkRedirect() enforces the hop limit.
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool true when a final (non redirect) response was read
	 */
	private function _fopenRequest($url): bool
	{
		// We do have a url I hope
		$this->_setOptions($url);
		if (empty($this->_url))
		{
			return false;
		}

		// Reuse the kept socket only when it really points at this host:port, otherwise
		// drop whatever is still open so _sockOpen() connects to the right place
		$reusable = $this->_keep_alive
			&& $this->_keep_alive_host === $this->_socketKey()
			&& is_resource($this->_keep_alive_fp);
		if ($reusable)
		{
			$this->_fp = $this->_keep_alive_fp;
		}
		elseif (is_resource($this->_fp))
		{
			$this->_closeSocket();
		}

		// Open a connection to the host & port
		if (!$this->_sockOpen())
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		$this->_makeRequest();

		// Is it where we thought?
		$this->_readHeaders();
		$location = $this->_checkRedirect();
		if ($location !== '')
		{
			// To the new location we go, and report how that went
			return $this->_fopenRequest($location);
		}

		preg_match('~^HTTP/\S+\s+(\d{3})~i', $this->_server_response, $code);
		$this->_response['code'] = isset($code[1]) ? (int) $code[1] : '???';

		// Provide a common valid 200 return code to the caller for any of the success codes
		if (in_array($this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code'] = 200;
		}

		$this->_fetchData();
		$this->_closeSocket();

		return true;
	}

	/**
	 * Parses a url into the components we need
	 *
	 * Leaves $this->_url empty when the url is not a valid http/https url, which the
	 * caller treats as "nothing to fetch".
	 *
	 * @param string $url
	 */
	private function _setOptions($url): void
	{
		$this->_url = [];
		$this->_response['url'] = $url;
		$this->_content_length = empty($this->_user_options['max_length']) ? 0 : max(0, (int) $this->_user_options['max_length']);

		// Make sure its valid before we parse it out
		if (filter_var($url, FILTER_VALIDATE_URL) === false)
		{
			return;
		}

		// Get the elements for this url, we only speak http and https and need a host to talk to
		$url_parse = parse_url($url);
		$scheme = strtolower($url_parse['scheme'] ?? '');
		if (empty($url_parse['host']) || !in_array($scheme, ['http', 'https'], true))
		{
			return;
		}

		// SSL connections need the ssl:// transport and default to 443
		$secure = $scheme === 'https';
		$default_port = $secure ? 443 : 80;

		$this->_url = [
			'scheme' => $scheme,
			'host_raw' => $url_parse['host'],
			'host' => ($secure ? 'ssl://' : '') . $url_parse['host'],
			// An explicit port in the url wins over the scheme default
			'port' => empty($url_parse['port']) ? $default_port : (int) $url_parse['port'],
			'path' => ($url_parse['path'] ?? '/') . (isset($url_parse['query']) ? '?' . $url_parse['query'] : ''),
		];
	}

	/**
	 * Identifies the connection target of the current url, used to decide on socket reuse
	 *
	 * @return string transport/host and port, e.g. "ssl://example.com:443"
	 */
	private function _socketKey(): string
	{
		return $this->_url['host'] . ':' . $this->_url['port'];
	}

	/**
	 * Connect to the host/port as requested
	 *
	 * Does nothing when a usable socket is already set.  Connection warnings are
	 * suppressed, a failure is reported through the 'error' entry of the response.
	 *
	 * @return bool true when an open socket is available
	 */
	private function _sockOpen(): bool
	{
		// Already have a socket, nothing to do
		if (is_resource($this->_fp))
		{
			return true;
		}

		set_error_handler(static function () { /* ignore errors */ });
		try
		{
			$this->_fp = fsockopen($this->_url['host'], $this->_url['port'], $errno, $errstr, 5);
			$this->_response['error'] = empty($errstr) ? false : $errno . ' :: ' . $errstr;
		}
		catch (Exception)
		{
			return false;
		}
		finally
		{
			restore_error_handler();
		}

		return is_resource($this->_fp);
	}

	/**
	 * Closes the current socket, if any, and forgets the keep alive bookkeeping for it
	 */
	private function _closeSocket(): void
	{
		if (is_resource($this->_fp))
		{
			fclose($this->_fp);
		}

		$this->_fp = null;
		$this->_keep_alive_fp = null;
		$this->_keep_alive_host = null;
	}

	/**
	 * Make the request to the host, either get or post, and get the initial response.
	 *
	 * The status line read back is stored in $this->_server_response (empty string
	 * when nothing could be read).
	 */
	private function _makeRequest(): void
	{
		$is_post = $this->_post_data !== '';

		// The Host header carries the port whenever it is not the default for the scheme
		$default_port = $this->_url['scheme'] === 'https' ? 443 : 80;
		$host = $this->_url['host_raw'] . ($this->_url['port'] === $default_port ? '' : ':' . $this->_url['port']);

		$request = ($is_post ? 'POST ' : 'GET ') . $this->_url['path'] . ' HTTP/1.1' . "\r\n";
		$request .= 'Host: ' . $host . "\r\n";
		$request .= $this->_keepAlive();
		$request .= 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931' . "\r\n";

		if ($this->_content_length > 0)
		{
			$request .= 'Range: bytes=0-' . ($this->_content_length - 1) . "\r\n";
		}

		if ($is_post)
		{
			// The content headers only make sense when a body follows
			$request .= 'Content-Type: application/x-www-form-urlencoded' . "\r\n";
			$request .= 'Content-Length: ' . strlen($this->_post_data) . "\r\n\r\n";
			$request .= $this->_post_data;
		}
		else
		{
			// Exactly one blank line ends the header section
			$request .= "\r\n";
		}

		// Make the request and read the first line of the server response, ending at the first CRLF
		fwrite($this->_fp, $request);
		$status = fgets($this->_fp);
		$this->_server_response = $status === false ? '' : $status;
	}

	/**
	 * Sets the proper Keep-Alive header and sets the fp/host if the option is enabled
	 *
	 * @return string the Connection header line, including its trailing CRLF
	 */
	private function _keepAlive(): string
	{
		if (!$this->_keep_alive)
		{
			return 'Connection: close' . "\r\n";
		}

		// Remember where this socket points so a redirect to the same place can reuse it
		$this->_keep_alive_host = $this->_socketKey();
		$this->_keep_alive_fp = $this->_fp;

		return 'Connection: Keep-Alive' . "\r\n";
	}

	/**
	 * Reads the stream until the end of the headers section and then parses those headers
	 *
	 * Fills $this->_headers keyed by lowercase header name.  A header seen more than
	 * once is stored as an array of its values.  Lines that are not in "name: value"
	 * form are skipped.
	 */
	private function _readHeaders(): void
	{
		$this->_headers = [];

		// Read line by line, a blank line separates headers from body
		while (!feof($this->_fp))
		{
			$line = fgets($this->_fp);
			if ($line === false || trim($line) === '')
			{
				break;
			}

			// Not a header we can understand, skip it rather than trip over it
			if (!str_contains($line, ':'))
			{
				continue;
			}

			// Get name and value, normalized / cleaned (handles CRLF and bare LF endings alike)
			[$name, $value] = explode(':', $line, 2);
			$name = strtolower(trim($name));
			$value = trim($value);
			if ($name === '')
			{
				continue;
			}

			if (!isset($this->_headers[$name]))
			{
				$this->_headers[$name] = $value;
				continue;
			}

			// If its already there, then add to it as an array
			if (is_string($this->_headers[$name]))
			{
				$this->_headers[$name] = [$this->_headers[$name]];
			}

			$this->_headers[$name][] = $value;
		}
	}

	/**
	 * Returns a header of the last response in a form that is safe to compare
	 *
	 * @param string $name lowercase header name
	 *
	 * @return string lowercase, trimmed value; repeated headers are joined with a comma; '' when absent
	 */
	private function _headerValue(string $name): string
	{
		$value = $this->_headers[$name] ?? '';
		if (is_array($value))
		{
			$value = implode(',', $value);
		}

		return strtolower(trim($value));
	}

	/**
	 * Looks at the server response and header array to determine if we are redirecting
	 *
	 * Follows 301, 302, 303, 307 and 308 while the redirect limit has not been reached.
	 * A 303 switches the follow-up request to GET by dropping the post data.
	 *
	 * @return string the absolute url to request next, or '' when this is the final response
	 */
	private function _checkRedirect(): string
	{
		$redirecting = $this->_current_redirect < $this->_max_redirect
			&& preg_match('~^HTTP/\S+\s+(30[12378])~i', $this->_server_response, $code) === 1;
		if (!$redirecting)
		{
			return '';
		}

		// Maintain our status responses
		$this->_response['code'] = (int) $code[1];
		$this->_response['redirects'] = ++$this->_current_redirect;
		$this->_response['headers'] = $this->_headers;

		// redirection with no location, just like working in a corporation
		$location = $this->_headers['location'] ?? '';
		if (is_array($location))
		{
			$location = (string) reset($location);
		}

		if ($location === '')
		{
			return '';
		}

		// "See Other" means go fetch the result with a GET
		if ($this->_response['code'] === 303)
		{
			$this->_post_data = '';
		}

		// Use the same connection or new?
		$this->_releaseSocketForRedirect();

		return $this->_resolveLocation($location);
	}

	/**
	 * Gets the socket ready for the next hop of a redirect
	 *
	 * The socket is only kept when keep alive is on, the server did not ask to close
	 * and the redirect body has a known length that could be read off the stream.
	 * In every other case it is closed so the next hop starts on a fresh connection.
	 */
	private function _releaseSocketForRedirect(): void
	{
		$length = $this->_headerValue('content-length');

		$reusable = $this->_keep_alive
			&& $this->_headerValue('connection') !== 'close'
			&& $this->_headerValue('transfer-encoding') === ''
			&& ctype_digit($length);

		// Unread body bytes would otherwise be taken for the start of the next response
		if ($reusable && (int) $length > 0)
		{
			$discarded = stream_get_contents($this->_fp, (int) $length);
			$reusable = $discarded !== false && strlen($discarded) === (int) $length;
		}

		if (!$reusable)
		{
			$this->_closeSocket();
		}
	}

	/**
	 * Turns a Location header value into an absolute url based on the current request
	 *
	 * Handles absolute urls, protocol relative ("//host/path"), absolute path ("/path")
	 * and path relative ("page.html") values.  Dot segments ("../") are passed on as is.
	 *
	 * @param string $location
	 *
	 * @return string
	 */
	private function _resolveLocation(string $location): string
	{
		// Already has a scheme, use as is
		if (preg_match('~^[a-z][a-z\d+.-]*://~i', $location) === 1)
		{
			return $location;
		}

		$scheme = $this->_url['scheme'];
		if (str_starts_with($location, '//'))
		{
			return $scheme . ':' . $location;
		}

		$default_port = $scheme === 'https' ? 443 : 80;
		$origin = $scheme . '://' . $this->_url['host_raw'] . ($this->_url['port'] === $default_port ? '' : ':' . $this->_url['port']);
		if (str_starts_with($location, '/'))
		{
			return $origin . $location;
		}

		// Relative to the "directory" of the path we just requested, query string excluded
		$path = explode('?', $this->_url['path'], 2)[0];
		$slash = strrpos($path, '/');
		$directory = $slash === false ? '/' : substr($path, 0, $slash + 1);

		return $origin . $directory . $location;
	}

	/**
	 * Fetch the data for the selected site.
	 *
	 * Reads at most max_length bytes when that option was given, otherwise reads until
	 * the stream ends, then stores the decoded body and its size in the response.
	 */
	private function _fetchData(): void
	{
		// Respect the headers
		$this->_processHeaders();

		// Now the body of the response
		$raw = $this->_content_length > 0
			? stream_get_contents($this->_fp, $this->_content_length)
			: stream_get_contents($this->_fp);

		$this->_response['body'] = $this->_unChunk($raw === false ? '' : $raw);
		$this->_response['size'] = strlen($this->_response['body']);
	}

	/**
	 * Acts on the headers of the final response and copies them to the result
	 *
	 * Notes a "Connection: close" from the server and whether the body is chunked.
	 */
	private function _processHeaders(): void
	{
		// If told to close the connection, do so
		if ($this->_headerValue('connection') === 'close')
		{
			$this->_keep_alive_host = null;
			$this->_keep_alive = false;
		}

		// If its chunked we need to decode the body, decided fresh for every response
		$codings = array_map('trim', explode(',', $this->_headerValue('transfer-encoding')));
		$this->_chunked = in_array('chunked', $codings, true);

		$this->_response['headers'] = $this->_headers;
	}

	/**
	 * Decodes the response body if its transfer-encoded as chunked
	 *
	 * A body that was announced as chunked but does not start with a chunk size line
	 * is returned untouched.  If the framing breaks part way through (for example the
	 * read was cut short by max_length) what was decoded up to that point is returned.
	 *
	 * @param string $body
	 * @return string
	 */
	private function _unChunk($body)
	{
		if (!$this->_chunked)
		{
			return $body;
		}

		$decoded_body = '';
		while (trim($body) !== '')
		{
			// The size line must be right at the start of what is left, anywhere else is payload
			if (preg_match('~\A([\da-fA-F]+)[^\r\n]*\r\n~', $body, $match) !== 1)
			{
				// It only claimed to be chunked, but its not.
				return $decoded_body === '' ? $body : $decoded_body;
			}

			// Never trust a size beyond what we actually hold
			$length = (int) min(hexdec($match[1]), strlen($body));
			if ($length === 0)
			{
				break;
			}

			// Take the chunk, then step over it and the CRLF that closes it
			$cut = strlen($match[0]);
			$decoded_body .= substr($body, $cut, $length);
			$body = (string) substr($body, $cut + $length + 2);
		}

		return $decoded_body;
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - An unknown area returns the full array as well
	 *
	 * @param string $area used to return an area such as body, headers, error
	 *
	 * @return mixed the full response array or the value of the requested area
	 */
	public function result($area = '')
	{
		$area = trim((string) $area);

		// Just return a specified area or the entire result?
		if ($area === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}