1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * Fetches a web resource over http/https and returns the headers and page data. It is capable of following
 * redirects and interpreting chunked data, etc. It will NOT work when the allow_url_fopen ini setting is off.
 *
 * @package   ElkArte Forum
 * @copyright ElkArte Forum contributors
 * @license   BSD https://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
 *
 * @version 2.0 dev
 *
 */
14
|
|
|
|
15
|
|
|
namespace ElkArte\Http; |
16
|
|
|
|
17
|
|
|
use Exception; |
18
|
|
|
|
19
|
|
|
/**
 * Class StreamFetchWebdata
 *
 * Fetches an http/https resource with fopen() and a stream context, then exposes the
 * status code, headers, final url and body of the response through result().
 *
 * @package ElkArte
 */
class StreamFetchWebdata
{
	/** @var bool Send "Connection: Keep-Alive" instead of "Connection: close" */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int Maximum number of body bytes to read, 0 means no limit */
	private $_content_length = 0;

	/** @var array the parsed url with host, port, path, etc. Empty when the url was rejected */
	private $_url = [];

	/** @var null|resource the fopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Url encoded data that will be posted, empty string for a GET request */
	private $_post_data = '';

	/** @var array Holds the response to the request, headers, data, code */
	private $_response = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var array the context options for the stream */
	private $_options = [];

	/**
	 * StreamFetchWebdata constructor.
	 *
	 * @param array $options user options, only 'max_length' (bytes of body to read) is used
	 * @param int $max_redirect passed to the http wrapper as max_redirects
	 * @param bool $keep_alive true to send Connection: Keep-Alive, false to send Connection: close
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * The outcome is not returned, use result() to read it.
	 *
	 * @param string $url
	 * @param string|string[] $post_data when not empty the request is sent as a POST
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Each call starts clean, otherwise an earlier POST body / response leaks into this request
		$this->_resetState();

		// Prepare any given post data
		if (!empty($post_data))
		{
			// NOTE(review): a string is wrapped in an array first, so it is sent as "0=<encoded string>".
			// Kept as is since callers may depend on it, confirm this is the intended wire format.
			$fields = is_array($post_data) ? $post_data : [trim($post_data)];
			$this->_post_data = http_build_query($fields, '', '&');
		}

		// Set the options and get it
		$this->_openRequest($url);
	}

	/**
	 * Returns the post data and the response array to their initial values
	 */
	private function _resetState()
	{
		$this->_post_data = '';
		$this->_response = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];
	}

	/**
	 * Makes the actual data call
	 *
	 * What it does
	 * - Calls setOptions to build the stream context array
	 * - Makes the data request and parses the results
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool|string the response body, or false if the url was rejected or could not be opened
	 */
	private function _openRequest($url)
	{
		// Build the stream options array
		$this->_setOptions($url);

		// We do have a url I hope
		if (empty($this->_url))
		{
			$this->_response['error'] = 'Invalid or unsupported url';

			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		if (!$this->_makeRequest())
		{
			return false;
		}

		$this->_parseRequest();
		$this->_processHeaders();

		return $this->_fetchData();
	}

	/**
	 * Prepares the options needed from this request
	 *
	 * Validates the url, stores its parsed parts in $_url (left empty when the url is
	 * rejected) and builds the ssl / http stream context options in $_options.
	 *
	 * @param string $url
	 */
	private function _setOptions($url)
	{
		$this->_url = [];

		// FILTER_VALIDATE_URL does not restrict the scheme, so check for http/https and a host
		// ourselves. Without this a file:// url would be handed straight to fopen.
		$parts = filter_var($url, FILTER_VALIDATE_URL) ? parse_url($url) : false;
		if (is_array($parts)
			&& isset($parts['scheme'], $parts['host'])
			&& in_array(strtolower($parts['scheme']), ['http', 'https'], true))
		{
			// The path we request always carries the query string
			$parts['path'] = ($parts['path'] ?? '/') . (isset($parts['query']) ? '?' . $parts['query'] : '');

			// Keep an explicit port, dropping it would send the request to the scheme default
			$port = isset($parts['port']) ? ':' . $parts['port'] : '';

			$this->_url = $parts;
			$this->_response['url'] = $parts['scheme'] . '://' . $parts['host'] . $port . $parts['path'];
		}

		// Build out the options for our context stream
		$this->_options = [
			'ssl' => [
				'verify_peer' => false,
				// The option is spelled verify_peer_name, the former verify_peername key was ignored
				'verify_peer_name' => false,
			],
			'http' => [
				'method' => 'GET',
				'max_redirects' => $this->_max_redirect,
				// Hand back the body even for 4xx / 5xx responses
				'ignore_errors' => true,
				'protocol_version' => 1.1,
				'follow_location' => 1,
				'timeout' => 10,
				'header' => [
					'Connection: ' . ($this->_keep_alive ? 'Keep-Alive' : 'close'),
					'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931',
					'Content-Type: application/x-www-form-urlencoded',
				],
			],
		];

		// Try to limit the body of the response?
		$max_length = (int) ($this->_user_options['max_length'] ?? 0);
		if ($max_length > 0)
		{
			$this->_content_length = $max_length;
			$this->_options['http']['header'][] = 'Range: bytes=0-' . ($max_length - 1);
		}

		// Anything to post turns this in to a POST request
		if (!empty($this->_post_data))
		{
			$this->_options['http']['method'] = 'POST';
			$this->_options['http']['header'][] = 'Content-Length: ' . strlen($this->_post_data);
			$this->_options['http']['content'] = $this->_post_data;
		}
	}

	/**
	 * Connect to the host/port with the stream options defined
	 *
	 * On failure the reason, when one is available, is stored in the response 'error' area.
	 *
	 * @return bool true when the stream was opened
	 */
	private function _makeRequest()
	{
		// So a failure below does not report some older, unrelated, error
		error_clear_last();

		try
		{
			$context = stream_context_create($this->_options);
			$this->_fp = fopen($this->_response['url'], 'rb', false, $context);
		}
		catch (Exception $exception)
		{
			$this->_fp = null;
			$this->_response['error'] = $exception->getMessage();

			return false;
		}

		if (is_resource($this->_fp))
		{
			return true;
		}

		// fopen signals failure with a warning and a false return, not an exception
		$last_error = error_get_last();
		$this->_fp = null;
		$this->_response['error'] = $last_error['message'] ?? 'Unable to open the requested url';

		return false;
	}

	/**
	 * Fetch the headers and parse the meta data into the results we need
	 *
	 * Header names are stored lowercase. A line without a colon is taken as a status
	 * line and stored under 'status'. A repeated header replaces the earlier value,
	 * except set-cookie whose values are collected in an array.
	 */
	private function _parseRequest()
	{
		// header information as well as meta data
		$meta = stream_get_meta_data($this->_fp);
		$this->_response['headers'] = [];
		$this->_response['redirects'] = 0;
		$this->_response['code'] = '???';

		// Loop and process the headers
		foreach ($meta['wrapper_data'] ?? [] as $header)
		{
			if (!is_string($header))
			{
				continue;
			}

			// Normalize / clean
			$pieces = explode(':', $header, 2);
			$name = strtolower($pieces[0]);
			$value = isset($pieces[1]) ? trim($pieces[1]) : null;

			// How many redirects
			if ($name === 'location')
			{
				$this->_response['redirects']++;
			}

			// Server response is mixed in with the real headers
			if ($value === null)
			{
				$this->_response['headers']['status'] = $name;
				continue;
			}

			// Cookies accumulate, every other header is overwritten with the latest value
			if ($name === 'set-cookie' && isset($this->_response['headers'][$name]))
			{
				$this->_response['headers'][$name] = (array) $this->_response['headers'][$name];
				$this->_response['headers'][$name][] = $value;
				continue;
			}

			$this->_response['headers'][$name] = $value;
		}
	}

	/**
	 * Derives the final url and the status code from the parsed headers
	 *
	 * Codes 200, 201 and 206 are all reported as 200, the code actually received is
	 * then kept in the 'code_orig' area.
	 */
	private function _processHeaders()
	{
		// Were we redirected, if so lets find out where
		if (!empty($this->_response['headers']['location']))
		{
			// update $url with where we were ultimately redirected to
			$this->_response['url'] = $this->_response['headers']['location'];
		}

		// What about our status code?  The last status line seen is for the final navigated point
		$status = $this->_response['headers']['status'] ?? '';
		if (is_string($status) && preg_match('~^http/\S+\s+(\d{3})~i', $status, $match) === 1)
		{
			$this->_response['code'] = $match[1];
		}

		// Provide a common "valid" return code to the caller
		if (in_array((int) $this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code_orig'] = $this->_response['code'];
			$this->_response['code'] = 200;
		}
	}

	/**
	 * Fetch the body for the selected site.
	 *
	 * Reads at most max_length bytes when that option was set, otherwise the whole
	 * stream, then closes the connection.
	 *
	 * @return string the body that was read, empty if the read failed
	 */
	private function _fetchData()
	{
		// Get the contents of the url
		$limit = (int) $this->_content_length;
		$body = $limit > 0
			? stream_get_contents($this->_fp, $limit)
			: stream_get_contents($this->_fp);

		// Done with the connection, do not keep a closed handle around
		fclose($this->_fp);
		$this->_fp = null;

		// A failed read returns false, callers always get a string
		$this->_response['body'] = $body === false ? '' : $body;
		$this->_response['size'] = strlen($this->_response['body']);

		return $this->_response['body'];
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - An area that does not exist returns the full final array
	 *
	 * @param string $area used to return an area such as body, headers, error
	 *
	 * @return array|int|string
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if ($area !== '' && isset($this->_response[$area]))
		{
			return $this->_response[$area];
		}

		return $this->_response;
	}
}