|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* This will fetch a web resource http/https and return the headers and page data. It is capable of following |
|
5
|
|
|
* redirects and interpreting chunked data, etc. It will NOT work with ini allow_url_fopen off. |
|
6
|
|
|
* |
|
7
|
|
|
* @package ElkArte Forum |
|
8
|
|
|
* @copyright ElkArte Forum contributors |
|
9
|
|
|
* @license BSD https://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
|
10
|
|
|
* |
|
11
|
|
|
* @version 2.0 dev |
|
12
|
|
|
* |
|
13
|
|
|
*/ |
|
14
|
|
|
|
|
15
|
|
|
namespace ElkArte\Http; |
|
16
|
|
|
|
|
17
|
|
|
use Exception; |
|
18
|
|
|
|
|
19
|
|
|
/** |
|
20
|
|
|
* Class StreamFetchWebdata |
|
21
|
|
|
* |
|
22
|
|
|
* @package ElkArte |
|
23
|
|
|
*/ |
|
24
|
|
|
class StreamFetchWebdata
{
	/** @var array Pristine response structure, restored at the start of every request */
	private const DEFAULT_RESPONSE = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int How many bytes of the body we will read, 0 means no limit */
	private $_content_length = 0;

	/** @var array The parsed url with host, port, path, etc. Empty when the url was rejected */
	private $_url = [];

	/** @var null|resource The fopen resource, only set while a request is in flight */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url encoded data that will be posted to a form */
	private $_post_data = '';

	/** @var array Holds the response to the request: url, code, error, redirects, size, headers, body */
	private $_response = self::DEFAULT_RESPONSE;

	/** @var array The context options for the stream */
	private $_options = [];

	/**
	 * StreamFetchWebdata constructor.
	 *
	 * @param array $options user options, only 'max_length' (bytes of body to read) is used
	 * @param int $max_redirect maximum number of redirects the stream wrapper may follow
	 * @param bool $keep_alive send "Connection: Keep-Alive" instead of "Connection: close"
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * What it does
	 * - Clears the post data and response left over from a previous call, so a reused
	 *   instance never turns a GET into a POST or reports a stale body / status code
	 * - Encodes the post data, if any, and runs the request
	 * - The outcome is read back with result()
	 *
	 * @param string $url
	 * @param string|string[] $post_data
	 */
	public function get_url_data($url, $post_data = ''): void
	{
		// Every request starts from a clean slate
		$this->_post_data = '';
		$this->_response = self::DEFAULT_RESPONSE;

		// Prepare any given post data
		if (!empty($post_data))
		{
			// NOTE(review): a string payload is wrapped in a one element list, so it is sent
			// as "0=<encoded payload>". This is kept as is since callers may rely on it -- confirm intent.
			$fields = is_array($post_data) ? $post_data : [trim($post_data)];
			$this->_post_data = http_build_query($fields, '', '&');
		}

		// Set the options and get it
		$this->_openRequest($url);
	}

	/**
	 * Makes the actual data call
	 *
	 * What it does
	 * - Calls _setOptions to validate the url and build the stream context array
	 * - Makes the data request and parses the results
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool true when the stream was opened and its body was read
	 */
	private function _openRequest($url): bool
	{
		// Build the stream options array
		$this->_setOptions($url);

		// No usable url, nothing to request
		if (empty($this->_url))
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		if (!$this->_makeRequest())
		{
			return false;
		}

		$this->_parseRequest();
		$this->_processHeaders();

		return $this->_fetchData() !== false;
	}

	/**
	 * Prepares the options needed from this request
	 *
	 * What it does
	 * - Accepts only well formed http / https urls that name a host, anything else leaves
	 *   $_url empty so no request is made (other schemes would hand fopen a local file or
	 *   another stream wrapper)
	 * - Rebuilds the request url from scheme, host, optional port, path and query
	 * - Builds the ssl / http stream context options, adding a Range header when
	 *   max_length was supplied and switching to POST when there is post data
	 *
	 * @param string $url
	 */
	private function _setOptions($url): void
	{
		$this->_url = [];

		// Ensure the url is valid and is one we are willing to fetch
		$parts = filter_var($url, FILTER_VALIDATE_URL) ? parse_url($url) : false;
		if (is_array($parts)
			&& isset($parts['scheme'], $parts['host'])
			&& in_array(strtolower($parts['scheme']), ['http', 'https'], true))
		{
			// The path we request always carries the query string
			$parts['path'] = ($parts['path'] ?? '/') . (isset($parts['query']) ? '?' . $parts['query'] : '');

			// Keep a non default port, without it the request would go to port 80 / 443
			$authority = $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');

			$this->_url = $parts;
			$this->_response['url'] = $parts['scheme'] . '://' . $authority . $parts['path'];
		}

		// Build out the options for our context stream
		$this->_options = [
			'ssl' => [
				// Certificate and host name checks are both turned off, the option
				// is spelled verify_peer_name, any other key is silently ignored
				'verify_peer' => false,
				'verify_peer_name' => false
			],
			'http' =>
				[
					'method' => 'GET',
					'max_redirects' => $this->_max_redirect,
					'ignore_errors' => true,
					'protocol_version' => 1.1,
					'follow_location' => 1,
					'timeout' => 10,
					'header' => [
						'Connection: ' . ($this->_keep_alive ? 'Keep-Alive' : 'close'),
						'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931',
						'Content-Type: application/x-www-form-urlencoded',
					],
				]
		];

		// Try to limit the body of the response?
		if (!empty($this->_user_options['max_length']))
		{
			$this->_content_length = (int) $this->_user_options['max_length'];
			$this->_options['http']['header'][] = 'Range: bytes=0-' . ($this->_content_length - 1);
		}

		if (!empty($this->_post_data))
		{
			$this->_options['http']['method'] = 'POST';
			$this->_options['http']['header'][] = 'Content-Length: ' . strlen($this->_post_data);
			$this->_options['http']['content'] = $this->_post_data;
		}
	}

	/**
	 * Connect to the host/port with the stream options defined
	 *
	 * On failure the reason is stored in the response 'error' entry, both when an
	 * exception is thrown and when fopen simply returns false.
	 *
	 * @return bool true when a stream resource was opened
	 */
	private function _makeRequest(): bool
	{
		// Forget older errors so the one we may report below belongs to this fopen
		error_clear_last();

		try
		{
			$context = stream_context_create($this->_options);
			$this->_fp = fopen($this->_response['url'], 'rb', false, $context);
		}
		catch (Exception $exception)
		{
			$this->_response['error'] = $exception->getMessage();

			return false;
		}

		if (is_resource($this->_fp))
		{
			return true;
		}

		// fopen reports failure with a warning and false, not an exception. The warning is
		// not always available (a custom error handler may have consumed it).
		$last_error = error_get_last();
		$this->_response['error'] = $last_error['message'] ?? 'Unable to open a connection to ' . $this->_response['url'];

		return false;
	}

	/**
	 * Fetch the headers and parse the meta data into the results we need
	 *
	 * What it does
	 * - Lower cases each header name and trims its value
	 * - Counts every location header as one redirect
	 * - Stores a line without a colon (the server status line) under 'status'
	 * - Collects repeated set-cookie headers in an array, any other repeated
	 *   header keeps the last value seen
	 */
	private function _parseRequest(): void
	{
		// header information as well as meta data
		$meta = stream_get_meta_data($this->_fp);
		$this->_response['headers'] = [];
		$this->_response['redirects'] = 0;
		$this->_response['code'] = '???';

		// Loop and process the headers
		foreach ($meta['wrapper_data'] ?? [] as $header)
		{
			// Create the final header array
			$temp = explode(':', $header, 2);

			// Normalize / clean
			$name = isset($temp[0]) ? strtolower($temp[0]) : '';
			$value = isset($temp[1]) ? trim($temp[1]) : null;

			// How many redirects
			if ($name === 'location')
			{
				$this->_response['redirects']++;
			}

			// Server response is mixed in with the real headers
			if ($value === null)
			{
				$this->_response['headers']['status'] = $name;
			}
			// If it's already there overwrite with the new value, unless it's a cookie
			elseif (isset($this->_response['headers'][$name]) && $name === 'set-cookie')
			{
				if (is_string($this->_response['headers'][$name]))
				{
					$this->_response['headers'][$name] = [$this->_response['headers'][$name]];
				}

				$this->_response['headers'][$name][] = $value;
			}
			else
			{
				$this->_response['headers'][$name] = $value;
			}
		}
	}

	/**
	 * Works out the final url and status code from the parsed headers
	 *
	 * What it does
	 * - Replaces the response url with the last location header, when there is one
	 * - Reads the three digit code out of the status line, whatever the protocol
	 *   version in front of it; an unreadable status line leaves the code untouched
	 * - Reports 200, 201 and 206 as 200, keeping the real value in 'code_orig'
	 */
	private function _processHeaders(): void
	{
		// Were we redirected, if so lets find out where
		if (!empty($this->_response['headers']['location']))
		{
			// update url with where we were ultimately redirected to
			$this->_response['url'] = $this->_response['headers']['location'];
		}

		// What about our status code?
		if (!empty($this->_response['headers']['status'])
			&& preg_match('~^http/\d+(?:\.\d+)?\s+(\d{3})\b~i', $this->_response['headers']['status'], $match) === 1)
		{
			// Update with last status code found, it's for this final navigated point
			$this->_response['code'] = (int) $match[1];
		}

		// Provide a common "valid" return code to the caller
		if (in_array($this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code_orig'] = $this->_response['code'];
			$this->_response['code'] = 200;
		}
	}

	/**
	 * Fetch the body for the selected site.
	 *
	 * Reads at most max_length bytes when that option was given, the whole stream
	 * otherwise, then closes the stream. A failed read leaves an empty body and
	 * records an error when none was set before.
	 *
	 * @return string|false the body that was read, false when reading failed
	 */
	private function _fetchData()
	{
		// Get the contents of the url
		$body = empty($this->_content_length)
			? stream_get_contents($this->_fp)
			: stream_get_contents($this->_fp, $this->_content_length);

		// Done with the stream, don't keep a closed handle around
		fclose($this->_fp);
		$this->_fp = null;

		if ($body === false)
		{
			$this->_response['body'] = '';
			$this->_response['size'] = 0;

			if ($this->_response['error'] === '')
			{
				$this->_response['error'] = 'Unable to read the response body';
			}

			return false;
		}

		$this->_response['body'] = $body;
		$this->_response['size'] = strlen($body);

		return $body;
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - An unknown area name returns the full final array as well
	 *
	 * @param string $area used to return an area such as body, headers, error
	 *
	 * @return string|string[]
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if ($area === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}
|
326
|
|
|
|