1 | <?php |
||
2 | |||
3 | /** |
||
4 | * This will fetch a web resource http/https and return the headers and page data. It is capable of following |
||
5 | * redirects and interpreting chunked data, etc. It will NOT work with ini allow_url_fopen off. |
||
6 | * |
||
7 | * @package ElkArte Forum |
||
8 | * @copyright ElkArte Forum contributors |
||
9 | * @license BSD https://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
||
10 | * |
||
11 | * @version 2.0 dev |
||
12 | * |
||
13 | */ |
||
14 | |||
15 | namespace ElkArte\Http; |
||
16 | |||
17 | use Exception; |
||
18 | |||
/**
 * Class StreamFetchWebdata
 *
 * Fetches an http/https resource through PHP's stream wrapper (fopen() with a stream context)
 * and exposes the final url, status code, headers and body through result().
 *
 * Redirects are followed by the stream wrapper itself (follow_location / max_redirects).
 *
 * @package ElkArte
 */
class StreamFetchWebdata
{
	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int how much we will read, 0 means no limit */
	private $_content_length = 0;

	/** @var array the parsed url with host, port, path, etc */
	private $_url = [];

	/** @var null|resource the fopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url-encoded data that will be posted to a form */
	private $_post_data = '';

	/** @var array Holds the response to the request: url, code, error, redirects, size, headers, body */
	private $_response = [];

	/** @var array the context options for the stream */
	private $_options = [];

	/**
	 * StreamFetchWebdata constructor.
	 *
	 * @param array $options user options, only 'max_length' (bytes of body to read) is used
	 * @param int $max_redirect maximum number of redirects the stream wrapper may follow
	 * @param bool $keep_alive send "Connection: Keep-Alive" instead of "Connection: close"
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = is_array($options) ? $options : [];
		$this->_keep_alive = $keep_alive;
		$this->_response = $this->_emptyResponse();
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * The outcome is not returned here, it is read back with result().
	 *
	 * @param string $url
	 * @param string|string[] $post_data array of name => value pairs, or an already encoded string
	 */
	public function get_url_data($url, $post_data = '')
	{
		// Start clean, a reused instance must not resend the last payload or report the last result
		$this->_post_data = '';
		$this->_response = $this->_emptyResponse();

		// Prepare any given post data
		if (!empty($post_data))
		{
			// Arrays get encoded for us, a string is taken as already encoded and sent as is.
			// (Wrapping a string in an array would post it as the value of a field named "0".)
			$this->_post_data = is_array($post_data)
				? http_build_query($post_data, '', '&')
				: trim((string) $post_data);
		}

		// Set the options and get it
		$this->_openRequest($url);
	}

	/**
	 * Provides the response array in its initial "nothing fetched" state
	 *
	 * @return array
	 */
	private function _emptyResponse()
	{
		return ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];
	}

	/**
	 * Makes the actual data call
	 *
	 * What it does
	 * - Calls setOptions to build the stream context array
	 * - Makes the data request and parses the results
	 *
	 * @param string $url site to fetch
	 *
	 * @return string|false the fetched body, or false when the url was rejected or could not be opened
	 */
	private function _openRequest($url)
	{
		// Build the stream options array
		$this->_setOptions($url);

		// No parsed url means it failed validation, nothing to ask for
		if (empty($this->_url))
		{
			return false;
		}

		// I want this, from there, and I'm not going to be bothering you for more (probably.)
		if (!$this->_makeRequest())
		{
			return false;
		}

		$this->_parseRequest();
		$this->_processHeaders();

		return $this->_fetchData();
	}

	/**
	 * Prepares the options needed from this request
	 *
	 * Validates the url, records the normalized request url in the response and
	 * builds the ssl/http stream context options.
	 *
	 * @param string $url
	 */
	private function _setOptions($url)
	{
		$this->_url = [];

		// Ensure the url is valid
		if (filter_var($url, FILTER_VALIDATE_URL))
		{
			$parsed = parse_url($url);
			$scheme = is_array($parsed) ? strtolower($parsed['scheme'] ?? '') : '';

			// FILTER_VALIDATE_URL also accepts file:// and friends, those must never reach fopen()
			if (!empty($parsed['host']) && in_array($scheme, ['http', 'https'], true))
			{
				$parsed['scheme'] = $scheme;
				$parsed['path'] = ($parsed['path'] ?? '/') . (isset($parsed['query']) ? '?' . $parsed['query'] : '');

				// Keep an explicit port, otherwise the request silently goes to the scheme default
				$port = isset($parsed['port']) ? ':' . $parsed['port'] : '';

				$this->_url = $parsed;
				$this->_response['url'] = $scheme . '://' . $parsed['host'] . $port . $parsed['path'];
			}
		}

		// Build out the options for our context stream
		$this->_options = [
			'ssl' => [
				'verify_peer' => false,
				// The option name is verify_peer_name, a misspelt key is silently ignored
				'verify_peer_name' => false,
			],
			'http' => [
				'method' => 'GET',
				'max_redirects' => $this->_max_redirect,
				'ignore_errors' => true,
				'protocol_version' => 1.1,
				'follow_location' => 1,
				'timeout' => 10,
				'header' => [
					'Connection: ' . ($this->_keep_alive ? 'Keep-Alive' : 'close'),
					'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931',
					'Content-Type: application/x-www-form-urlencoded',
				],
			],
		];

		// Try to limit the body of the response?
		if (!empty($this->_user_options['max_length']))
		{
			$this->_content_length = (int) $this->_user_options['max_length'];
			$this->_options['http']['header'][] = 'Range: bytes=0-' . ($this->_content_length - 1);
		}

		// Anything to post switches the request over to a POST
		if (!empty($this->_post_data))
		{
			$this->_options['http']['method'] = 'POST';
			$this->_options['http']['header'][] = 'Content-Length: ' . strlen($this->_post_data);
			$this->_options['http']['content'] = $this->_post_data;
		}
	}

	/**
	 * Connect to the host/port with the stream options defined
	 *
	 * On failure the reason is stored in the response 'error' entry.
	 *
	 * @return bool true when the stream was opened
	 */
	private function _makeRequest()
	{
		$this->_fp = null;

		// So that what error_get_last() reports below belongs to this fopen() call
		error_clear_last();

		try
		{
			$context = stream_context_create($this->_options);
			$handle = fopen($this->_response['url'], 'rb', false, $context);
		}
		catch (Exception $exception)
		{
			$this->_response['error'] = $exception->getMessage();

			return false;
		}

		// fopen() signals failure with a warning and false, not an exception
		if (!is_resource($handle))
		{
			$last_error = error_get_last();
			$this->_response['error'] = $last_error['message'] ?? 'Unable to open ' . $this->_response['url'];

			return false;
		}

		$this->_fp = $handle;

		return true;
	}

	/**
	 * Fetch the headers and parse the meta data into the results we need
	 *
	 * Header names are lower cased. A line without a colon is taken as the server
	 * status line and stored under 'status'. Repeated set-cookie headers are collected
	 * in an array, any other repeated header keeps the last value seen.
	 */
	private function _parseRequest()
	{
		// header information as well as meta data
		$meta = stream_get_meta_data($this->_fp);

		$this->_response['headers'] = [];
		$this->_response['redirects'] = 0;
		$this->_response['code'] = '???';

		// Loop and process the headers
		foreach ((array) ($meta['wrapper_data'] ?? []) as $header)
		{
			// Split into name and (optional) value
			$parts = explode(':', $header, 2);

			// Normalize / clean
			$name = strtolower($parts[0]);
			$value = isset($parts[1]) ? trim($parts[1]) : null;

			// Each location header seen is one redirect that was followed
			if ($name === 'location')
			{
				$this->_response['redirects']++;
			}

			// Server response is mixed in with the real headers
			if ($value === null)
			{
				$this->_response['headers']['status'] = $name;
				continue;
			}

			// Cookies accumulate, anything else is overwritten with the newest value
			if ($name === 'set-cookie' && isset($this->_response['headers'][$name]))
			{
				$this->_response['headers'][$name] = (array) $this->_response['headers'][$name];
				$this->_response['headers'][$name][] = $value;
				continue;
			}

			$this->_response['headers'][$name] = $value;
		}
	}

	/**
	 * Derive the final url and status code from the parsed headers
	 *
	 * The codes 200, 201 and 206 are reported as 200, with the real one kept in 'code_orig'.
	 */
	private function _processHeaders()
	{
		// Were we redirected, if so lets find out where
		if (!empty($this->_response['headers']['location']))
		{
			// update $url with where we were ultimately redirected to
			$this->_response['url'] = $this->_response['headers']['location'];
		}

		// What about our status code?
		if (!empty($this->_response['headers']['status']))
		{
			// Last status line found is for the final navigated point.  Pull the three digit code
			// from "http/x.y nnn reason" by pattern so the protocol version length does not matter
			if (preg_match('~^http/\d+(?:\.\d+)?\s+(\d{3})~', $this->_response['headers']['status'], $match) === 1)
			{
				$this->_response['code'] = $match[1];
			}
		}

		// Provide a common "valid" return code to the caller
		if (in_array((int) $this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code_orig'] = $this->_response['code'];
			$this->_response['code'] = 200;
		}
	}

	/**
	 * Fetch the body for the selected site.
	 *
	 * Reads at most max_length bytes when that option was supplied, closes the stream
	 * and records the body and its size in the response.
	 *
	 * @return string the body that was read, empty if the read failed
	 */
	private function _fetchData()
	{
		// Get the contents of the url, limited if we were asked to
		$body = empty($this->_content_length)
			? stream_get_contents($this->_fp)
			: stream_get_contents($this->_fp, $this->_content_length);

		fclose($this->_fp);
		$this->_fp = null;

		// A failed read gives false, callers expect a string body
		$this->_response['body'] = $body === false ? '' : $body;
		$this->_response['size'] = strlen($this->_response['body']);

		return $this->_response['body'];
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - An unknown area name also returns the full array
	 *
	 * @param string $area used to return an area such as body, header, error
	 *
	 * @return string|string[]
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if ($area === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}