elkarte /
Elkarte
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * This will fetch a web resource http/https and return the headers and page data. It is capable of following |
||
| 5 | * redirects and interpreting chunked data, etc. It will NOT work with ini allow_url_fopen off. |
||
| 6 | * |
||
| 7 | * @package ElkArte Forum |
||
| 8 | * @copyright ElkArte Forum contributors |
||
| 9 | * @license BSD https://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
||
| 10 | * |
||
| 11 | * @version 2.0 dev |
||
| 12 | * |
||
| 13 | */ |
||
| 14 | |||
| 15 | namespace ElkArte\Http; |
||
| 16 | |||
| 17 | use Exception; |
||
| 18 | |||
/**
 * Class StreamFetchWebdata
 *
 * Fetches a http/https resource using fopen() with a stream context and exposes
 * the final url, status code, headers and body through result().
 *
 * Only the http and https schemes are accepted; any other url is treated as invalid
 * and no request is made.
 *
 * @package ElkArte
 */
class StreamFetchWebdata
{
	/** @var array The pristine response structure, used to reset state before each request */
	private const DEFAULT_RESPONSE = ['url' => '', 'code' => 404, 'error' => '', 'redirects' => 0, 'size' => 0, 'headers' => [], 'body' => ''];

	/** @var bool Use the same connection on redirects */
	private $_keep_alive;

	/** @var int Holds the passed or default value for redirects */
	private $_max_redirect;

	/** @var int how much we will read, 0 means no limit */
	private $_content_length = 0;

	/** @var array the parsed url with host, port, path, etc. Empty when the url was rejected */
	private $_url = [];

	/** @var null|resource the fopen resource */
	private $_fp;

	/** @var array Holds the passed user options array (only option is max_length) */
	private $_user_options;

	/** @var string Holds the url encoded data that will be posted to a form */
	private $_post_data = '';

	/** @var array Holds the response to the request: url, code, error, redirects, size, headers, body */
	private $_response = self::DEFAULT_RESPONSE;

	/** @var array the context options for the stream */
	private $_options = [];

	/**
	 * StreamFetchWebdata constructor.
	 *
	 * @param array $options user options, only 'max_length' (bytes of body to read) is used
	 * @param int $max_redirect maximum number of redirects the stream wrapper may follow
	 * @param bool $keep_alive send "Connection: Keep-Alive" instead of "Connection: close"
	 */
	public function __construct($options = [], $max_redirect = 3, $keep_alive = false)
	{
		// Initialize class variables
		$this->_max_redirect = (int) $max_redirect;
		$this->_user_options = $options;
		$this->_keep_alive = $keep_alive;
	}

	/**
	 * Prepares any post data supplied and then makes the request for data
	 *
	 * What it does:
	 *
	 * - Resets the post data and response so a reused instance never reports stale results
	 * - Array post data is url encoded with http_build_query
	 * - String post data is taken to be already url encoded and is sent as is (trimmed)
	 *
	 * @param string $url
	 * @param string|string[] $post_data
	 */
	public function get_url_data($url, $post_data = ''): void
	{
		// Start clean, this instance may have been used for an earlier request
		$this->_post_data = '';
		$this->_response = self::DEFAULT_RESPONSE;

		// Prepare any given post data
		if (!empty($post_data))
		{
			// Wrapping a string in an array would post it as "0=<encoded string>", so only arrays are built
			$this->_post_data = is_array($post_data)
				? http_build_query($post_data, '', '&')
				: trim((string) $post_data);
		}

		// Set the options and get it
		$this->_openRequest($url);
	}

	/**
	 * Makes the actual data call
	 *
	 * What it does
	 * - Calls setOptions to build the stream context array
	 * - Makes the data request and parses the results
	 *
	 * @param string $url site to fetch
	 *
	 * @return bool true when the resource was opened and its body read
	 */
	private function _openRequest($url): bool
	{
		// Build the stream options array
		$this->_setOptions($url);

		// No usable url, no request
		if (empty($this->_url))
		{
			return false;
		}

		if (!$this->_makeRequest())
		{
			return false;
		}

		$this->_parseRequest();
		$this->_processHeaders();

		return $this->_fetchData();
	}

	/**
	 * Prepares the options needed from this request
	 *
	 * What it does:
	 *
	 * - Validates the url, accepting only http/https urls that have a host
	 * - Rebuilds the request url from scheme, host, optional port, path and query
	 * - Builds the ssl / http stream context options, adding a Range header when
	 * max_length was supplied and switching to POST when there is post data
	 *
	 * @param string $url
	 */
	private function _setOptions($url): void
	{
		$this->_url = [];

		// Ensure the url is valid
		if (filter_var($url, FILTER_VALIDATE_URL))
		{
			// Get the elements for the url
			$parts = parse_url($url);
			$scheme = is_array($parts) ? strtolower($parts['scheme'] ?? '') : '';

			// FILTER_VALIDATE_URL also passes host-less schemes such as file: or mailto:, we only speak http(s)
			if ($scheme !== '' && !empty($parts['host']) && in_array($scheme, ['http', 'https'], true))
			{
				$parts['scheme'] = $scheme;
				$parts['path'] = ($parts['path'] ?? '/') . (isset($parts['query']) ? '?' . $parts['query'] : '');
				$this->_url = $parts;

				// Keep a non default port, otherwise the request goes to the wrong endpoint
				$authority = $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');
				$this->_response['url'] = $scheme . '://' . $authority . $parts['path'];
			}
		}

		// Build out the options for our context stream
		$this->_options = [
			// NOTE(review): certificate and host name verification are deliberately disabled here,
			// which allows self signed hosts but also permits man-in-the-middle interception.
			'ssl' => [
				'verify_peer' => false,
				'verify_peer_name' => false,
			],
			'http' => [
				'method' => 'GET',
				'max_redirects' => $this->_max_redirect,
				// Return the body even for 4xx/5xx responses
				'ignore_errors' => true,
				'protocol_version' => 1.1,
				'follow_location' => 1,
				'timeout' => 10,
				'header' => [
					'Connection: ' . ($this->_keep_alive ? 'Keep-Alive' : 'close'),
					'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14931',
					'Content-Type: application/x-www-form-urlencoded',
				],
			],
		];

		// Try to limit the body of the response?
		if (!empty($this->_user_options['max_length']))
		{
			$this->_content_length = (int) $this->_user_options['max_length'];
			$this->_options['http']['header'][] = 'Range: bytes=0-' . ($this->_content_length - 1);
		}

		if (!empty($this->_post_data))
		{
			$this->_options['http']['method'] = 'POST';
			$this->_options['http']['header'][] = 'Content-Length: ' . strlen($this->_post_data);
			$this->_options['http']['content'] = $this->_post_data;
		}
	}

	/**
	 * Connect to the host/port with the stream options defined
	 *
	 * On failure the reason is stored in the response 'error' entry.
	 *
	 * @return bool true when the stream was opened
	 */
	private function _makeRequest(): bool
	{
		// Make sure any message we report below belongs to this fopen call
		error_clear_last();

		try
		{
			$context = stream_context_create($this->_options);
			$this->_fp = fopen($this->_response['url'], 'rb', false, $context);
		}
		catch (Exception $exception)
		{
			// Only reached when an error handler converts the fopen warning to an exception
			$this->_fp = null;
			$this->_response['error'] = $exception->getMessage();

			return false;
		}

		if (is_resource($this->_fp))
		{
			return true;
		}

		// fopen reports failure with a warning and a false return, so record why for the caller
		$last_error = error_get_last();
		$this->_response['error'] = $last_error['message'] ?? 'Unable to open ' . $this->_response['url'];
		$this->_fp = null;

		return false;
	}

	/**
	 * Fetch the headers and parse the meta data into the results we need
	 *
	 * What it does:
	 *
	 * - Header names are lowercased and used as the key of the headers array
	 * - Each HTTP status line is saved under the 'status' key, the last one seen wins
	 * - Each location header counts as one redirect
	 * - Repeated set-cookie headers are collected in an array, any other repeated
	 * header is overwritten with its latest value
	 */
	private function _parseRequest(): void
	{
		// header information as well as meta data
		$meta_data = stream_get_meta_data($this->_fp);
		$this->_response['headers'] = [];
		$this->_response['redirects'] = 0;
		$this->_response['code'] = '???';

		// Loop and process the headers
		foreach ((array) ($meta_data['wrapper_data'] ?? []) as $header)
		{
			// Server response is mixed in with the real headers
			if (strncasecmp($header, 'HTTP/', 5) === 0)
			{
				$this->_response['headers']['status'] = strtolower(trim($header));
				continue;
			}

			$pieces = explode(':', $header, 2);

			// Not a "name: value" line, nothing we can use
			if (!isset($pieces[1]))
			{
				continue;
			}

			// Normalize / clean
			$name = strtolower(trim($pieces[0]));
			$value = trim($pieces[1]);

			// How many redirects
			if ($name === 'location')
			{
				$this->_response['redirects']++;
			}

			// Cookies can legitimately repeat, so keep them all
			if ($name === 'set-cookie' && isset($this->_response['headers'][$name]))
			{
				$this->_response['headers'][$name] = (array) $this->_response['headers'][$name];
				$this->_response['headers'][$name][] = $value;
				continue;
			}

			$this->_response['headers'][$name] = $value;
		}
	}

	/**
	 * Uses the parsed headers to set the final url and the response code
	 *
	 * What it does:
	 *
	 * - Sets the response url to the last location header, if there was one
	 * - Sets the response code from the last status line
	 * - Reports 200, 201 and 206 as 200, keeping the real value in 'code_orig'
	 */
	private function _processHeaders(): void
	{
		// Were we redirected, if so lets find out where
		if (!empty($this->_response['headers']['location']))
		{
			// update url with where we were ultimately redirected to
			$this->_response['url'] = $this->_response['headers']['location'];
		}

		// What about our status code?  The status line was saved lowercased, e.g. "http/1.1 200 ok"
		$status = $this->_response['headers']['status'] ?? '';
		if (preg_match('~^http/\S+\s+(\d{3})~', $status, $match) === 1)
		{
			// Update with last status code found, its for this final navigated point
			$this->_response['code'] = (int) $match[1];
		}

		// Provide a common "valid" return code to the caller
		if (in_array($this->_response['code'], [200, 201, 206], true))
		{
			$this->_response['code_orig'] = $this->_response['code'];
			$this->_response['code'] = 200;
		}
	}

	/**
	 * Fetch the body for the selected site.
	 *
	 * Reads at most max_length bytes when that option was supplied, otherwise reads
	 * the whole stream.  The stream is closed once read.
	 *
	 * @return bool false when the body could not be read
	 */
	private function _fetchData(): bool
	{
		// Get the contents of the url
		$body = empty($this->_content_length)
			? stream_get_contents($this->_fp)
			: stream_get_contents($this->_fp, $this->_content_length);

		fclose($this->_fp);
		$this->_fp = null;

		// A failed read gives false, keep the body a string for the caller
		$success = $body !== false;
		if (!$success)
		{
			$body = '';
			if ($this->_response['error'] === '')
			{
				$this->_response['error'] = 'Unable to read the response body';
			}
		}

		$this->_response['body'] = $body;
		$this->_response['size'] = strlen($body);

		return $success;
	}

	/**
	 * Used to return the results to the calling program
	 *
	 * What it does:
	 *
	 * - Called as ->result() will return the full final array
	 * - Called as ->result('body') to just return the page source of the result
	 * - Called with an area that does not exist will return the full final array
	 *
	 * @param string $area used to return an area such as body, headers, error
	 *
	 * @return mixed the requested area or the complete response array
	 */
	public function result($area = '')
	{
		// Just return a specified area or the entire result?
		if ($area === '')
		{
			return $this->_response;
		}

		return $this->_response[$area] ?? $this->_response;
	}
}
||
| 326 |