dimaslanjaka /
universal-framework
| 1 | <?php |
||
| 2 | |||
| 3 | namespace simplehtmldom; |
||
| 4 | |||
| 5 | /** |
||
| 6 | * Website: http://sourceforge.net/projects/simplehtmldom/ |
||
| 7 | * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/). |
||
| 8 | * |
||
| 9 | * Licensed under The MIT License |
||
| 10 | * See the LICENSE file in the project root for more information. |
||
| 11 | * |
||
| 12 | * Authors: |
||
| 13 | * S.C. Chen |
||
| 14 | * John Schlick |
||
| 15 | * Rus Carroll |
||
| 16 | * logmanoriginal |
||
| 17 | * |
||
| 18 | * Contributors: |
||
| 19 | * Yousuke Kumakura |
||
| 20 | * Vadim Voituk |
||
| 21 | * Antcs |
||
| 22 | * |
||
| 23 | * Version Rev. 2.0-RC2 (415) |
||
| 24 | */ |
||
| 25 | include_once __DIR__ . '/HtmlDocument.php'; |
||
| 26 | |||
/**
 * Downloads a web page over HTTP(S) and wraps the response body in an
 * HtmlDocument.
 *
 * cURL is used when the extension is loaded; otherwise the request falls back
 * to file_get_contents() when allow_url_fopen is enabled. Every failure
 * (invalid URL, unsupported scheme, transport error, non-200 final status,
 * oversized body) is reported by returning null.
 *
 * NOTE(review): MAX_FILE_SIZE is not defined in this file; it is presumably
 * made available through the included HtmlDocument.php -- confirm.
 */
class HtmlWeb
{
    /**
     * Loads the page at the given URL.
     *
     * @param string $url Absolute URL; only the http and https schemes are accepted
     *
     * @return HtmlDocument|null Returns the DOM for a webpage, or null on any failure
     */
    public function load($url)
    {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return null;
        }

        $scheme = parse_url($url, PHP_URL_SCHEME);

        // FILTER_VALIDATE_URL also accepts schemes such as ftp: or mailto:,
        // so the scheme is checked explicitly.
        if (!$scheme || !in_array(strtolower($scheme), ['http', 'https'], true)) {
            return null;
        }

        if (extension_loaded('curl')) {
            return $this->load_curl($url);
        }

        if (ini_get('allow_url_fopen')) {
            return $this->load_fopen($url);
        }

        error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini');

        return null;
    }

    /**
     * cURL implementation of load.
     *
     * @param string $url URL already validated by load()
     *
     * @return HtmlDocument|null The parsed document, or null if the handle
     *                           could not be created, the transfer failed, the
     *                           final status was not 200 or the body exceeds
     *                           MAX_FILE_SIZE
     */
    private function load_curl($url)
    {
        $ch = curl_init();

        if (false === $ch) {
            return null;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // There is no guarantee this request will be fulfilled
        // -- https://www.php.net/manual/en/function.curl-setopt.php
        curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE);

        // These are only preferences; the server is free to ignore them
        $header = [
            'Accept: text/html', // Prefer HTML format
            'Accept-Charset: utf-8', // Prefer UTF-8 encoding
        ];
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);

        $doc = curl_exec($ch);
        $status = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);

        // Close the handle before any early return so it is released on
        // every path, including failed transfers and non-200 responses.
        curl_close($ch);

        // curl_exec() returns false when the transfer itself failed
        if (!is_string($doc) || 200 !== $status) {
            return null;
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }

    /**
     * fopen implementation of load.
     *
     * @param string $url URL already validated by load()
     *
     * @return HtmlDocument|null The parsed document, or null if the request
     *                           failed, the final status was not 200 or the
     *                           body exceeds MAX_FILE_SIZE
     */
    private function load_fopen($url)
    {
        // These are only preferences; the server is free to ignore them
        $context = stream_context_create(['http' => [
            'header' => [
                'Accept: text/html', // Prefer HTML format
                'Accept-Charset: utf-8', // Prefer UTF-8 encoding
            ],
            'ignore_errors' => true, // Always fetch content
        ]]);

        // Read one byte more than the limit so that an oversized body can be
        // detected by the length check below.
        $doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1);

        // file_get_contents() returns false when the request could not be made
        if (false === $doc) {
            return null;
        }

        if (isset($http_response_header)) {
            foreach ($http_response_header as $rh) {
                // https://stackoverflow.com/a/1442526
                $parts = explode(' ', $rh, 3);

                if (isset($parts[1]) && preg_match('/HTTP\/\d\.\d/', $parts[0])) {
                    $code = $parts[1];
                }
            } // Last code is final status

            if (!isset($code) || '200' !== $code) {
                return null;
            }
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }
}
||
| 134 |