| Total Complexity | 102 |
| Total Lines | 566 |
| Duplicated Lines | 0 % |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Af_RedditImgur often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Af_RedditImgur, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 2 | class Af_RedditImgur extends Plugin { |
||
| 3 | |||
| 4 | /** @var PluginHost $host plugin host passed to init(); used by this class to read and store settings and to look up hooks */ |
||
| 5 | private $host; |
||
| 6 | |||
/**
 * Returns the descriptive tuple for this plugin.
 *
 * @return array three elements: 1.0, a one-line description and "fox"
 *               (presumably version, description and author - confirm against Plugin)
 */
public function about() {
	$version = 1.0;
	$description = "Inline images (and other content) in Reddit RSS feeds";
	$author = "fox";

	return array($version, $description, $author);
}
||
| 12 | |||
/**
 * Returns the plugin flags; this plugin only sets "needs_curl".
 *
 * @return array map of flag name to boolean
 */
public function flags() {
	$flags = array();
	$flags["needs_curl"] = true;

	return $flags;
}
||
| 16 | |||
/**
 * Stores the plugin host and registers this plugin for the article filter
 * and preferences tab hooks.
 *
 * @param PluginHost $host host object the hooks are registered with
 */
public function init($host) {
	$this->host = $host;

	// registration order is kept: article filter first, then prefs tab
	$hooks = array($host::HOOK_ARTICLE_FILTER, $host::HOOK_PREFS_TAB);

	foreach ($hooks as $hook) {
		$host->add_hook($hook, $this);
	}
}
||
| 23 | |||
/**
 * Prints the settings pane of this plugin.
 *
 * Nothing is printed unless $args equals "prefFeeds". The pane contains a
 * Dojo form with two checkboxes (enable_readability, enable_content_dupcheck)
 * that is submitted to backend.php and handled by save().
 *
 * @param string $args identifier of the preferences tab being rendered
 */
public function hook_prefs_tab($args) {
	if ($args != "prefFeeds") {
		return;
	}

	print "<div dojoType=\"dijit.layout.AccordionPane\"
		title=\"<i class='material-icons'>extension</i> ".__('Reddit content settings (af_redditimgur)')."\">";

	$enable_readability = $this->host->get($this, "enable_readability");
	$enable_content_dupcheck = $this->host->get($this, "enable_content_dupcheck");

	$php_too_old = version_compare(PHP_VERSION, '5.6.0', '<');

	if ($php_too_old) {
		print_error("Readability requires PHP version 5.6.");
	}

	print "<form dojoType='dijit.form.Form'>";

	// client-side submit handler: posts the form values and shows the reply
	print "<script type='dojo/method' event='onSubmit' args='evt'>
		evt.preventDefault();
		if (this.validate()) {
			console.log(dojo.objectToQuery(this.getValues()));
			new Ajax.Request('backend.php', {
				parameters: dojo.objectToQuery(this.getValues()),
				onComplete: function(transport) {
					Notify.info(transport.responseText);
				}
			});
			//this.reset();
		}
		</script>";

	// hidden fields that route the request to this plugin's save()
	$hidden_fields = array(
		"op" => "pluginhandler",
		"method" => "save",
		"plugin" => "af_redditimgur",
	);

	foreach ($hidden_fields as $field_name => $field_value) {
		print_hidden($field_name, $field_value);
	}

	// one fieldset per setting: name, current value, translated label
	$checkboxes = array(
		array("enable_readability", $enable_readability,
			__("Extract missing content using Readability (requires af_readability)")),
		array("enable_content_dupcheck", $enable_content_dupcheck,
			__("Enable additional duplicate checking")),
	);

	foreach ($checkboxes as $checkbox) {
		list($name, $checked, $label) = $checkbox;

		print "<fieldset class='narrow'>";
		print "<label class='checkbox'>";
		print_checkbox($name, $checked);
		print " " . $label . "</label>";
		print "</fieldset>";
	}

	print_button("submit", __("Save"), 'class="alt-primary"');
	print "</form>";

	print "</div>";
}
||
| 74 | |||
/**
 * Stores the two checkbox settings posted by the preferences form and
 * prints a confirmation message.
 *
 * Reads $_POST["enable_readability"] and $_POST["enable_content_dupcheck"];
 * a key that is absent from the request is treated like the null the
 * original unguarded read produced, but without raising an
 * "undefined index" notice into the AJAX response.
 */
public function save() {
	$raw_readability = isset($_POST["enable_readability"]) ? $_POST["enable_readability"] : null;
	$raw_dupcheck = isset($_POST["enable_content_dupcheck"]) ? $_POST["enable_content_dupcheck"] : null;

	$enable_readability = checkbox_to_sql_bool($raw_readability);
	$enable_content_dupcheck = checkbox_to_sql_bool($raw_dupcheck);

	// NOTE(review): the fourth argument (false) presumably defers persisting
	// until the following set() call - confirm against PluginHost::set()
	$this->host->set($this, "enable_readability", $enable_readability, false);
	$this->host->set($this, "enable_content_dupcheck", $enable_content_dupcheck);

	echo __("Configuration saved");
}
||
| 84 | |||
/**
 * Walks all links and images of the article DOM and inlines the media a
 * non-reddit link points to (tweet, video, YouTube player or picture)
 * directly before that link.
 *
 * Handlers are tried in a fixed order and every handler is skipped once
 * something has been inlined ($found is never reset between entries).
 * Several handlers perform HTTP requests (fetch_file_contents(),
 * get_content_type(), get_location()).
 *
 * Independently of the handlers, for every entry carrying a "src" attribute
 * the entry's parent node is removed from the document.
 *
 * @param array       $article article being filtered (unused here)
 * @param DOMDocument $doc     document to modify in place
 * @param DOMXPath    $xpath   xpath object bound to $doc
 * @return bool true when at least one piece of media was inlined
 *
 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
 */
private function inline_stuff($article, &$doc, $xpath) {

	$entries = $xpath->query('(//a[@href]|//img[@src])');
	$img_entries = $xpath->query("(//img[@src])");

	$found = false;

	foreach ($entries as $entry) {
		if ($entry->hasAttribute("href") && strpos($entry->getAttribute("href"), "reddit.com") === FALSE) {

			// kept in sync with the attribute whenever a handler rewrites it
			$href = $entry->getAttribute("href");

			Debug::log("processing href: " . $href, Debug::$LOG_VERBOSE);

			$matches = array();

			if (!$found && preg_match("/^https?:\/\/twitter.com\/(.*?)\/status\/(.*)/", $href, $matches)) {
				Debug::log("handling as twitter: " . $matches[1] . " " . $matches[2], Debug::$LOG_VERBOSE);

				$oembed_result = fetch_file_contents("https://publish.twitter.com/oembed?url=" . urlencode($href));

				if ($oembed_result) {
					$oembed_result = json_decode($oembed_result, true);

					if ($oembed_result && isset($oembed_result["html"])) {

						$tmp = new DOMDocument();
						if ($tmp->loadHTML('<?xml encoding="utf-8" ?>' . $oembed_result["html"])) {
							$blockquote = $tmp->getElementsByTagName("blockquote")->item(0);

							// item(0) is null when the returned markup has no
							// blockquote; importNode() must not be given null
							if ($blockquote) {
								$p = $doc->createElement("p");
								$p->appendChild($doc->importNode($blockquote, TRUE));

								$br = $doc->createElement('br');
								$entry->parentNode->insertBefore($p, $entry);
								$entry->parentNode->insertBefore($br, $entry);

								$found = true;
							}
						}
					}
				}
			}

			// rewrite <sub>.gfycat.com/<name>.<ext> links to the page URL
			// so the next handler can pick them up
			if (!$found && preg_match("/\.gfycat.com\/([a-z]+)?(\.[a-z]+)$/i", $href, $matches)) {
				$href = "http://www.gfycat.com/".$matches[1];
				$entry->setAttribute("href", $href);
			}

			if (!$found && preg_match("/https?:\/\/(www\.)?gfycat.com\/([a-z]+)$/i", $href, $matches)) {

				Debug::log("Handling as Gfycat", Debug::$LOG_VERBOSE);

				$source_stream = 'https://giant.gfycat.com/' . $matches[2] . '.mp4';
				$poster_url = 'https://thumbs.gfycat.com/' . $matches[2] . '-mobile.jpg';

				$content_type = $this->get_content_type($source_stream);

				// only inline when the guessed stream URL really serves video
				if (strpos((string) $content_type, "video/") !== FALSE) {
					$this->handle_as_video($doc, $entry, $source_stream, $poster_url);
					$found = true;
				}
			}

			if (!$found && preg_match("/https?:\/\/v\.redd\.it\/(.*)$/i", $href, $matches)) {

				Debug::log("Handling as reddit inline video", Debug::$LOG_VERBOSE);

				// first image of the article (if any) doubles as the poster
				$img = $img_entries->item(0);

				if ($img) {
					$poster_url = $img->getAttribute("src");
				} else {
					$poster_url = false;
				}

				// Get original article URL from v.redd.it redirects
				$source_article_url = $this->get_location($matches[0]);
				Debug::log("Resolved ".$matches[0]." to ".$source_article_url, Debug::$LOG_VERBOSE);

				$source_stream = false;

				if ($source_article_url) {
					$j = json_decode(fetch_file_contents($source_article_url.".json"), true);

					if (is_array($j)) {
						foreach ($j as $listing) {
							// tolerate listings without the expected structure
							if (!isset($listing["data"]["children"]) || !is_array($listing["data"]["children"])) {
								continue;
							}

							foreach ($listing["data"]["children"] as $child) {
								if (isset($child["data"]["url"]) && $child["data"]["url"] == $matches[0]) {
									// missing array keys do not throw, so test
									// explicitly instead of relying on try/catch
									if (isset($child["data"]["media"]["reddit_video"]["fallback_url"])) {
										$source_stream = $child["data"]["media"]["reddit_video"]["fallback_url"];
									}
									break 2;
								}
							}
						}
					}
				}

				if (!$source_stream) {
					$source_stream = "https://v.redd.it/" . $matches[1] . "/DASH_600_K";
				}

				$this->handle_as_video($doc, $entry, $source_stream, $poster_url);
				$found = true;
			}

			if (!$found && preg_match("/https?:\/\/(www\.)?streamable.com\//i", $href)) {

				Debug::log("Handling as Streamable", Debug::$LOG_VERBOSE);

				$tmp = fetch_file_contents($href);

				if ($tmp) {
					$tmpdoc = new DOMDocument();

					if (@$tmpdoc->loadHTML($tmp)) {
						$tmpxpath = new DOMXPath($tmpdoc);

						$source_node = $tmpxpath->query("//video[contains(@class,'video-player-tag')]//source[contains(@src, '.mp4')]")->item(0);
						$poster_node = $tmpxpath->query("//video[contains(@class,'video-player-tag') and @poster]")->item(0);

						if ($source_node && $poster_node) {
							$source_stream = $source_node->getAttribute("src");
							$poster_url = $poster_node->getAttribute("poster");

							$this->handle_as_video($doc, $entry, $source_stream, $poster_url);
							$found = true;
						}
					}
				}
			}

			// imgur .gif -> .gifv
			if (!$found && preg_match("/i\.imgur\.com\/(.*?)\.gif$/i", $href)) {
				Debug::log("Handling as imgur gif (->gifv)", Debug::$LOG_VERBOSE);

				$href = str_replace(".gif", ".gifv", $href);
				$entry->setAttribute("href", $href);
			}

			if (!$found && preg_match("/\.(gifv|mp4)$/i", $href)) {
				Debug::log("Handling as imgur gifv", Debug::$LOG_VERBOSE);

				$source_stream = str_replace(".gifv", ".mp4", $href);

				// a poster is only derived for imgur URLs; start from false so
				// no undefined or stale value from an earlier entry is used
				$poster_url = false;

				if (strpos($source_stream, "imgur.com") !== FALSE) {
					$poster_url = str_replace(".mp4", "h.jpg", $source_stream);
				}

				$this->handle_as_video($doc, $entry, $source_stream, $poster_url);

				$found = true;
			}

			$matches = array();

			// the alternatives are grouped so that !$found guards all of them
			// (&& binds tighter than ||)
			if (!$found && (preg_match("/youtube\.com\/v\/([\w-]+)/", $href, $matches) ||
				preg_match("/youtube\.com\/.*?[\&\?]v=([\w-]+)/", $href, $matches) ||
				preg_match("/youtube\.com\/watch\?v=([\w-]+)/", $href, $matches) ||
				preg_match("/\/\/youtu.be\/([\w-]+)/", $href, $matches))) {

				$vid_id = $matches[1];

				Debug::log("Handling as youtube: $vid_id", Debug::$LOG_VERBOSE);

				$iframe = $doc->createElement("iframe");
				$iframe->setAttribute("class", "youtube-player");
				$iframe->setAttribute("type", "text/html");
				$iframe->setAttribute("width", "640");
				$iframe->setAttribute("height", "385");
				$iframe->setAttribute("src", "https://www.youtube.com/embed/$vid_id");
				$iframe->setAttribute("allowfullscreen", "1");
				$iframe->setAttribute("frameborder", "0");

				$br = $doc->createElement('br');
				$entry->parentNode->insertBefore($iframe, $entry);
				$entry->parentNode->insertBefore($br, $entry);

				$found = true;
			}

			// grouped for the same reason as above; this also keeps the
			// content-type HTTP request from running once something was found
			if (!$found && (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $href) ||
				mb_strpos($href, "i.reddituploads.com") !== FALSE ||
				mb_strpos((string) $this->get_content_type($href), "image/") !== FALSE)) {

				Debug::log("Handling as a picture", Debug::$LOG_VERBOSE);

				$img = $doc->createElement('img');
				$img->setAttribute("src", $href);

				$br = $doc->createElement('br');
				$entry->parentNode->insertBefore($img, $entry);
				$entry->parentNode->insertBefore($br, $entry);

				$found = true;
			}

			// imgur via link rel="image_src" href="..."
			if (!$found && preg_match("/imgur/", $href)) {

				Debug::log("handling as imgur page/whatever", Debug::$LOG_VERBOSE);

				$content = fetch_file_contents(["url" => $href,
					"http_accept" => "text/*"]);

				if ($content) {
					$cdoc = new DOMDocument();

					if (@$cdoc->loadHTML($content)) {
						$cxpath = new DOMXPath($cdoc);

						$rel_image = $cxpath->query("//link[@rel='image_src']")->item(0);

						if ($rel_image) {

							$img = $doc->createElement('img');
							$img->setAttribute("src", $rel_image->getAttribute("href"));

							$br = $doc->createElement('br');
							$entry->parentNode->insertBefore($img, $entry);
							$entry->parentNode->insertBefore($br, $entry);

							$found = true;
						}
					}
				}
			}

			// gyazo page link -> direct image URL built from the page id
			if (!$found && preg_match("/^https?:\/\/gyazo\.com\/([^\.\/]+$)/", $href, $matches)) {
				$img_id = $matches[1];

				Debug::log("handling as gyazo: $img_id", Debug::$LOG_VERBOSE);

				$img = $doc->createElement('img');
				$img->setAttribute("src", "https://i.gyazo.com/$img_id.jpg");

				$br = $doc->createElement('br');
				$entry->parentNode->insertBefore($img, $entry);
				$entry->parentNode->insertBefore($br, $entry);

				$found = true;
			}

			// let's try meta properties
			if (!$found) {
				Debug::log("looking for meta og:image", Debug::$LOG_VERBOSE);

				$content = fetch_file_contents(["url" => $href,
					"http_accept" => "text/*"]);

				if ($content) {
					$cdoc = new DOMDocument();

					if (@$cdoc->loadHTML($content)) {
						$cxpath = new DOMXPath($cdoc);

						$og_image = $cxpath->query("//meta[@property='og:image']")->item(0);
						$og_video = $cxpath->query("//meta[@property='og:video']")->item(0);

						if ($og_video) {

							$source_stream = $og_video->getAttribute("content");

							if ($source_stream) {

								if ($og_image) {
									$poster_url = $og_image->getAttribute("content");
								} else {
									$poster_url = false;
								}

								$this->handle_as_video($doc, $entry, $source_stream, $poster_url);
								$found = true;
							}

						} else if ($og_image) {

							$og_src = $og_image->getAttribute("content");

							if ($og_src) {
								$img = $doc->createElement('img');
								$img->setAttribute("src", $og_src);

								$br = $doc->createElement('br');
								$entry->parentNode->insertBefore($img, $entry);
								$entry->parentNode->insertBefore($br, $entry);

								$found = true;
							}
						}
					}
				}
			}

		}

		// remove tiny thumbnails
		if ($entry->hasAttribute("src")) {
			if ($entry->parentNode && $entry->parentNode->parentNode) {
				$entry->parentNode->parentNode->removeChild($entry->parentNode);
			}
		}
	}

	return $found;
}
||
| 393 | |||
/**
 * Article filter hook: for articles whose link contains "reddit.com/r/",
 * optionally flags duplicates, inlines linked media and otherwise falls
 * back to readability().
 *
 * Duplicate check (only when the "enable_content_dupcheck" setting is on and
 * the content has a "[link]" anchor): counts entries of the same owner that
 * were entered more than one day ago, have a different guid and whose
 * content contains the same "[link]" href; if any exist,
 * $article["force_catchup"] is set to true.
 *
 * @param array $article article data; reads "link", "content", "guid_hashed"
 *                       and "owner_uid"
 * @return array the (possibly modified) article
 */
public function hook_article_filter($article) {

	if (strpos($article["link"], "reddit.com/r/") === FALSE) {
		return $article;
	}

	// DOMDocument::loadHTML() rejects an empty string - on PHP 8 with a
	// ValueError that "@" cannot silence. There is nothing to inline in an
	// empty body, so return the article untouched.
	if ((string) $article["content"] === "") {
		return $article;
	}

	$doc = new DOMDocument();
	@$doc->loadHTML($article["content"]);
	$xpath = new DOMXPath($doc);

	$content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0);

	if ($this->host->get($this, "enable_content_dupcheck")) {

		if ($content_link) {
			$content_href = $content_link->getAttribute("href");
			$entry_guid = $article["guid_hashed"];
			$owner_uid = $article["owner_uid"];

			// "older than one day" spelled per database backend
			if (DB_TYPE == "pgsql") {
				$interval_qpart = "date_entered < NOW() - INTERVAL '1 day'";
			} else {
				$interval_qpart = "date_entered < DATE_SUB(NOW(), INTERVAL 1 DAY)";
			}

			$sth = $this->pdo->prepare("SELECT COUNT(id) AS cid
				FROM ttrss_entries, ttrss_user_entries WHERE
					ref_id = id AND
					$interval_qpart AND
					guid != ? AND
					owner_uid = ? AND
					content LIKE ?");

			$sth->execute([$entry_guid, $owner_uid, "%href=\"$content_href\">[link]%"]);

			if ($row = $sth->fetch()) {
				$num_found = $row['cid'];

				if ($num_found > 0) $article["force_catchup"] = true;
			}
		}
	}

	$found = $this->inline_stuff($article, $doc, $xpath);

	$node = $doc->getElementsByTagName('body')->item(0);

	if ($node && $found) {
		// something was inlined: replace the content with the modified body
		$article["content"] = $doc->saveHTML($node);
	} else if ($content_link) {
		$article = $this->readability($article, $content_link->getAttribute("href"), $doc, $xpath);
	}

	return $article;
}
||
| 447 | |||
/**
 * Returns the plugin API version number reported by this plugin.
 *
 * @return int always 2
 */
public function api_version() {
	$version = 2;

	return $version;
}
||
| 451 | |||
/**
 * Inserts a <video> element (autoplay, controls, loop) with a single
 * video/mp4 <source>, followed by a <br> and an <img> with an inline GIF
 * data URI, directly before $entry.
 *
 * @param DOMDocument $doc           document used to create the new elements
 * @param DOMNode     $entry         node the new elements are inserted before
 * @param string      $source_stream URL used as the <source> src
 * @param string|bool $poster_url    poster image URL; a falsy value omits the attribute
 */
private function handle_as_video($doc, $entry, $source_stream, $poster_url = false) {

	Debug::log("handle_as_video: $source_stream", Debug::$LOG_VERBOSE);

	$video = $doc->createElement('video');

	foreach (array("autoplay", "controls", "loop") as $flag) {
		$video->setAttribute($flag, "1");
	}

	if ($poster_url) {
		$video->setAttribute("poster", $poster_url);
	}

	$source = $doc->createElement('source');
	$source->setAttribute("src", $source_stream);
	$source->setAttribute("type", "video/mp4");
	$video->appendChild($source);

	// inserting before $entry does not change $entry's own parent
	$parent = $entry->parentNode;

	$parent->insertBefore($video, $entry);
	$parent->insertBefore($doc->createElement('br'), $entry);

	// NOTE(review): purpose of this placeholder image is not visible here;
	// presumably it lets image-dependent logic elsewhere see an image - confirm
	$placeholder = $doc->createElement('img');
	$placeholder->setAttribute("src",
		"data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs%3D");

	$parent->insertBefore($placeholder, $entry);
}
||
| 479 | |||
/**
 * Debug endpoint: runs the inlining logic (and, if nothing was inlined,
 * readability()) against the URL given in $_REQUEST["url"] and prints the
 * outcome as text/plain.
 *
 * The URL is HTML-escaped only where it is printed or embedded into markup.
 * readability() receives the unescaped URL, because an escaped one
 * ("&" turned into "&amp;") would request a different address.
 */
public function testurl() {
	// a missing parameter is treated as an empty URL instead of raising a notice
	$url = isset($_REQUEST["url"]) ? $_REQUEST["url"] : "";
	$url_escaped = htmlspecialchars($url);

	header("Content-type: text/plain");

	print "URL: $url_escaped\n";

	// the HTML parser decodes the escaped attribute back to the raw URL
	$doc = new DOMDocument();
	@$doc->loadHTML("<html><body><a href=\"$url_escaped\">[link]</a></body>");
	$xpath = new DOMXPath($doc);

	$found = $this->inline_stuff([], $doc, $xpath);

	print "Inline result: $found\n";

	if (!$found) {
		print "\nReadability result:\n";

		$article = $this->readability([], $url, $doc, $xpath);

		print_r($article);
	} else {
		print "\nResulting HTML:\n";

		print $doc->saveHTML();
	}
}
||
| 507 | |||
/**
 * Issues a body-less (CURLOPT_NOBODY) curl request with a 5 second timeout
 * and returns one piece of transfer information about it.
 *
 * Redirects are followed unless the open_basedir ini setting is non-empty.
 *
 * $useragent carries no default value: it precedes the required $header
 * parameter, so a default could never be used and PHP 8 reports such a
 * declaration as deprecated. All callers pass it explicitly.
 *
 * @param string $url       URL to request
 * @param string $useragent value for the User-Agent header
 * @param int    $header    CURLINFO_* constant selecting the value to return
 * @return mixed the curl_getinfo() result, or false when curl is
 *               unavailable, NO_CURL is defined or the handle cannot be created
 */
private function get_header($url, $useragent, $header) {
	$ret = false;

	if (function_exists("curl_init") && !defined("NO_CURL")) {
		$ch = curl_init($url);

		// curl_init() returns false on failure; do not configure a non-handle
		if ($ch) {
			curl_setopt($ch, CURLOPT_TIMEOUT, 5);
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_HEADER, true);
			curl_setopt($ch, CURLOPT_NOBODY, true);
			curl_setopt($ch, CURLOPT_FOLLOWLOCATION, !ini_get("open_basedir"));
			curl_setopt($ch, CURLOPT_USERAGENT, $useragent);

			@curl_exec($ch);
			$ret = curl_getinfo($ch, $header);
		}
	}

	return $ret;
}
||
| 526 | |||
/**
 * Looks up the content type reported for a URL via get_header().
 *
 * @param string $url       URL to probe
 * @param string $useragent User-Agent to send; defaults to SELF_USER_AGENT
 * @return mixed whatever get_header() yields for CURLINFO_CONTENT_TYPE
 */
private function get_content_type($url, $useragent = SELF_USER_AGENT) {
	$content_type = $this->get_header($url, $useragent, CURLINFO_CONTENT_TYPE);

	return $content_type;
}
||
| 530 | |||
/**
 * Looks up the effective URL of a request via get_header().
 *
 * @param string $url       URL to probe
 * @param string $useragent User-Agent to send; defaults to SELF_USER_AGENT
 * @return mixed whatever get_header() yields for CURLINFO_EFFECTIVE_URL
 */
private function get_location($url, $useragent = SELF_USER_AGENT) {
	$effective_url = $this->get_header($url, $useragent, CURLINFO_EFFECTIVE_URL);

	return $effective_url;
}
||
| 534 | |||
/**
 * Replaces short article content with full text obtained from the
 * HOOK_GET_FULL_TEXT plugins.
 *
 * Extraction is attempted only when all of the following hold:
 *  - curl is available and NO_CURL is not defined,
 *  - the "enable_readability" setting is on,
 *  - the tag-stripped content is at most 150 characters long
 *    (a missing "content" key counts as empty content),
 *  - $url is non-empty, does not contain "reddit.com", and PHP is >= 5.6,
 *  - the URL's content type contains "text/html".
 * The first non-empty result of a hook becomes $article["content"].
 *
 * @param array       $article article data; "content" is read and may be replaced
 * @param string      $url     URL of the linked page
 * @param DOMDocument $doc     unused
 * @param DOMXPath    $xpath   unused
 * @param bool        $debug   unused
 * @return array the (possibly modified) article
 *
 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
 */
private function readability($article, $url, $doc, $xpath, $debug = false) {

	// testurl() passes an empty array, so "content" may be absent
	$content = isset($article["content"]) ? $article["content"] : "";

	if (!defined('NO_CURL') && function_exists("curl_init") && $this->host->get($this, "enable_readability") &&
		mb_strlen(strip_tags($content)) <= 150) {

		// do not try to embed posts linking back to other reddit posts
		// readability.php requires PHP 5.6
		if ($url && strpos($url, "reddit.com") === FALSE && version_compare(PHP_VERSION, '5.6.0', '>=')) {

			/* link may lead to a huge video file or whatever, we need to check content type before trying to
			parse it which p much requires curl */

			$useragent_compat = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)";
			$content_type = $this->get_content_type($url, $useragent_compat);

			if ($content_type && strpos($content_type, "text/html") !== FALSE) {

				// first plugin that returns something wins
				foreach ($this->host->get_hooks(PluginHost::HOOK_GET_FULL_TEXT) as $p) {
					$extracted_content = $p->hook_get_full_text($url);

					if ($extracted_content) {
						$article["content"] = $extracted_content;
						break;
					}
				}
			}
		}
	}

	return $article;
}
||
| 570 |