Conditions | 122 |
Total Lines | 943 |
Code Lines | 538 |
Lines | 0 |
Ratio | 0 % |
Changes | 13 | ||
Bugs | 1 | Features | 2 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
1 | <?php |
||
263 | public static function update_rss_feed($feed, $no_cache = false) { |
||
264 | |||
265 | reset_fetch_domain_quota(); |
||
266 | |||
267 | Debug::log("start", Debug::$LOG_VERBOSE); |
||
268 | |||
269 | $pdo = Db::pdo(); |
||
270 | |||
271 | $sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?"); |
||
272 | $sth->execute([$feed]); |
||
273 | |||
274 | if (!$row = $sth->fetch()) { |
||
275 | Debug::log("feed $feed not found, skipping."); |
||
276 | user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING); |
||
277 | return false; |
||
278 | } |
||
279 | |||
280 | $title = $row["title"]; |
||
281 | $site_url = $row["site_url"]; |
||
282 | |||
283 | // feed was batch-subscribed or something, we need to get basic info |
||
284 | // this is not optimal currently as it fetches stuff separately TODO: optimize |
||
285 | if ($title == "[Unknown]" || !$title || !$site_url) { |
||
286 | Debug::log("setting basic feed info for $feed [$title, $site_url]..."); |
||
287 | RSSUtils::set_basic_feed_info($feed); |
||
288 | } |
||
289 | |||
290 | $sth = $pdo->prepare("SELECT id,update_interval,auth_login, |
||
291 | feed_url,auth_pass,cache_images, |
||
292 | mark_unread_on_update, owner_uid, |
||
293 | auth_pass_encrypted, feed_language, |
||
294 | last_modified, |
||
295 | ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional |
||
296 | FROM ttrss_feeds WHERE id = ?"); |
||
297 | $sth->execute([$feed]); |
||
298 | |||
299 | if ($row = $sth->fetch()) { |
||
300 | |||
301 | $owner_uid = $row["owner_uid"]; |
||
302 | $mark_unread_on_update = $row["mark_unread_on_update"]; |
||
303 | |||
304 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW() |
||
305 | WHERE id = ?"); |
||
306 | $sth->execute([$feed]); |
||
307 | |||
308 | $auth_login = $row["auth_login"]; |
||
309 | $auth_pass = $row["auth_pass"]; |
||
310 | $stored_last_modified = $row["last_modified"]; |
||
311 | $last_unconditional = $row["last_unconditional"]; |
||
312 | $cache_images = $row["cache_images"]; |
||
313 | $fetch_url = $row["feed_url"]; |
||
314 | |||
315 | $feed_language = mb_strtolower($row["feed_language"]); |
||
316 | |||
317 | if (!$feed_language) { |
||
318 | $feed_language = mb_strtolower(get_pref('DEFAULT_SEARCH_LANGUAGE', $owner_uid)); |
||
319 | } |
||
320 | |||
321 | if (!$feed_language) { |
||
322 | $feed_language = 'simple'; |
||
323 | } |
||
324 | |||
325 | } else { |
||
326 | return false; |
||
327 | } |
||
328 | |||
329 | $date_feed_processed = date('Y-m-d H:i'); |
||
330 | |||
331 | $cache_filename = CACHE_DIR."/feeds/".sha1($fetch_url).".xml"; |
||
332 | |||
333 | $pluginhost = new PluginHost(); |
||
334 | $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid); |
||
335 | |||
336 | $pluginhost->load(PLUGINS, PluginHost::KIND_ALL); |
||
337 | $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid); |
||
338 | $pluginhost->load_data(); |
||
339 | |||
340 | $rss_hash = false; |
||
341 | |||
342 | $force_refetch = isset($_REQUEST["force_refetch"]); |
||
343 | $feed_data = ""; |
||
344 | |||
345 | Debug::log("running HOOK_FETCH_FEED handlers...", Debug::$LOG_VERBOSE); |
||
346 | |||
347 | foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) { |
||
348 | Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE); |
||
349 | $start = microtime(true); |
||
350 | $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass); |
||
351 | Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE); |
||
352 | } |
||
353 | |||
354 | if ($feed_data) { |
||
355 | Debug::log("feed data has been modified by a plugin.", Debug::$LOG_VERBOSE); |
||
356 | } else { |
||
357 | Debug::log("feed data has not been modified by a plugin.", Debug::$LOG_VERBOSE); |
||
358 | } |
||
359 | |||
360 | // try cache |
||
361 | if (!$feed_data && |
||
362 | file_exists($cache_filename) && |
||
363 | is_readable($cache_filename) && |
||
364 | !$auth_login && !$auth_pass && |
||
365 | filemtime($cache_filename) > time() - 30) { |
||
366 | |||
367 | Debug::log("using local cache [$cache_filename].", Debug::$LOG_VERBOSE); |
||
368 | |||
369 | @$feed_data = file_get_contents($cache_filename); |
||
370 | |||
371 | if ($feed_data) { |
||
372 | $rss_hash = sha1($feed_data); |
||
373 | } |
||
374 | |||
375 | } else { |
||
376 | Debug::log("local cache will not be used for this feed", Debug::$LOG_VERBOSE); |
||
377 | } |
||
378 | |||
379 | global $fetch_last_modified; |
||
380 | |||
381 | // fetch feed from source |
||
382 | if (!$feed_data) { |
||
383 | Debug::log("last unconditional update request: $last_unconditional", Debug::$LOG_VERBOSE); |
||
384 | |||
385 | if (ini_get("open_basedir") && function_exists("curl_init")) { |
||
386 | Debug::log("not using CURL due to open_basedir restrictions", Debug::$LOG_VERBOSE); |
||
387 | } |
||
388 | |||
389 | if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) { |
||
390 | Debug::log("maximum allowed interval for conditional requests exceeded, forcing refetch", Debug::$LOG_VERBOSE); |
||
391 | |||
392 | $force_refetch = true; |
||
393 | } else { |
||
394 | Debug::log("stored last modified for conditional request: $stored_last_modified", Debug::$LOG_VERBOSE); |
||
395 | } |
||
396 | |||
397 | Debug::log("fetching [$fetch_url] (force_refetch: $force_refetch)...", Debug::$LOG_VERBOSE); |
||
398 | |||
399 | $feed_data = fetch_file_contents([ |
||
400 | "url" => $fetch_url, |
||
401 | "login" => $auth_login, |
||
402 | "pass" => $auth_pass, |
||
403 | "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT, |
||
404 | "last_modified" => $force_refetch ? "" : $stored_last_modified |
||
405 | ]); |
||
406 | |||
407 | $feed_data = trim($feed_data); |
||
408 | |||
409 | Debug::log("fetch done.", Debug::$LOG_VERBOSE); |
||
410 | Debug::log("source last modified: ".$fetch_last_modified, Debug::$LOG_VERBOSE); |
||
411 | |||
412 | if ($feed_data && $fetch_last_modified != $stored_last_modified) { |
||
413 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?"); |
||
414 | $sth->execute([substr($fetch_last_modified, 0, 245), $feed]); |
||
415 | } |
||
416 | |||
417 | // cache vanilla feed data for re-use |
||
418 | if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR."/feeds")) { |
||
419 | $new_rss_hash = sha1($feed_data); |
||
420 | |||
421 | if ($new_rss_hash != $rss_hash) { |
||
422 | Debug::log("saving $cache_filename", Debug::$LOG_VERBOSE); |
||
423 | @file_put_contents($cache_filename, $feed_data); |
||
424 | } |
||
425 | } |
||
426 | } |
||
427 | |||
428 | if (!$feed_data) { |
||
429 | global $fetch_last_error; |
||
430 | global $fetch_last_error_code; |
||
431 | |||
432 | Debug::log("unable to fetch: $fetch_last_error [$fetch_last_error_code]", Debug::$LOG_VERBOSE); |
||
433 | |||
434 | // If-Modified-Since |
||
435 | if ($fetch_last_error_code != 304) { |
||
436 | $error_message = $fetch_last_error; |
||
437 | } else { |
||
438 | Debug::log("source claims data not modified, nothing to do.", Debug::$LOG_VERBOSE); |
||
439 | $error_message = ""; |
||
440 | } |
||
441 | |||
442 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?, |
||
443 | last_updated = NOW() WHERE id = ?"); |
||
444 | $sth->execute([$error_message, $feed]); |
||
445 | |||
446 | return; |
||
447 | } |
||
448 | |||
449 | Debug::log("running HOOK_FEED_FETCHED handlers...", Debug::$LOG_VERBOSE); |
||
450 | $feed_data_checksum = md5($feed_data); |
||
451 | |||
452 | foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) { |
||
453 | Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE); |
||
454 | $start = microtime(true); |
||
455 | $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed); |
||
456 | Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE); |
||
457 | } |
||
458 | |||
459 | if (md5($feed_data) != $feed_data_checksum) { |
||
460 | Debug::log("feed data has been modified by a plugin.", Debug::$LOG_VERBOSE); |
||
461 | } else { |
||
462 | Debug::log("feed data has not been modified by a plugin.", Debug::$LOG_VERBOSE); |
||
463 | } |
||
464 | |||
465 | $rss = new FeedParser($feed_data); |
||
466 | $rss->init(); |
||
467 | |||
468 | if (!$rss->error()) { |
||
469 | |||
470 | Debug::log("running HOOK_FEED_PARSED handlers...", Debug::$LOG_VERBOSE); |
||
471 | |||
472 | // We use local pluginhost here because we need to load different per-user feed plugins |
||
473 | |||
474 | foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_PARSED) as $plugin) { |
||
475 | Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE); |
||
476 | $start = microtime(true); |
||
477 | $plugin->hook_feed_parsed($rss); |
||
478 | Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE); |
||
479 | } |
||
480 | |||
481 | Debug::log("language: $feed_language", Debug::$LOG_VERBOSE); |
||
482 | Debug::log("processing feed data...", Debug::$LOG_VERBOSE); |
||
483 | |||
484 | if (DB_TYPE == "pgsql") { |
||
485 | $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'"; |
||
486 | } else { |
||
487 | $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)"; |
||
488 | } |
||
489 | |||
490 | $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color, |
||
491 | (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS |
||
492 | favicon_needs_check |
||
493 | FROM ttrss_feeds WHERE id = ?"); |
||
494 | $sth->execute([$feed]); |
||
495 | |||
496 | if ($row = $sth->fetch()) { |
||
497 | $favicon_needs_check = $row["favicon_needs_check"]; |
||
498 | $favicon_avg_color = $row["favicon_avg_color"]; |
||
499 | $owner_uid = $row["owner_uid"]; |
||
500 | } else { |
||
501 | return false; |
||
502 | } |
||
503 | |||
504 | $site_url = mb_substr(rewrite_relative_url($fetch_url, clean($rss->get_link())), 0, 245); |
||
505 | |||
506 | Debug::log("site_url: $site_url", Debug::$LOG_VERBOSE); |
||
507 | Debug::log("feed_title: ".clean($rss->get_title()), Debug::$LOG_VERBOSE); |
||
508 | |||
509 | if ($favicon_needs_check || $force_refetch) { |
||
510 | |||
511 | /* terrible hack: if we crash on floicon shit here, we won't check |
||
512 | * the icon avgcolor again (unless the icon got updated) */ |
||
513 | |||
514 | $favicon_file = ICONS_DIR."/$feed.ico"; |
||
515 | $favicon_modified = @filemtime($favicon_file); |
||
516 | |||
517 | Debug::log("checking favicon...", Debug::$LOG_VERBOSE); |
||
518 | |||
519 | RSSUtils::check_feed_favicon($site_url, $feed); |
||
520 | $favicon_modified_new = @filemtime($favicon_file); |
||
521 | |||
522 | if ($favicon_modified_new > $favicon_modified) { |
||
523 | $favicon_avg_color = ''; |
||
524 | } |
||
525 | |||
526 | $favicon_colorstring = ""; |
||
527 | if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') { |
||
528 | require_once "colors.php"; |
||
529 | |||
530 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE |
||
531 | id = ?"); |
||
532 | $sth->execute([$feed]); |
||
533 | |||
534 | $favicon_color = calculate_avg_color($favicon_file); |
||
535 | |||
536 | $favicon_colorstring = ",favicon_avg_color = ".$pdo->quote($favicon_color); |
||
537 | |||
538 | } else if ($favicon_avg_color == 'fail') { |
||
539 | Debug::log("floicon failed on this file, not trying to recalculate avg color", Debug::$LOG_VERBOSE); |
||
540 | } |
||
541 | |||
542 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW() |
||
543 | $favicon_colorstring WHERE id = ?"); |
||
544 | $sth->execute([$feed]); |
||
545 | } |
||
546 | |||
547 | Debug::log("loading filters & labels...", Debug::$LOG_VERBOSE); |
||
548 | |||
549 | $filters = RSSUtils::load_filters($feed, $owner_uid); |
||
550 | |||
551 | if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) { |
||
552 | print_r($filters); |
||
553 | } |
||
554 | |||
555 | Debug::log("".count($filters)." filters loaded.", Debug::$LOG_VERBOSE); |
||
556 | |||
557 | $items = $rss->get_items(); |
||
558 | |||
559 | if (!is_array($items)) { |
||
560 | Debug::log("no articles found.", Debug::$LOG_VERBOSE); |
||
561 | |||
562 | $sth = $pdo->prepare("UPDATE ttrss_feeds |
||
563 | SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?"); |
||
564 | $sth->execute([$feed]); |
||
565 | |||
566 | return true; // no articles |
||
567 | } |
||
568 | |||
569 | Debug::log("processing articles...", Debug::$LOG_VERBOSE); |
||
570 | |||
571 | $tstart = time(); |
||
572 | |||
573 | foreach ($items as $item) { |
||
574 | $pdo->beginTransaction(); |
||
575 | |||
576 | if (Debug::get_loglevel() >= 3) { |
||
577 | print_r($item); |
||
578 | } |
||
579 | |||
580 | if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) { |
||
581 | Debug::log("looks like there's too many articles to process at once, breaking out", Debug::$LOG_VERBOSE); |
||
582 | $pdo->commit(); |
||
583 | break; |
||
584 | } |
||
585 | |||
586 | $entry_guid = strip_tags($item->get_id()); |
||
587 | if (!$entry_guid) { |
||
588 | $entry_guid = strip_tags($item->get_link()); |
||
589 | } |
||
590 | if (!$entry_guid) { |
||
591 | $entry_guid = RSSUtils::make_guid_from_title($item->get_title()); |
||
592 | } |
||
593 | |||
594 | if (!$entry_guid) { |
||
595 | $pdo->commit(); |
||
596 | continue; |
||
597 | } |
||
598 | |||
599 | $entry_guid = "$owner_uid,$entry_guid"; |
||
600 | |||
601 | $entry_guid_hashed = 'SHA1:'.sha1($entry_guid); |
||
602 | |||
603 | Debug::log("guid $entry_guid / $entry_guid_hashed", Debug::$LOG_VERBOSE); |
||
604 | |||
605 | $entry_timestamp = (int) $item->get_date(); |
||
606 | |||
607 | Debug::log("orig date: ".$item->get_date(), Debug::$LOG_VERBOSE); |
||
608 | |||
609 | $entry_title = strip_tags($item->get_title()); |
||
610 | |||
611 | $entry_link = rewrite_relative_url($site_url, clean($item->get_link())); |
||
612 | |||
613 | $entry_language = mb_substr(trim($item->get_language()), 0, 2); |
||
614 | |||
615 | Debug::log("title $entry_title", Debug::$LOG_VERBOSE); |
||
616 | Debug::log("link $entry_link", Debug::$LOG_VERBOSE); |
||
617 | Debug::log("language $entry_language", Debug::$LOG_VERBOSE); |
||
618 | |||
619 | if (!$entry_title) { |
||
620 | $entry_title = date("Y-m-d H:i:s", $entry_timestamp); |
||
621 | } |
||
622 | ; |
||
623 | |||
624 | $entry_content = $item->get_content(); |
||
625 | if (!$entry_content) { |
||
626 | $entry_content = $item->get_description(); |
||
627 | } |
||
628 | |||
629 | if (Debug::get_loglevel() >= 3) { |
||
630 | print "content: "; |
||
631 | print htmlspecialchars($entry_content); |
||
632 | print "\n"; |
||
633 | } |
||
634 | |||
635 | $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245); |
||
636 | $num_comments = (int) $item->get_comments_count(); |
||
637 | |||
638 | $entry_author = strip_tags($item->get_author()); |
||
639 | $entry_guid = mb_substr($entry_guid, 0, 245); |
||
640 | |||
641 | Debug::log("author $entry_author", Debug::$LOG_VERBOSE); |
||
642 | Debug::log("looking for tags...", Debug::$LOG_VERBOSE); |
||
643 | |||
644 | $entry_tags = $item->get_categories(); |
||
645 | Debug::log("tags found: ".join(", ", $entry_tags), Debug::$LOG_VERBOSE); |
||
646 | |||
647 | Debug::log("done collecting data.", Debug::$LOG_VERBOSE); |
||
648 | |||
649 | $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries |
||
650 | WHERE guid = ? OR guid = ?"); |
||
651 | $sth->execute([$entry_guid, $entry_guid_hashed]); |
||
652 | |||
653 | if ($row = $sth->fetch()) { |
||
654 | $base_entry_id = $row["id"]; |
||
655 | $entry_stored_hash = $row["content_hash"]; |
||
656 | $article_labels = Article::get_article_labels($base_entry_id, $owner_uid); |
||
657 | |||
658 | $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid); |
||
659 | $entry_tags = array_unique(array_merge($entry_tags, $existing_tags)); |
||
660 | } else { |
||
661 | $base_entry_id = false; |
||
662 | $entry_stored_hash = ""; |
||
663 | $article_labels = array(); |
||
664 | } |
||
665 | |||
666 | $article = array("owner_uid" => $owner_uid, // read only |
||
667 | "guid" => $entry_guid, // read only |
||
668 | "guid_hashed" => $entry_guid_hashed, // read only |
||
669 | "title" => $entry_title, |
||
670 | "content" => $entry_content, |
||
671 | "link" => $entry_link, |
||
672 | "labels" => $article_labels, // current limitation: can add labels to article, can't remove them |
||
673 | "tags" => $entry_tags, |
||
674 | "author" => $entry_author, |
||
675 | "force_catchup" => false, // ugly hack for the time being |
||
676 | "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed |
||
677 | "language" => $entry_language, |
||
678 | "timestamp" => $entry_timestamp, |
||
679 | "num_comments" => $num_comments, |
||
680 | "feed" => array("id" => $feed, |
||
681 | "fetch_url" => $fetch_url, |
||
682 | "site_url" => $site_url, |
||
683 | "cache_images" => $cache_images) |
||
684 | ); |
||
685 | |||
686 | $entry_plugin_data = ""; |
||
687 | $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost); |
||
688 | |||
689 | Debug::log("article hash: $entry_current_hash [stored=$entry_stored_hash]", Debug::$LOG_VERBOSE); |
||
690 | |||
691 | if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) { |
||
692 | Debug::log("stored article seems up to date [IID: $base_entry_id], updating timestamp only", Debug::$LOG_VERBOSE); |
||
693 | |||
694 | // we keep encountering the entry in feeds, so we need to |
||
695 | // update date_updated column so that we don't get horrible |
||
696 | // dupes when the entry gets purged and reinserted again e.g. |
||
697 | // in the case of SLOW SLOW OMG SLOW updating feeds |
||
698 | |||
699 | $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW() |
||
700 | WHERE id = ?"); |
||
701 | $sth->execute([$base_entry_id]); |
||
702 | |||
703 | $pdo->commit(); |
||
704 | continue; |
||
705 | } |
||
706 | |||
707 | Debug::log("hash differs, applying plugin filters:", Debug::$LOG_VERBOSE); |
||
708 | |||
709 | foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) { |
||
710 | Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE); |
||
711 | |||
712 | $start = microtime(true); |
||
713 | $article = $plugin->hook_article_filter($article); |
||
714 | |||
715 | Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE); |
||
716 | |||
717 | $entry_plugin_data .= mb_strtolower(get_class($plugin)).","; |
||
718 | } |
||
719 | |||
720 | if (Debug::get_loglevel() >= 3) { |
||
721 | print "processed content: "; |
||
722 | print htmlspecialchars($article["content"]); |
||
723 | print "\n"; |
||
724 | } |
||
725 | |||
726 | Debug::log("plugin data: $entry_plugin_data", Debug::$LOG_VERBOSE); |
||
727 | |||
728 | // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077 |
||
729 | if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") { |
||
730 | foreach ($article as $k => $v) { |
||
731 | // i guess we'll have to take the risk of 4byte unicode labels & tags here |
||
732 | if (is_string($article[$k])) { |
||
733 | $article[$k] = RSSUtils::strip_utf8mb4($v); |
||
734 | } |
||
735 | } |
||
736 | } |
||
737 | |||
738 | /* Collect article tags here so we could filter by them: */ |
||
739 | |||
740 | $matched_rules = []; |
||
741 | $matched_filters = []; |
||
742 | |||
743 | $article_filters = RSSUtils::get_article_filters($filters, $article["title"], |
||
744 | $article["content"], $article["link"], $article["author"], |
||
745 | $article["tags"], $matched_rules, $matched_filters); |
||
746 | |||
747 | // $article_filters should be renamed to something like $filter_actions; actual filter objects are in $matched_filters |
||
748 | foreach ($pluginhost->get_hooks(PluginHost::HOOK_FILTER_TRIGGERED) as $plugin) { |
||
749 | $plugin->hook_filter_triggered($feed, $owner_uid, $article, $matched_filters, $matched_rules, $article_filters); |
||
750 | } |
||
751 | |||
752 | $matched_filter_ids = array_map(function($f) { return $f['id']; }, $matched_filters); |
||
753 | |||
754 | if (count($matched_filter_ids) > 0) { |
||
755 | $filter_ids_qmarks = arr_qmarks($matched_filter_ids); |
||
756 | |||
757 | $fsth = $pdo->prepare("UPDATE ttrss_filters2 SET last_triggered = NOW() WHERE |
||
758 | id IN ($filter_ids_qmarks) AND owner_uid = ?"); |
||
759 | |||
760 | $fsth->execute(array_merge($matched_filter_ids, [$owner_uid])); |
||
761 | } |
||
762 | |||
763 | if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) { |
||
764 | Debug::log("matched filters: ", Debug::$LOG_VERBOSE); |
||
765 | |||
766 | if (count($matched_filters != 0)) { |
||
767 | print_r($matched_filters); |
||
768 | } |
||
769 | |||
770 | Debug::log("matched filter rules: ", Debug::$LOG_VERBOSE); |
||
771 | |||
772 | if (count($matched_rules) != 0) { |
||
773 | print_r($matched_rules); |
||
774 | } |
||
775 | |||
776 | Debug::log("filter actions: ", Debug::$LOG_VERBOSE); |
||
777 | |||
778 | if (count($article_filters) != 0) { |
||
779 | print_r($article_filters); |
||
780 | } |
||
781 | } |
||
782 | |||
783 | $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin"); |
||
784 | $plugin_filter_actions = $pluginhost->get_filter_actions(); |
||
785 | |||
786 | if (count($plugin_filter_names) > 0) { |
||
787 | Debug::log("applying plugin filter actions...", Debug::$LOG_VERBOSE); |
||
788 | |||
789 | foreach ($plugin_filter_names as $pfn) { |
||
790 | list($pfclass, $pfaction) = explode(":", $pfn["param"]); |
||
791 | |||
792 | if (isset($plugin_filter_actions[$pfclass])) { |
||
793 | $plugin = $pluginhost->get_plugin($pfclass); |
||
794 | |||
795 | Debug::log("... $pfclass: $pfaction", Debug::$LOG_VERBOSE); |
||
796 | |||
797 | if ($plugin) { |
||
798 | $start = microtime(true); |
||
799 | $article = $plugin->hook_article_filter_action($article, $pfaction); |
||
800 | |||
801 | Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE); |
||
802 | } else { |
||
803 | Debug::log("??? $pfclass: plugin object not found.", Debug::$LOG_VERBOSE); |
||
804 | } |
||
805 | } else { |
||
806 | Debug::log("??? $pfclass: filter plugin not registered.", Debug::$LOG_VERBOSE); |
||
807 | } |
||
808 | } |
||
809 | } |
||
810 | |||
811 | $entry_tags = $article["tags"]; |
||
812 | $entry_title = strip_tags($article["title"]); |
||
813 | $entry_author = mb_substr(strip_tags($article["author"]), 0, 245); |
||
814 | $entry_link = strip_tags($article["link"]); |
||
815 | $entry_content = $article["content"]; // escaped below |
||
816 | $entry_force_catchup = $article["force_catchup"]; |
||
817 | $article_labels = $article["labels"]; |
||
818 | $entry_score_modifier = (int) $article["score_modifier"]; |
||
819 | $entry_language = $article["language"]; |
||
820 | $entry_timestamp = $article["timestamp"]; |
||
821 | $num_comments = $article["num_comments"]; |
||
822 | |||
823 | if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) { |
||
824 | $entry_timestamp = time(); |
||
825 | } |
||
826 | |||
827 | $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); |
||
828 | |||
829 | Debug::log("date $entry_timestamp [$entry_timestamp_fmt]", Debug::$LOG_VERBOSE); |
||
830 | Debug::log("num_comments: $num_comments", Debug::$LOG_VERBOSE); |
||
831 | |||
832 | if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) { |
||
833 | Debug::log("article labels:", Debug::$LOG_VERBOSE); |
||
834 | |||
835 | if (count($article_labels) != 0) { |
||
836 | print_r($article_labels); |
||
837 | } |
||
838 | } |
||
839 | |||
840 | Debug::log("force catchup: $entry_force_catchup", Debug::$LOG_VERBOSE); |
||
841 | |||
842 | if ($cache_images) { |
||
843 | RSSUtils::cache_media($entry_content, $site_url); |
||
844 | } |
||
845 | |||
846 | $csth = $pdo->prepare("SELECT id FROM ttrss_entries |
||
847 | WHERE guid = ? OR guid = ?"); |
||
848 | $csth->execute([$entry_guid, $entry_guid_hashed]); |
||
849 | |||
850 | if (!$row = $csth->fetch()) { |
||
851 | |||
852 | Debug::log("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", Debug::$LOG_VERBOSE); |
||
853 | |||
854 | // base post entry does not exist, create it |
||
855 | |||
856 | $usth = $pdo->prepare( |
||
857 | "INSERT INTO ttrss_entries |
||
858 | (title, |
||
859 | guid, |
||
860 | link, |
||
861 | updated, |
||
862 | content, |
||
863 | content_hash, |
||
864 | no_orig_date, |
||
865 | date_updated, |
||
866 | date_entered, |
||
867 | comments, |
||
868 | num_comments, |
||
869 | plugin_data, |
||
870 | lang, |
||
871 | author) |
||
872 | VALUES |
||
873 | (?, ?, ?, ?, ?, ?, |
||
874 | false, |
||
875 | NOW(), |
||
876 | ?, ?, ?, ?, ?, ?)"); |
||
877 | |||
878 | $usth->execute([$entry_title, |
||
879 | $entry_guid_hashed, |
||
880 | $entry_link, |
||
881 | $entry_timestamp_fmt, |
||
882 | "$entry_content", |
||
883 | $entry_current_hash, |
||
884 | $date_feed_processed, |
||
885 | $entry_comments, |
||
886 | (int) $num_comments, |
||
887 | $entry_plugin_data, |
||
888 | "$entry_language", |
||
889 | "$entry_author"]); |
||
890 | |||
891 | } |
||
892 | |||
893 | $csth->execute([$entry_guid, $entry_guid_hashed]); |
||
894 | |||
895 | $entry_ref_id = 0; |
||
896 | $entry_int_id = 0; |
||
897 | |||
898 | if ($row = $csth->fetch()) { |
||
899 | |||
900 | Debug::log("base guid found, checking for user record", Debug::$LOG_VERBOSE); |
||
901 | |||
902 | $ref_id = $row['id']; |
||
903 | $entry_ref_id = $ref_id; |
||
904 | |||
905 | if (RSSUtils::find_article_filter($article_filters, "filter")) { |
||
906 | Debug::log("article is filtered out, nothing to do.", Debug::$LOG_VERBOSE); |
||
907 | $pdo->commit(); |
||
908 | continue; |
||
909 | } |
||
910 | |||
911 | $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier; |
||
912 | |||
913 | Debug::log("initial score: $score [including plugin modifier: $entry_score_modifier]", Debug::$LOG_VERBOSE); |
||
914 | |||
915 | // check for user post link to main table |
||
916 | |||
917 | $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE |
||
918 | ref_id = ? AND owner_uid = ?"); |
||
919 | $sth->execute([$ref_id, $owner_uid]); |
||
920 | |||
921 | // okay it doesn't exist - create user entry |
||
922 | if ($row = $sth->fetch()) { |
||
923 | $entry_ref_id = $row["ref_id"]; |
||
924 | $entry_int_id = $row["int_id"]; |
||
925 | |||
926 | Debug::log("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE); |
||
927 | } else { |
||
928 | |||
929 | Debug::log("user record not found, creating...", Debug::$LOG_VERBOSE); |
||
930 | |||
931 | if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) { |
||
932 | $unread = 1; |
||
933 | $last_read_qpart = null; |
||
934 | } else { |
||
935 | $unread = 0; |
||
936 | $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted |
||
937 | } |
||
938 | |||
939 | if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) { |
||
940 | $marked = 1; |
||
941 | } else { |
||
942 | $marked = 0; |
||
943 | } |
||
944 | |||
945 | if (RSSUtils::find_article_filter($article_filters, 'publish')) { |
||
946 | $published = 1; |
||
947 | } else { |
||
948 | $published = 0; |
||
949 | } |
||
950 | |||
951 | $last_marked = ($marked == 1) ? 'NOW()' : 'NULL'; |
||
952 | $last_published = ($published == 1) ? 'NOW()' : 'NULL'; |
||
953 | |||
954 | $sth = $pdo->prepare( |
||
955 | "INSERT INTO ttrss_user_entries |
||
956 | (ref_id, owner_uid, feed_id, unread, last_read, marked, |
||
957 | published, score, tag_cache, label_cache, uuid, |
||
958 | last_marked, last_published) |
||
959 | VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")"); |
||
960 | |||
961 | $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked, |
||
962 | $published, $score]); |
||
963 | |||
964 | $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE |
||
965 | ref_id = ? AND owner_uid = ? AND |
||
966 | feed_id = ? LIMIT 1"); |
||
967 | |||
968 | $sth->execute([$ref_id, $owner_uid, $feed]); |
||
969 | |||
970 | if ($row = $sth->fetch()) { |
||
971 | $entry_int_id = $row['int_id']; |
||
972 | } |
||
973 | } |
||
974 | |||
975 | Debug::log("resulting RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE); |
||
976 | |||
977 | if (DB_TYPE == "pgsql") { |
||
978 | $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),"; |
||
979 | } else { |
||
980 | $tsvector_qpart = ""; |
||
981 | } |
||
982 | |||
983 | $sth = $pdo->prepare("UPDATE ttrss_entries |
||
984 | SET title = :title, |
||
985 | $tsvector_qpart |
||
986 | content = :content, |
||
987 | content_hash = :content_hash, |
||
988 | updated = :updated, |
||
989 | date_updated = NOW(), |
||
990 | num_comments = :num_comments, |
||
991 | plugin_data = :plugin_data, |
||
992 | author = :author, |
||
993 | lang = :lang |
||
994 | WHERE id = :id"); |
||
995 | |||
996 | $params = [":title" => $entry_title, |
||
997 | ":content" => "$entry_content", |
||
998 | ":content_hash" => $entry_current_hash, |
||
999 | ":updated" => $entry_timestamp_fmt, |
||
1000 | ":num_comments" => (int) $num_comments, |
||
1001 | ":plugin_data" => $entry_plugin_data, |
||
1002 | ":author" => "$entry_author", |
||
1003 | ":lang" => $entry_language, |
||
1004 | ":id" => $ref_id]; |
||
1005 | |||
1006 | if (DB_TYPE == "pgsql") { |
||
1007 | $params[":ts_lang"] = $feed_language; |
||
1008 | $params[":ts_content"] = mb_substr(strip_tags($entry_title." ".$entry_content), 0, 900000); |
||
1009 | } |
||
1010 | |||
1011 | $sth->execute($params); |
||
1012 | |||
1013 | // update aux data |
||
1014 | $sth = $pdo->prepare("UPDATE ttrss_user_entries |
||
1015 | SET score = ? WHERE ref_id = ?"); |
||
1016 | $sth->execute([$score, $ref_id]); |
||
1017 | |||
1018 | if ($mark_unread_on_update && |
||
1019 | !$entry_force_catchup && |
||
1020 | !RSSUtils::find_article_filter($article_filters, 'catchup')) { |
||
1021 | |||
1022 | Debug::log("article updated, marking unread as requested.", Debug::$LOG_VERBOSE); |
||
1023 | |||
1024 | $sth = $pdo->prepare("UPDATE ttrss_user_entries |
||
1025 | SET last_read = null, unread = true WHERE ref_id = ?"); |
||
1026 | $sth->execute([$ref_id]); |
||
1027 | } else { |
||
1028 | Debug::log("article updated, but we're forbidden to mark it unread.", Debug::$LOG_VERBOSE); |
||
1029 | } |
||
1030 | } |
||
1031 | |||
1032 | Debug::log("assigning labels [other]...", Debug::$LOG_VERBOSE); |
||
1033 | |||
1034 | foreach ($article_labels as $label) { |
||
1035 | Labels::add_article($entry_ref_id, $label[1], $owner_uid); |
||
1036 | } |
||
1037 | |||
1038 | Debug::log("assigning labels [filters]...", Debug::$LOG_VERBOSE); |
||
1039 | |||
1040 | RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters, |
||
1041 | $owner_uid, $article_labels); |
||
1042 | |||
1043 | Debug::log("looking for enclosures...", Debug::$LOG_VERBOSE); |
||
1044 | |||
1045 | // enclosures |
||
1046 | |||
1047 | $enclosures = array(); |
||
1048 | |||
1049 | $encs = $item->get_enclosures(); |
||
1050 | |||
1051 | if (is_array($encs)) { |
||
1052 | foreach ($encs as $e) { |
||
1053 | $e_item = array( |
||
1054 | rewrite_relative_url($site_url, $e->link), |
||
1055 | $e->type, $e->length, $e->title, $e->width, $e->height); |
||
1056 | |||
1057 | // Yet another episode of "mysql utf8_general_ci is gimped" |
||
1058 | if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") { |
||
1059 | for ($i = 0; $i < count($e_item); $i++) { |
||
1060 | if (is_string($e_item[$i])) { |
||
1061 | $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]); |
||
1062 | } |
||
1063 | } |
||
1064 | } |
||
1065 | |||
1066 | array_push($enclosures, $e_item); |
||
1067 | } |
||
1068 | } |
||
1069 | |||
1070 | if ($cache_images) { |
||
1071 | RSSUtils::cache_enclosures($enclosures, $site_url); |
||
1072 | } |
||
1073 | |||
1074 | if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) { |
||
1075 | Debug::log("article enclosures:", Debug::$LOG_VERBOSE); |
||
1076 | print_r($enclosures); |
||
1077 | } |
||
1078 | |||
1079 | $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures |
||
1080 | WHERE content_url = ? AND content_type = ? AND post_id = ?"); |
||
1081 | |||
1082 | $usth = $pdo->prepare("INSERT INTO ttrss_enclosures |
||
1083 | (content_url, content_type, title, duration, post_id, width, height) VALUES |
||
1084 | (?, ?, ?, ?, ?, ?, ?)"); |
||
1085 | |||
1086 | foreach ($enclosures as $enc) { |
||
1087 | $enc_url = $enc[0]; |
||
1088 | $enc_type = $enc[1]; |
||
1089 | $enc_dur = (int) $enc[2]; |
||
1090 | $enc_title = $enc[3]; |
||
1091 | $enc_width = intval($enc[4]); |
||
1092 | $enc_height = intval($enc[5]); |
||
1093 | |||
1094 | $esth->execute([$enc_url, $enc_type, $entry_ref_id]); |
||
1095 | |||
1096 | if (!$esth->fetch()) { |
||
1097 | $usth->execute([$enc_url, $enc_type, (string) $enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]); |
||
1098 | } |
||
1099 | } |
||
1100 | |||
1101 | // check for manual tags (we have to do it here since they're loaded from filters) |
||
1102 | |||
1103 | foreach ($article_filters as $f) { |
||
1104 | if ($f["type"] == "tag") { |
||
1105 | |||
1106 | $manual_tags = trim_array(explode(",", $f["param"])); |
||
1107 | |||
1108 | foreach ($manual_tags as $tag) { |
||
1109 | array_push($entry_tags, $tag); |
||
1110 | } |
||
1111 | } |
||
1112 | } |
||
1113 | |||
1114 | // Skip boring tags |
||
1115 | |||
1116 | $boring_tags = trim_array(explode(",", mb_strtolower(get_pref( |
||
1117 | 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8'))); |
||
1118 | |||
1119 | $filtered_tags = array(); |
||
1120 | $tags_to_cache = array(); |
||
1121 | |||
1122 | foreach ($entry_tags as $tag) { |
||
1123 | if (array_search($tag, $boring_tags) === false) { |
||
1124 | array_push($filtered_tags, $tag); |
||
1125 | } |
||
1126 | } |
||
1127 | |||
1128 | $filtered_tags = array_unique($filtered_tags); |
||
1129 | |||
1130 | if (Debug::get_loglevel() >= Debug::$LOG_VERBOSE) { |
||
1131 | Debug::log("filtered tags: ".implode(", ", $filtered_tags), Debug::$LOG_VERBOSE); |
||
1132 | |||
1133 | } |
||
1134 | |||
1135 | // Save article tags in the database |
||
1136 | |||
1137 | if (count($filtered_tags) > 0) { |
||
1138 | |||
1139 | $tsth = $pdo->prepare("SELECT id FROM ttrss_tags |
||
1140 | WHERE tag_name = ? AND post_int_id = ? AND |
||
1141 | owner_uid = ? LIMIT 1"); |
||
1142 | |||
1143 | $usth = $pdo->prepare("INSERT INTO ttrss_tags |
||
1144 | (owner_uid,tag_name,post_int_id) |
||
1145 | VALUES (?, ?, ?)"); |
||
1146 | |||
1147 | $filtered_tags = FeedItem_Common::normalize_categories($filtered_tags); |
||
1148 | |||
1149 | foreach ($filtered_tags as $tag) { |
||
1150 | $tsth->execute([$tag, $entry_int_id, $owner_uid]); |
||
1151 | |||
1152 | if (!$tsth->fetch()) { |
||
1153 | $usth->execute([$owner_uid, $tag, $entry_int_id]); |
||
1154 | } |
||
1155 | |||
1156 | array_push($tags_to_cache, $tag); |
||
1157 | } |
||
1158 | |||
1159 | /* update the cache */ |
||
1160 | $tags_str = join(",", $tags_to_cache); |
||
1161 | |||
1162 | $tsth = $pdo->prepare("UPDATE ttrss_user_entries |
||
1163 | SET tag_cache = ? WHERE ref_id = ? |
||
1164 | AND owner_uid = ?"); |
||
1165 | $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]); |
||
1166 | } |
||
1167 | |||
1168 | Debug::log("article processed", Debug::$LOG_VERBOSE); |
||
1169 | |||
1170 | $pdo->commit(); |
||
1171 | } |
||
1172 | |||
1173 | Debug::log("purging feed...", Debug::$LOG_VERBOSE); |
||
1174 | |||
1175 | Feeds::purge_feed($feed, 0); |
||
1176 | |||
1177 | $sth = $pdo->prepare("UPDATE ttrss_feeds |
||
1178 | SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?"); |
||
1179 | $sth->execute([$feed]); |
||
1180 | |||
1181 | } else { |
||
1182 | |||
1183 | $error_msg = mb_substr($rss->error(), 0, 245); |
||
1184 | |||
1185 | Debug::log("fetch error: $error_msg", Debug::$LOG_VERBOSE); |
||
1186 | |||
1187 | if (count($rss->errors()) > 1) { |
||
1188 | foreach ($rss->errors() as $error) { |
||
1189 | Debug::log("+ $error", Debug::$LOG_VERBOSE); |
||
1190 | } |
||
1191 | } |
||
1192 | |||
1193 | $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?, |
||
1194 | last_updated = NOW(), last_unconditional = NOW() WHERE id = ?"); |
||
1195 | $sth->execute([$error_msg, $feed]); |
||
1196 | |||
1197 | unset($rss); |
||
1198 | |||
1199 | Debug::log("update failed.", Debug::$LOG_VERBOSE); |
||
1200 | return false; |
||
1201 | } |
||
1202 | |||
1203 | Debug::log("update done.", Debug::$LOG_VERBOSE); |
||
1204 | |||
1205 | return true; |
||
1206 | } |
||
1755 |