Issues (1270)

classes/rssutils.php (1 issue)

1
<?php
2
class RSSUtils {
3
    /**
     * Computes a content hash for an article array.
     *
     * Every non-null field except "feed" contributes sha1("<key>:" . sha1(<value>)),
     * where the value has its HTML tags stripped and array values are first joined
     * with commas. The per-field digests are concatenated in iteration order,
     * prefixed with the comma-joined plugin names, and hashed once more.
     *
     * @param array  $article    article fields keyed by name
     * @param object $pluginhost object exposing get_plugin_names()
     * @return string 40-character hexadecimal SHA-1 digest
     */
    public static function calculate_article_hash($article, $pluginhost) {
        $digests = array();

        foreach ($article as $field => $value) {
            // the "feed" entry and unset (null) fields do not take part in the hash
            if ($field == "feed" || !isset($value)) {
                continue;
            }

            $flattened = is_array($value) ? implode(",", $value) : $value;

            $digests[] = sha1("$field:".sha1(strip_tags($flattened)));
        }

        $plugin_names = implode(",", $pluginhost->get_plugin_names());

        return sha1($plugin_names.implode("", $digests));
    }
16
17
    // Strips utf8mb4 characters (i.e. emoji) for mysql
18
    /**
     * Replaces every code point in the range U+10000..U+10FFFF (the characters
     * that need four bytes in UTF-8, e.g. emoji) with U+FFFD.
     *
     * @param string $str UTF-8 text
     * @return string|null the filtered text, as returned by preg_replace()
     */
    public static function strip_utf8mb4($str) {
        $four_byte_chars = '/[\x{10000}-\x{10FFFF}]/u';
        $replacement = "\xEF\xBF\xBD"; // UTF-8 encoding of U+FFFD

        return preg_replace($four_byte_chars, $replacement, $str);
    }
21
22
    /**
     * Deletes every row from ttrss_feedbrowser_cache.
     */
    public static function cleanup_feed_browser() {
        Db::pdo()->query("DELETE FROM ttrss_feedbrowser_cache");
    }
26
27
    /**
     * Runs one update batch: selects the feed URLs that are due, updates one
     * matching feed per URL, then runs per-user housekeeping and sends digests.
     *
     * Terminates the script via die() when the database schema version does not
     * match SCHEMA_VERSION.
     *
     * @param int $limit maximum number of feed URLs to schedule; a falsy value
     *                   disables the LIMIT clause
     * @return int number of feeds for which an update was attempted
     */
    public static function update_daemon_common($limit = DAEMON_FEED_LIMIT) {
        $schema_version = get_schema_version();

        if ($schema_version != SCHEMA_VERSION) {
            die("Schema version is wrong, please upgrade the database.\n");
        }

        $pdo = Db::pdo();

        $is_pgsql = DB_TYPE == "pgsql";

        // Optionally restrict the batch to users who logged in recently.
        $login_thresh_qpart = "";

        if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
            if ($is_pgsql) {
                $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
            } else {
                $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
            }
        }

        // A feed is due when its own interval (or, for interval 0, the user's
        // DEFAULT_UPDATE_INTERVAL preference) has elapsed, or when it has never
        // been updated. A preference value of '-1' excludes the feed.
        if ($is_pgsql) {
            $update_limit_qpart = "AND ((
					ttrss_feeds.update_interval = 0
					AND ttrss_user_prefs.value != '-1'
					AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
				) OR (
					ttrss_feeds.update_interval > 0
					AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
				) OR (ttrss_feeds.last_updated IS NULL
					AND ttrss_user_prefs.value != '-1')
				OR (last_updated = '1970-01-01 00:00:00'
					AND ttrss_user_prefs.value != '-1'))";
        } else {
            $update_limit_qpart = "AND ((
					ttrss_feeds.update_interval = 0
					AND ttrss_user_prefs.value != '-1'
					AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
				) OR (
					ttrss_feeds.update_interval > 0
					AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
				) OR (ttrss_feeds.last_updated IS NULL
					AND ttrss_user_prefs.value != '-1')
				OR (last_updated = '1970-01-01 00:00:00'
					AND ttrss_user_prefs.value != '-1'))";
        }

        // Test if feed is currently being updated by another process.
        if ($is_pgsql) {
            $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
        } else {
            $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
        }

        $query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

        // Update the least recently updated feeds first
        $query_order = "ORDER BY last_updated";
        if ($is_pgsql) {
            $query_order .= " NULLS FIRST";
        }

        $query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
			FROM
				ttrss_feeds, ttrss_users, ttrss_user_prefs
			WHERE
				ttrss_feeds.owner_uid = ttrss_users.id
				AND ttrss_user_prefs.profile IS NULL
				AND ttrss_users.id = ttrss_user_prefs.owner_uid
				AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
				$login_thresh_qpart $update_limit_qpart
				$updstart_thresh_qpart
				$query_order $query_limit";

        $res = $pdo->query($query);

        $feeds_to_update = array();
        while ($line = $res->fetch()) {
            $feeds_to_update[] = $line['feed_url'];
        }

        Debug::log(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

        // Update last_update_started before actually starting the batch
        // in order to minimize collision risk for parallel daemon tasks
        if (count($feeds_to_update) > 0) {
            $feeds_qmarks = arr_qmarks($feeds_to_update);

            $tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
				WHERE feed_url IN ($feeds_qmarks)");
            $tmph->execute($feeds_to_update);
        }

        $nf = 0;
        $bstarted = microtime(true);

        $batch_owners = array();

        // Looks up a due feed (id, last_updated, owner) for a given URL; only the
        // first row of each result is consumed below.
        $usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
			FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
				ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
				ttrss_users.id = ttrss_user_prefs.owner_uid AND
				ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
				ttrss_user_prefs.profile IS NULL AND
				feed_url = ?
				$update_limit_qpart
				$login_thresh_qpart
			ORDER BY ttrss_feeds.id $query_limit");

        foreach ($feeds_to_update as $feed) {
            Debug::log("Base feed: $feed");

            $usth->execute([$feed]);

            $tline = $usth->fetch();

            if (!$tline) {
                continue;
            }

            Debug::log(" => ".$tline["last_updated"].", ".$tline["id"]." ".$tline["owner_uid"]);

            // remember each owner once so housekeeping runs a single time per user
            if (!in_array($tline["owner_uid"], $batch_owners)) {
                $batch_owners[] = $tline["owner_uid"];
            }

            $fstarted = microtime(true);

            try {
                // update_rss_feed() declares two parameters: ($feed, $no_cache)
                RSSUtils::update_rss_feed($tline["id"], true);
            } catch (PDOException $e) {
                Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString());

                try {
                    $pdo->rollback();
                } catch (PDOException $rollback_error) {
                    // it doesn't matter if there wasn't actually anything to rollback, PDO Exception can be
                    // thrown outside of an active transaction during feed update
                }
            }

            Debug::log(sprintf("    %.4f (sec)", microtime(true) - $fstarted));

            ++$nf;
        }

        if ($nf > 0) {
            // measure once so the total and the per-feed average are consistent
            $elapsed = microtime(true) - $bstarted;

            Debug::log(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
                $elapsed, $elapsed / $nf));
        }

        foreach ($batch_owners as $owner_uid) {
            Debug::log("Running housekeeping tasks for user $owner_uid...");

            RSSUtils::housekeeping_user($owner_uid);
        }

        // Send feed digests by email if needed.
        Digest::send_headlines_digests();

        return $nf;
    }
185
186
    // this is used when subscribing
187
    /**
     * Fills in the stored title and site URL of a feed.
     *
     * The info is first requested from HOOK_FEED_BASIC_INFO plugins of the feed
     * owner; when they provide nothing, the feed is downloaded and parsed.
     * The title is written only when the stored title is empty or "[Unknown]";
     * the site URL is written whenever a non-empty value differs from the stored one.
     * Does nothing when the feed id is not found.
     *
     * @param mixed $feed id of the row in ttrss_feeds
     * @return void
     */
    public static function set_basic_feed_info($feed) {
        $pdo = Db::pdo();

        $sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
				FROM ttrss_feeds WHERE id = ?");
        $sth->execute([$feed]);

        $row = $sth->fetch();

        if (!$row) {
            return;
        }

        $owner_uid = $row["owner_uid"];
        $auth_login = $row["auth_login"];
        $auth_pass = $row["auth_pass"];
        $fetch_url = $row["feed_url"];

        // a dedicated plugin host is loaded with the feed owner's plugins
        $pluginhost = new PluginHost();
        $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

        $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
        $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
        $pluginhost->load_data();

        $basic_info = array();
        foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
            $basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
        }

        // no plugin supplied anything: fetch and parse the feed ourselves
        if (!$basic_info) {
            $feed_data = fetch_file_contents($fetch_url, false,
                $auth_login, $auth_pass, false,
                FEED_FETCH_TIMEOUT,
                0);

            $feed_data = trim($feed_data);

            $rss = new FeedParser($feed_data);
            $rss->init();

            if (!$rss->error()) {
                $basic_info = array(
                    'title' => mb_substr(clean($rss->get_title()), 0, 199),
                    'site_url' => mb_substr(rewrite_relative_url($fetch_url, clean($rss->get_link())), 0, 245)
                );
            }
        }

        if (!$basic_info || !is_array($basic_info)) {
            return;
        }

        $sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
        $sth->execute([$feed]);

        $row = $sth->fetch();

        if (!$row) {
            return;
        }

        $registered_title = $row["title"];
        $orig_site_url = $row["site_url"];

        // !empty() tolerates a plugin-supplied array that lacks one of the keys
        if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {
            $sth = $pdo->prepare("UPDATE ttrss_feeds SET
							title = ? WHERE id = ?");
            $sth->execute([$basic_info['title'], $feed]);
        }

        if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
            $sth = $pdo->prepare("UPDATE ttrss_feeds SET
							site_url = ? WHERE id = ?");
            $sth->execute([$basic_info['site_url'], $feed]);
        }
    }
259
260
    /**
261
     * @SuppressWarnings(PHPMD.UnusedFormalParameter)
262
     */
263
    public static function update_rss_feed($feed, $no_cache = false) {
264
265
        reset_fetch_domain_quota();
266
267
        Debug::log("start", Debug::$LOG_VERBOSE);
268
269
        $pdo = Db::pdo();
270
271
        $sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
272
        $sth->execute([$feed]);
273
274
        if (!$row = $sth->fetch()) {
275
            Debug::log("feed $feed not found, skipping.");
276
            user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
277
            return false;
278
        }
279
280
        $title = $row["title"];
281
        $site_url = $row["site_url"];
282
283
        // feed was batch-subscribed or something, we need to get basic info
284
        // this is not optimal currently as it fetches stuff separately TODO: optimize
285
        if ($title == "[Unknown]" || !$title || !$site_url) {
286
            Debug::log("setting basic feed info for $feed [$title, $site_url]...");
287
            RSSUtils::set_basic_feed_info($feed);
288
        }
289
290
        $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
291
			feed_url,auth_pass,cache_images,
292
			mark_unread_on_update, owner_uid,
293
			auth_pass_encrypted, feed_language,
294
			last_modified,
295
			".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
296
			FROM ttrss_feeds WHERE id = ?");
297
        $sth->execute([$feed]);
298
299
        if ($row = $sth->fetch()) {
300
301
            $owner_uid = $row["owner_uid"];
302
            $mark_unread_on_update = $row["mark_unread_on_update"];
303
304
            $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
305
				WHERE id = ?");
306
            $sth->execute([$feed]);
307
308
            $auth_login = $row["auth_login"];
309
            $auth_pass = $row["auth_pass"];
310
            $stored_last_modified = $row["last_modified"];
311
            $last_unconditional = $row["last_unconditional"];
312
            $cache_images = $row["cache_images"];
313
            $fetch_url = $row["feed_url"];
314
315
            $feed_language = mb_strtolower($row["feed_language"]);
316
317
            if (!$feed_language) {
318
                            $feed_language = mb_strtolower(get_pref('DEFAULT_SEARCH_LANGUAGE', $owner_uid));
319
            }
320
321
            if (!$feed_language) {
322
                            $feed_language = 'simple';
323
            }
324
325
        } else {
326
            return false;
327
        }
328
329
        $date_feed_processed = date('Y-m-d H:i');
330
331
        $cache_filename = CACHE_DIR."/feeds/".sha1($fetch_url).".xml";
332
333
        $pluginhost = new PluginHost();
334
        $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
335
336
        $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
337
        $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
338
        $pluginhost->load_data();
339
340
        $rss_hash = false;
341
342
        $force_refetch = isset($_REQUEST["force_refetch"]);
343
        $feed_data = "";
344
345
        Debug::log("running HOOK_FETCH_FEED handlers...", Debug::$LOG_VERBOSE);
346
347
        foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
348
            Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE);
349
            $start = microtime(true);
350
            $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
351
            Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
352
        }
353
354
        if ($feed_data) {
355
            Debug::log("feed data has been modified by a plugin.", Debug::$LOG_VERBOSE);
356
        } else {
357
            Debug::log("feed data has not been modified by a plugin.", Debug::$LOG_VERBOSE);
358
        }
359
360
        // try cache
361
        if (!$feed_data &&
362
            file_exists($cache_filename) &&
363
            is_readable($cache_filename) &&
364
            !$auth_login && !$auth_pass &&
365
            filemtime($cache_filename) > time() - 30) {
366
367
            Debug::log("using local cache [$cache_filename].", Debug::$LOG_VERBOSE);
368
369
            @$feed_data = file_get_contents($cache_filename);
370
371
            if ($feed_data) {
372
                $rss_hash = sha1($feed_data);
373
            }
374
375
        } else {
376
            Debug::log("local cache will not be used for this feed", Debug::$LOG_VERBOSE);
377
        }
378
379
        global $fetch_last_modified;
380
381
        // fetch feed from source
382
        if (!$feed_data) {
383
            Debug::log("last unconditional update request: $last_unconditional", Debug::$LOG_VERBOSE);
384
385
            if (ini_get("open_basedir") && function_exists("curl_init")) {
386
                Debug::log("not using CURL due to open_basedir restrictions", Debug::$LOG_VERBOSE);
387
            }
388
389
            if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
390
                Debug::log("maximum allowed interval for conditional requests exceeded, forcing refetch", Debug::$LOG_VERBOSE);
391
392
                $force_refetch = true;
393
            } else {
394
                Debug::log("stored last modified for conditional request: $stored_last_modified", Debug::$LOG_VERBOSE);
395
            }
396
397
            Debug::log("fetching [$fetch_url] (force_refetch: $force_refetch)...", Debug::$LOG_VERBOSE);
398
399
            $feed_data = fetch_file_contents([
400
                "url" => $fetch_url,
401
                "login" => $auth_login,
402
                "pass" => $auth_pass,
403
                "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
404
                "last_modified" => $force_refetch ? "" : $stored_last_modified
405
            ]);
406
407
            $feed_data = trim($feed_data);
408
409
            Debug::log("fetch done.", Debug::$LOG_VERBOSE);
410
            Debug::log("source last modified: ".$fetch_last_modified, Debug::$LOG_VERBOSE);
411
412
            if ($feed_data && $fetch_last_modified != $stored_last_modified) {
413
                $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
414
                $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
415
            }
416
417
            // cache vanilla feed data for re-use
418
            if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR."/feeds")) {
419
                $new_rss_hash = sha1($feed_data);
420
421
                if ($new_rss_hash != $rss_hash) {
422
                    Debug::log("saving $cache_filename", Debug::$LOG_VERBOSE);
423
                    @file_put_contents($cache_filename, $feed_data);
424
                }
425
            }
426
        }
427
428
        if (!$feed_data) {
429
            global $fetch_last_error;
430
            global $fetch_last_error_code;
431
432
            Debug::log("unable to fetch: $fetch_last_error [$fetch_last_error_code]", Debug::$LOG_VERBOSE);
433
434
            // If-Modified-Since
435
            if ($fetch_last_error_code != 304) {
436
                $error_message = $fetch_last_error;
437
            } else {
438
                Debug::log("source claims data not modified, nothing to do.", Debug::$LOG_VERBOSE);
439
                $error_message = "";
440
            }
441
442
            $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
443
					last_updated = NOW() WHERE id = ?");
444
            $sth->execute([$error_message, $feed]);
445
446
            return;
447
        }
448
449
        Debug::log("running HOOK_FEED_FETCHED handlers...", Debug::$LOG_VERBOSE);
450
        $feed_data_checksum = md5($feed_data);
451
452
        foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
453
            Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE);
454
            $start = microtime(true);
455
            $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
456
            Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
457
        }
458
459
        if (md5($feed_data) != $feed_data_checksum) {
460
            Debug::log("feed data has been modified by a plugin.", Debug::$LOG_VERBOSE);
461
        } else {
462
            Debug::log("feed data has not been modified by a plugin.", Debug::$LOG_VERBOSE);
463
        }
464
465
        $rss = new FeedParser($feed_data);
466
        $rss->init();
467
468
        if (!$rss->error()) {
469
470
            Debug::log("running HOOK_FEED_PARSED handlers...", Debug::$LOG_VERBOSE);
471
472
            // We use local pluginhost here because we need to load different per-user feed plugins
473
474
            foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_PARSED) as $plugin) {
475
                Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE);
476
                $start = microtime(true);
477
                $plugin->hook_feed_parsed($rss);
478
                Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
479
            }
480
481
            Debug::log("language: $feed_language", Debug::$LOG_VERBOSE);
482
            Debug::log("processing feed data...", Debug::$LOG_VERBOSE);
483
484
            if (DB_TYPE == "pgsql") {
485
                $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
486
            } else {
487
                $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
488
            }
489
490
            $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
491
				(favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
492
						favicon_needs_check
493
				FROM ttrss_feeds WHERE id = ?");
494
            $sth->execute([$feed]);
495
496
            if ($row = $sth->fetch()) {
497
                $favicon_needs_check = $row["favicon_needs_check"];
498
                $favicon_avg_color = $row["favicon_avg_color"];
499
                $owner_uid = $row["owner_uid"];
500
            } else {
501
                return false;
502
            }
503
504
            $site_url = mb_substr(rewrite_relative_url($fetch_url, clean($rss->get_link())), 0, 245);
505
506
            Debug::log("site_url: $site_url", Debug::$LOG_VERBOSE);
507
            Debug::log("feed_title: ".clean($rss->get_title()), Debug::$LOG_VERBOSE);
508
509
            if ($favicon_needs_check || $force_refetch) {
510
511
                /* terrible hack: if we crash on floicon shit here, we won't check
512
				 * the icon avgcolor again (unless the icon got updated) */
513
514
                $favicon_file = ICONS_DIR."/$feed.ico";
515
                $favicon_modified = @filemtime($favicon_file);
516
517
                Debug::log("checking favicon...", Debug::$LOG_VERBOSE);
518
519
                RSSUtils::check_feed_favicon($site_url, $feed);
520
                $favicon_modified_new = @filemtime($favicon_file);
521
522
                if ($favicon_modified_new > $favicon_modified) {
523
                                    $favicon_avg_color = '';
524
                }
525
526
                $favicon_colorstring = "";
527
                if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
528
                    require_once "colors.php";
529
530
                    $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
531
							id = ?");
532
                    $sth->execute([$feed]);
533
534
                    $favicon_color = calculate_avg_color($favicon_file);
535
536
                    $favicon_colorstring = ",favicon_avg_color = ".$pdo->quote($favicon_color);
537
538
                } else if ($favicon_avg_color == 'fail') {
539
                    Debug::log("floicon failed on this file, not trying to recalculate avg color", Debug::$LOG_VERBOSE);
540
                }
541
542
                $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
543
					$favicon_colorstring WHERE id = ?");
544
                $sth->execute([$feed]);
545
            }
546
547
            Debug::log("loading filters & labels...", Debug::$LOG_VERBOSE);
548
549
            $filters = RSSUtils::load_filters($feed, $owner_uid);
550
551
            if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
552
                print_r($filters);
553
            }
554
555
            Debug::log("".count($filters)." filters loaded.", Debug::$LOG_VERBOSE);
556
557
            $items = $rss->get_items();
558
559
            if (!is_array($items)) {
560
                Debug::log("no articles found.", Debug::$LOG_VERBOSE);
561
562
                $sth = $pdo->prepare("UPDATE ttrss_feeds
563
					SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
564
                $sth->execute([$feed]);
565
566
                return true; // no articles
567
            }
568
569
            Debug::log("processing articles...", Debug::$LOG_VERBOSE);
570
571
            $tstart = time();
572
573
            foreach ($items as $item) {
574
                $pdo->beginTransaction();
575
576
                if (Debug::get_loglevel() >= 3) {
577
                    print_r($item);
578
                }
579
580
                if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
581
                    Debug::log("looks like there's too many articles to process at once, breaking out", Debug::$LOG_VERBOSE);
582
                    $pdo->commit();
583
                    break;
584
                }
585
586
                $entry_guid = strip_tags($item->get_id());
587
                if (!$entry_guid) {
588
                    $entry_guid = strip_tags($item->get_link());
589
                }
590
                if (!$entry_guid) {
591
                    $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
592
                }
593
594
                if (!$entry_guid) {
595
                    $pdo->commit();
596
                    continue;
597
                }
598
599
                $entry_guid = "$owner_uid,$entry_guid";
600
601
                $entry_guid_hashed = 'SHA1:'.sha1($entry_guid);
602
603
                Debug::log("guid $entry_guid / $entry_guid_hashed", Debug::$LOG_VERBOSE);
604
605
                $entry_timestamp = (int) $item->get_date();
606
607
                Debug::log("orig date: ".$item->get_date(), Debug::$LOG_VERBOSE);
608
609
                $entry_title = strip_tags($item->get_title());
610
611
                $entry_link = rewrite_relative_url($site_url, clean($item->get_link()));
612
613
                $entry_language = mb_substr(trim($item->get_language()), 0, 2);
614
615
                Debug::log("title $entry_title", Debug::$LOG_VERBOSE);
616
                Debug::log("link $entry_link", Debug::$LOG_VERBOSE);
617
                Debug::log("language $entry_language", Debug::$LOG_VERBOSE);
618
619
                if (!$entry_title) {
620
                    $entry_title = date("Y-m-d H:i:s", $entry_timestamp);
621
                }
622
                ;
623
624
                $entry_content = $item->get_content();
625
                if (!$entry_content) {
626
                    $entry_content = $item->get_description();
627
                }
628
629
                if (Debug::get_loglevel() >= 3) {
630
                    print "content: ";
631
                    print htmlspecialchars($entry_content);
632
                    print "\n";
633
                }
634
635
                $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
636
                $num_comments = (int) $item->get_comments_count();
637
638
                $entry_author = strip_tags($item->get_author());
639
                $entry_guid = mb_substr($entry_guid, 0, 245);
640
641
                Debug::log("author $entry_author", Debug::$LOG_VERBOSE);
642
                Debug::log("looking for tags...", Debug::$LOG_VERBOSE);
643
644
                $entry_tags = $item->get_categories();
645
                Debug::log("tags found: ".join(", ", $entry_tags), Debug::$LOG_VERBOSE);
646
647
                Debug::log("done collecting data.", Debug::$LOG_VERBOSE);
648
649
                $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
650
					WHERE guid = ? OR guid = ?");
651
                $sth->execute([$entry_guid, $entry_guid_hashed]);
652
653
                if ($row = $sth->fetch()) {
654
                    $base_entry_id = $row["id"];
655
                    $entry_stored_hash = $row["content_hash"];
656
                    $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
657
658
                    $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
659
                    $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
660
                } else {
661
                    $base_entry_id = false;
662
                    $entry_stored_hash = "";
663
                    $article_labels = array();
664
                }
665
666
                $article = array("owner_uid" => $owner_uid, // read only
667
                    "guid" => $entry_guid, // read only
668
                    "guid_hashed" => $entry_guid_hashed, // read only
669
                    "title" => $entry_title,
670
                    "content" => $entry_content,
671
                    "link" => $entry_link,
672
                    "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
673
                    "tags" => $entry_tags,
674
                    "author" => $entry_author,
675
                    "force_catchup" => false, // ugly hack for the time being
676
                    "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
677
                    "language" => $entry_language,
678
                    "timestamp" => $entry_timestamp,
679
                    "num_comments" => $num_comments,
680
                    "feed" => array("id" => $feed,
681
                        "fetch_url" => $fetch_url,
682
                        "site_url" => $site_url,
683
                        "cache_images" => $cache_images)
684
                );
685
686
                $entry_plugin_data = "";
687
                $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
688
689
                Debug::log("article hash: $entry_current_hash [stored=$entry_stored_hash]", Debug::$LOG_VERBOSE);
690
691
                if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
692
                    Debug::log("stored article seems up to date [IID: $base_entry_id], updating timestamp only", Debug::$LOG_VERBOSE);
693
694
                    // we keep encountering the entry in feeds, so we need to
695
                    // update date_updated column so that we don't get horrible
696
                    // dupes when the entry gets purged and reinserted again e.g.
697
                    // in the case of SLOW SLOW OMG SLOW updating feeds
698
699
                    $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
700
						WHERE id = ?");
701
                    $sth->execute([$base_entry_id]);
702
703
                    $pdo->commit();
704
                    continue;
705
                }
706
707
                Debug::log("hash differs, applying plugin filters:", Debug::$LOG_VERBOSE);
708
709
                foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
710
                    Debug::log("... ".get_class($plugin), Debug::$LOG_VERBOSE);
711
712
                    $start = microtime(true);
713
                    $article = $plugin->hook_article_filter($article);
714
715
                    Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
716
717
                    $entry_plugin_data .= mb_strtolower(get_class($plugin)).",";
718
                }
719
720
                if (Debug::get_loglevel() >= 3) {
721
                    print "processed content: ";
722
                    print htmlspecialchars($article["content"]);
723
                    print "\n";
724
                }
725
726
                Debug::log("plugin data: $entry_plugin_data", Debug::$LOG_VERBOSE);
727
728
                // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
729
                if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
730
                    foreach ($article as $k => $v) {
731
                        // i guess we'll have to take the risk of 4byte unicode labels & tags here
732
                        if (is_string($article[$k])) {
733
                            $article[$k] = RSSUtils::strip_utf8mb4($v);
734
                        }
735
                    }
736
                }
737
738
                /* Collect article tags here so we could filter by them: */
739
740
                $matched_rules = [];
741
                $matched_filters = [];
742
743
                $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
744
                    $article["content"], $article["link"], $article["author"],
745
                    $article["tags"], $matched_rules, $matched_filters);
746
747
                // $article_filters should be renamed to something like $filter_actions; actual filter objects are in $matched_filters
748
                foreach ($pluginhost->get_hooks(PluginHost::HOOK_FILTER_TRIGGERED) as $plugin) {
749
                    $plugin->hook_filter_triggered($feed, $owner_uid, $article, $matched_filters, $matched_rules, $article_filters);
750
                }
751
752
                $matched_filter_ids = array_map(function($f) { return $f['id']; }, $matched_filters);
753
754
                if (count($matched_filter_ids) > 0) {
755
                    $filter_ids_qmarks = arr_qmarks($matched_filter_ids);
756
757
                    $fsth = $pdo->prepare("UPDATE ttrss_filters2 SET last_triggered = NOW() WHERE
758
							   id IN ($filter_ids_qmarks) AND owner_uid = ?");
759
760
                    $fsth->execute(array_merge($matched_filter_ids, [$owner_uid]));
761
                }
762
763
                if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
764
                    Debug::log("matched filters: ", Debug::$LOG_VERBOSE);
765
766
                    if (count($matched_filters != 0)) {
767
                        print_r($matched_filters);
768
                    }
769
770
                    Debug::log("matched filter rules: ", Debug::$LOG_VERBOSE);
771
772
                    if (count($matched_rules) != 0) {
773
                        print_r($matched_rules);
774
                    }
775
776
                    Debug::log("filter actions: ", Debug::$LOG_VERBOSE);
777
778
                    if (count($article_filters) != 0) {
779
                        print_r($article_filters);
780
                    }
781
                }
782
783
                $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
784
                $plugin_filter_actions = $pluginhost->get_filter_actions();
785
786
                if (count($plugin_filter_names) > 0) {
787
                    Debug::log("applying plugin filter actions...", Debug::$LOG_VERBOSE);
788
789
                    foreach ($plugin_filter_names as $pfn) {
790
                        list($pfclass, $pfaction) = explode(":", $pfn["param"]);
791
792
                        if (isset($plugin_filter_actions[$pfclass])) {
793
                            $plugin = $pluginhost->get_plugin($pfclass);
794
795
                            Debug::log("... $pfclass: $pfaction", Debug::$LOG_VERBOSE);
796
797
                            if ($plugin) {
798
                                $start = microtime(true);
799
                                $article = $plugin->hook_article_filter_action($article, $pfaction);
800
801
                                Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
802
                            } else {
803
                                Debug::log("??? $pfclass: plugin object not found.", Debug::$LOG_VERBOSE);
804
                            }
805
                        } else {
806
                            Debug::log("??? $pfclass: filter plugin not registered.", Debug::$LOG_VERBOSE);
807
                        }
808
                    }
809
                }
810
811
                $entry_tags = $article["tags"];
812
                $entry_title = strip_tags($article["title"]);
813
                $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
814
                $entry_link = strip_tags($article["link"]);
815
                $entry_content = $article["content"]; // escaped below
816
                $entry_force_catchup = $article["force_catchup"];
817
                $article_labels = $article["labels"];
818
                $entry_score_modifier = (int) $article["score_modifier"];
819
                $entry_language = $article["language"];
820
                $entry_timestamp = $article["timestamp"];
821
                $num_comments = $article["num_comments"];
822
823
                if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
824
                    $entry_timestamp = time();
825
                }
826
827
                $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
828
829
                Debug::log("date $entry_timestamp [$entry_timestamp_fmt]", Debug::$LOG_VERBOSE);
830
                Debug::log("num_comments: $num_comments", Debug::$LOG_VERBOSE);
831
832
                if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
833
                    Debug::log("article labels:", Debug::$LOG_VERBOSE);
834
835
                    if (count($article_labels) != 0) {
836
                        print_r($article_labels);
837
                    }
838
                }
839
840
                Debug::log("force catchup: $entry_force_catchup", Debug::$LOG_VERBOSE);
841
842
                if ($cache_images) {
843
                                    RSSUtils::cache_media($entry_content, $site_url);
844
                }
845
846
                $csth = $pdo->prepare("SELECT id FROM ttrss_entries
847
					WHERE guid = ? OR guid = ?");
848
                $csth->execute([$entry_guid, $entry_guid_hashed]);
849
850
                if (!$row = $csth->fetch()) {
851
852
                    Debug::log("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", Debug::$LOG_VERBOSE);
853
854
                    // base post entry does not exist, create it
855
856
                    $usth = $pdo->prepare(
857
                        "INSERT INTO ttrss_entries
858
							(title,
859
							guid,
860
							link,
861
							updated,
862
							content,
863
							content_hash,
864
							no_orig_date,
865
							date_updated,
866
							date_entered,
867
							comments,
868
							num_comments,
869
							plugin_data,
870
							lang,
871
							author)
872
						VALUES
873
							(?, ?, ?, ?, ?, ?,
874
							false,
875
							NOW(),
876
							?, ?, ?, ?,	?, ?)");
877
878
                        $usth->execute([$entry_title,
879
                            $entry_guid_hashed,
880
                            $entry_link,
881
                            $entry_timestamp_fmt,
882
                            "$entry_content",
883
                            $entry_current_hash,
884
                            $date_feed_processed,
885
                            $entry_comments,
886
                            (int) $num_comments,
887
                            $entry_plugin_data,
888
                            "$entry_language",
889
                            "$entry_author"]);
890
891
                }
892
893
                $csth->execute([$entry_guid, $entry_guid_hashed]);
894
895
                $entry_ref_id = 0;
896
                $entry_int_id = 0;
897
898
                if ($row = $csth->fetch()) {
899
900
                    Debug::log("base guid found, checking for user record", Debug::$LOG_VERBOSE);
901
902
                    $ref_id = $row['id'];
903
                    $entry_ref_id = $ref_id;
904
905
                    if (RSSUtils::find_article_filter($article_filters, "filter")) {
906
                        Debug::log("article is filtered out, nothing to do.", Debug::$LOG_VERBOSE);
907
                        $pdo->commit();
908
                        continue;
909
                    }
910
911
                    $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
912
913
                    Debug::log("initial score: $score [including plugin modifier: $entry_score_modifier]", Debug::$LOG_VERBOSE);
914
915
                    // check for user post link to main table
916
917
                    $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
918
							ref_id = ? AND owner_uid = ?");
919
                    $sth->execute([$ref_id, $owner_uid]);
920
921
                    // okay it doesn't exist - create user entry
922
                    if ($row = $sth->fetch()) {
923
                        $entry_ref_id = $row["ref_id"];
924
                        $entry_int_id = $row["int_id"];
925
926
                        Debug::log("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE);
927
                    } else {
928
929
                        Debug::log("user record not found, creating...", Debug::$LOG_VERBOSE);
930
931
                        if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
932
                            $unread = 1;
933
                            $last_read_qpart = null;
934
                        } else {
935
                            $unread = 0;
936
                            $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
937
                        }
938
939
                        if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
940
                            $marked = 1;
941
                        } else {
942
                            $marked = 0;
943
                        }
944
945
                        if (RSSUtils::find_article_filter($article_filters, 'publish')) {
946
                            $published = 1;
947
                        } else {
948
                            $published = 0;
949
                        }
950
951
                        $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
952
                        $last_published = ($published == 1) ? 'NOW()' : 'NULL';
953
954
                        $sth = $pdo->prepare(
955
                            "INSERT INTO ttrss_user_entries
956
								(ref_id, owner_uid, feed_id, unread, last_read, marked,
957
								published, score, tag_cache, label_cache, uuid,
958
								last_marked, last_published)
959
							VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
960
961
                        $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
962
                            $published, $score]);
963
964
                        $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
965
								ref_id = ? AND owner_uid = ? AND
966
								feed_id = ? LIMIT 1");
967
968
                        $sth->execute([$ref_id, $owner_uid, $feed]);
969
970
                        if ($row = $sth->fetch()) {
971
                                                    $entry_int_id = $row['int_id'];
972
                        }
973
                    }
974
975
                    Debug::log("resulting RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE);
976
977
                    if (DB_TYPE == "pgsql") {
978
                                            $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
979
                    } else {
980
                                            $tsvector_qpart = "";
981
                    }
982
983
                    $sth = $pdo->prepare("UPDATE ttrss_entries
984
						SET title = :title,
985
							$tsvector_qpart
986
							content = :content,
987
							content_hash = :content_hash,
988
							updated = :updated,
989
							date_updated = NOW(),
990
							num_comments = :num_comments,
991
							plugin_data = :plugin_data,
992
							author = :author,
993
							lang = :lang
994
						WHERE id = :id");
995
996
                    $params = [":title" => $entry_title,
997
                        ":content" => "$entry_content",
998
                        ":content_hash" => $entry_current_hash,
999
                        ":updated" => $entry_timestamp_fmt,
1000
                        ":num_comments" => (int) $num_comments,
1001
                        ":plugin_data" => $entry_plugin_data,
1002
                        ":author" => "$entry_author",
1003
                        ":lang" => $entry_language,
1004
                        ":id" => $ref_id];
1005
1006
                    if (DB_TYPE == "pgsql") {
1007
                        $params[":ts_lang"] = $feed_language;
1008
                        $params[":ts_content"] = mb_substr(strip_tags($entry_title." ".$entry_content), 0, 900000);
1009
                    }
1010
1011
                    $sth->execute($params);
1012
1013
                    // update aux data
1014
                    $sth = $pdo->prepare("UPDATE ttrss_user_entries
1015
							SET score = ? WHERE ref_id = ?");
1016
                    $sth->execute([$score, $ref_id]);
1017
1018
                    if ($mark_unread_on_update &&
1019
                        !$entry_force_catchup &&
1020
                        !RSSUtils::find_article_filter($article_filters, 'catchup')) {
1021
1022
                        Debug::log("article updated, marking unread as requested.", Debug::$LOG_VERBOSE);
1023
1024
                        $sth = $pdo->prepare("UPDATE ttrss_user_entries
1025
							SET last_read = null, unread = true WHERE ref_id = ?");
1026
                        $sth->execute([$ref_id]);
1027
                    } else {
1028
                        Debug::log("article updated, but we're forbidden to mark it unread.", Debug::$LOG_VERBOSE);
1029
                    }
1030
                }
1031
1032
                Debug::log("assigning labels [other]...", Debug::$LOG_VERBOSE);
1033
1034
                foreach ($article_labels as $label) {
1035
                    Labels::add_article($entry_ref_id, $label[1], $owner_uid);
1036
                }
1037
1038
                Debug::log("assigning labels [filters]...", Debug::$LOG_VERBOSE);
1039
1040
                RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
1041
                    $owner_uid, $article_labels);
1042
1043
                Debug::log("looking for enclosures...", Debug::$LOG_VERBOSE);
1044
1045
                // enclosures
1046
1047
                $enclosures = array();
1048
1049
                $encs = $item->get_enclosures();
1050
1051
                if (is_array($encs)) {
1052
                    foreach ($encs as $e) {
1053
                        $e_item = array(
1054
                            rewrite_relative_url($site_url, $e->link),
1055
                            $e->type, $e->length, $e->title, $e->width, $e->height);
1056
1057
                        // Yet another episode of "mysql utf8_general_ci is gimped"
1058
                        if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
1059
                            for ($i = 0; $i < count($e_item); $i++) {
1060
                                if (is_string($e_item[$i])) {
1061
                                    $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1062
                                }
1063
                            }
1064
                        }
1065
1066
                        array_push($enclosures, $e_item);
1067
                    }
1068
                }
1069
1070
                if ($cache_images) {
1071
                                    RSSUtils::cache_enclosures($enclosures, $site_url);
1072
                }
1073
1074
                if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
1075
                    Debug::log("article enclosures:", Debug::$LOG_VERBOSE);
1076
                    print_r($enclosures);
1077
                }
1078
1079
                $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1080
						WHERE content_url = ? AND content_type = ? AND post_id = ?");
1081
1082
                $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1083
							(content_url, content_type, title, duration, post_id, width, height) VALUES
1084
							(?, ?, ?, ?, ?, ?, ?)");
1085
1086
                foreach ($enclosures as $enc) {
1087
                    $enc_url = $enc[0];
1088
                    $enc_type = $enc[1];
1089
                    $enc_dur = (int) $enc[2];
1090
                    $enc_title = $enc[3];
1091
                    $enc_width = intval($enc[4]);
1092
                    $enc_height = intval($enc[5]);
1093
1094
                    $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
1095
1096
                    if (!$esth->fetch()) {
1097
                        $usth->execute([$enc_url, $enc_type, (string) $enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
1098
                    }
1099
                }
1100
1101
                // check for manual tags (we have to do it here since they're loaded from filters)
1102
1103
                foreach ($article_filters as $f) {
1104
                    if ($f["type"] == "tag") {
1105
1106
                        $manual_tags = trim_array(explode(",", $f["param"]));
1107
1108
                        foreach ($manual_tags as $tag) {
1109
                            array_push($entry_tags, $tag);
1110
                        }
1111
                    }
1112
                }
1113
1114
                // Skip boring tags
1115
1116
                $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
1117
                    'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1118
1119
                $filtered_tags = array();
1120
                $tags_to_cache = array();
1121
1122
                foreach ($entry_tags as $tag) {
1123
                    if (array_search($tag, $boring_tags) === false) {
1124
                        array_push($filtered_tags, $tag);
1125
                    }
1126
                }
1127
1128
                $filtered_tags = array_unique($filtered_tags);
1129
1130
                if (Debug::get_loglevel() >= Debug::$LOG_VERBOSE) {
1131
                    Debug::log("filtered tags: ".implode(", ", $filtered_tags), Debug::$LOG_VERBOSE);
1132
1133
                }
1134
1135
                // Save article tags in the database
1136
1137
                if (count($filtered_tags) > 0) {
1138
1139
                    $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1140
							WHERE tag_name = ? AND post_int_id = ? AND
1141
							owner_uid = ? LIMIT 1");
1142
1143
                    $usth = $pdo->prepare("INSERT INTO ttrss_tags
1144
									(owner_uid,tag_name,post_int_id)
1145
									VALUES (?, ?, ?)");
1146
1147
                    $filtered_tags = FeedItem_Common::normalize_categories($filtered_tags);
1148
1149
                    foreach ($filtered_tags as $tag) {
1150
                        $tsth->execute([$tag, $entry_int_id, $owner_uid]);
1151
1152
                        if (!$tsth->fetch()) {
1153
                            $usth->execute([$owner_uid, $tag, $entry_int_id]);
1154
                        }
1155
1156
                        array_push($tags_to_cache, $tag);
1157
                    }
1158
1159
                    /* update the cache */
1160
                    $tags_str = join(",", $tags_to_cache);
1161
1162
                    $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1163
						SET tag_cache = ? WHERE ref_id = ?
1164
						AND owner_uid = ?");
1165
                    $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
1166
                }
1167
1168
                Debug::log("article processed", Debug::$LOG_VERBOSE);
1169
1170
                $pdo->commit();
1171
            }
1172
1173
            Debug::log("purging feed...", Debug::$LOG_VERBOSE);
1174
1175
            Feeds::purge_feed($feed, 0);
1176
1177
            $sth = $pdo->prepare("UPDATE ttrss_feeds
1178
				SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1179
            $sth->execute([$feed]);
1180
1181
        } else {
1182
1183
            $error_msg = mb_substr($rss->error(), 0, 245);
1184
1185
            Debug::log("fetch error: $error_msg", Debug::$LOG_VERBOSE);
1186
1187
            if (count($rss->errors()) > 1) {
1188
                foreach ($rss->errors() as $error) {
1189
                    Debug::log("+ $error", Debug::$LOG_VERBOSE);
1190
                }
1191
            }
1192
1193
            $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1194
				last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1195
            $sth->execute([$error_msg, $feed]);
1196
1197
            unset($rss);
1198
1199
            Debug::log("update failed.", Debug::$LOG_VERBOSE);
1200
            return false;
1201
        }
1202
1203
        Debug::log("update done.", Debug::$LOG_VERBOSE);
1204
1205
        return true;
1206
    }
1207
1208
    /**
     * Downloads image, audio and video enclosures into the "images" disk cache.
     *
     * Every cached file is stored under the SHA-1 of its absolute URL. When an
     * entry is already present it is touched instead of being fetched again.
     *
     * @param array  $enclosures list of enclosure tuples; index 0 is read as the URL
     *                           and index 1 as the content type
     * @param string $site_url   base URL used to resolve relative enclosure URLs
     * @return void
     */
    public static function cache_enclosures($enclosures, $site_url) {
        $cache = new DiskCache("images");

        // Nothing can be stored if the cache itself is not writable.
        if (!$cache->isWritable()) {
            return;
        }

        foreach ($enclosures as $enc) {

            // Only media enclosures are cached; everything else is ignored.
            if (!preg_match("/(image|audio|video)/", $enc[1])) {
                continue;
            }

            $src = rewrite_relative_url($site_url, $enc[0]);

            $local_filename = sha1($src);

            Debug::log("cache_enclosures: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);

            if (!$cache->exists($local_filename)) {

                global $fetch_last_error_code;
                global $fetch_last_error;

                $file_content = fetch_file_contents(array("url" => $src,
                    "http_referrer" => $src,
                    "max_size" => MAX_CACHE_FILE_SIZE));

                if ($file_content) {
                    $cache->put($local_filename, $file_content);
                } else {
                    Debug::log("cache_enclosures: failed with $fetch_last_error_code: $fetch_last_error");
                }
            } else if ($cache->isWritable($local_filename)) {
                // Ask the cache about its own entry (same check cache_media() uses).
                // A bare is_writable() on the hash would be resolved against the
                // current working directory rather than the cache.
                $cache->touch($local_filename);
            }
        }
    }
1242
1243
    /**
     * Downloads media referenced by a piece of HTML into the "images" disk cache.
     *
     * The src attributes of <img>, <video><source> and <audio><source> elements
     * are collected; data: URIs are skipped. Every cached file is stored under the
     * SHA-1 of its absolute URL. When an entry is already present it is touched
     * instead of being fetched again.
     *
     * @param string $html     markup to scan for media elements
     * @param string $site_url base URL used to resolve relative src values
     * @return void
     */
    public static function cache_media($html, $site_url) {
        $cache = new DiskCache("images");

        if (!$cache->isWritable()) {
            return;
        }

        // DOMDocument::loadHTML() refuses an empty document (warning on PHP 7,
        // ValueError on PHP 8) and there would be nothing to cache anyway.
        if ((string) $html === '') {
            return;
        }

        $doc = new DOMDocument();

        if (!$doc->loadHTML($html)) {
            return;
        }

        $xpath = new DOMXPath($doc);

        $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

        foreach ($entries as $entry) {

            // Inline data: URIs carry their own payload, so there is nothing to download.
            if (!$entry->hasAttribute('src') || strpos($entry->getAttribute('src'), "data:") === 0) {
                continue;
            }

            $src = rewrite_relative_url($site_url, $entry->getAttribute('src'));

            $local_filename = sha1($src);

            Debug::log("cache_media: checking $src", Debug::$LOG_VERBOSE);

            if (!$cache->exists($local_filename)) {
                Debug::log("cache_media: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);

                global $fetch_last_error_code;
                global $fetch_last_error;

                $file_content = fetch_file_contents(array("url" => $src,
                    "http_referrer" => $src,
                    "max_size" => MAX_CACHE_FILE_SIZE));

                if ($file_content) {
                    $cache->put($local_filename, $file_content);
                } else {
                    Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
                }
            } else if ($cache->isWritable($local_filename)) {
                $cache->touch($local_filename);
            }
        }
    }
1284
1285
    /**
     * Deletes rows from ttrss_error_log whose created_at is more than seven days old.
     *
     * The interval is written in PostgreSQL syntax when DB_TYPE is "pgsql" and
     * in MySQL syntax (DATE_SUB) otherwise.
     *
     * @return void
     */
    public static function expire_error_log() {
        Debug::log("Removing old error log entries...");

        $dbh = Db::pdo();

        $purge_query = (DB_TYPE == "pgsql")
            ? "DELETE FROM ttrss_error_log
				WHERE created_at < NOW() - INTERVAL '7 days'"
            : "DELETE FROM ttrss_error_log
				WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

        $dbh->query($purge_query);
    }
1298
1299
    /**
     * Deletes rows from ttrss_archived_feeds whose created value is more than one month old.
     *
     * The interval is written in PostgreSQL syntax when DB_TYPE is "pgsql" and
     * in MySQL syntax (DATE_SUB) otherwise.
     *
     * @return void
     */
    public static function expire_feed_archive() {
        Debug::log("Removing old archived feeds...");

        $dbh = Db::pdo();

        $purge_query = (DB_TYPE == "pgsql")
            ? "DELETE FROM ttrss_archived_feeds
				WHERE created < NOW() - INTERVAL '1 month'"
            : "DELETE FROM ttrss_archived_feeds
				WHERE created < DATE_SUB(NOW(), INTERVAL 1 MONTH)";

        $dbh->query($purge_query);
    }
1312
1313
    /**
     * Removes *.lock files in LOCK_DIRECTORY that are not currently locked and
     * were last modified more than two days ago.
     *
     * Nothing is attempted when LOCK_DIRECTORY is not writable. The number of
     * files that were actually deleted is written to the debug log.
     *
     * @return void
     */
    public static function expire_lock_files() {
        Debug::log("Removing old lock files...", Debug::$LOG_VERBOSE);

        $num_deleted = 0;
        $max_age = 86400 * 2; // two days, in seconds

        if (is_writable(LOCK_DIRECTORY)) {
            $files = glob(LOCK_DIRECTORY."/*.lock");

            if ($files) {
                foreach ($files as $file) {
                    if (file_is_locked(basename($file))) {
                        continue;
                    }

                    $mtime = filemtime($file);

                    // filemtime() returns false on failure; do not treat an
                    // unreadable timestamp as "infinitely old".
                    if ($mtime === false || time() - $mtime <= $max_age) {
                        continue;
                    }

                    // Count only files that were really removed so the log is accurate.
                    if (unlink($file)) {
                        ++$num_deleted;
                    }
                }
            }
        }

        Debug::log("removed $num_deleted old lock files.");
    }
1333
1334
    /**
     * Source: http://www.php.net/manual/en/function.parse-url.php#104527
     * Returns the URL query string as an associative array.
     *
     * The query is split on "&" and each segment on its first "=", so values
     * may themselves contain "=". A segment without "=" maps its name to null.
     * Empty segments (including an empty query) are ignored. Names and values
     * are returned verbatim; no URL-decoding is performed.
     *
     * @param    string    $query    query string without the leading "?"
     * @return    array    map of parameter name to raw value (or null)
     */
    public static function convertUrlQuery($query) {
        $params = array();

        foreach (explode('&', $query) as $param) {
            // "a=1&&b=2" or an empty query would otherwise add a '' => null entry.
            if ($param === '') {
                continue;
            }

            // Limit of 2 keeps any further "=" characters inside the value.
            $item = explode('=', $param, 2);

            $params[$item[0]] = count($item) > 1 ? $item[1] : null;
        }

        return $params;
    }
1353
1354
    /**
     * Evaluates filters against an article and collects the actions of every filter that matches.
     *
     * For each filter, its rules are tested in order. Rule patterns are matched
     * case-insensitively in UTF-8 mode; "/" in a pattern is escaped first. Errors
     * from preg_match() are suppressed, so an invalid pattern counts as "no match".
     * Rules with an empty pattern are skipped. A rule's "inverse" flag negates the
     * rule result and the filter's "inverse" flag negates the combined result.
     *
     * With "match_any_rule" set, the filter matches as soon as one rule matches;
     * otherwise evaluation stops at the first rule that fails.
     *
     * Processing ends immediately after an action of type "stop" is collected.
     *
     * @param array       $filters         filters, each with "match_any_rule", "inverse", "rules" and "actions" keys
     * @param string      $title           article title
     * @param string      $content         article content (CR, LF and TAB are removed before content rules are tested)
     * @param string      $link            article link
     * @param string      $author          article author
     * @param array       $tags            article tags
     * @param array|false $matched_rules   when an array is passed, receives the last rule evaluated for each matching filter
     * @param array|false $matched_filters when an array is passed, receives every matching filter
     * @return array actions of all matching filters, in order
     */
    public static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false, &$matched_filters = false) {
        $matches = array();

        foreach ($filters as $filter) {
            $match_any_rule = $filter["match_any_rule"];
            $inverse = $filter["inverse"];
            $filter_match = false;

            // Tracked per filter: a filter without rules (which can still match
            // through "inverse") must not report an undefined rule or one left
            // over from a previous filter.
            $last_processed_rule = null;

            foreach ($filter["rules"] as $rule) {
                $last_processed_rule = $rule;

                $match = false;
                $reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
                $rule_inverse = $rule["inverse"];

                if (!$reg_exp) {
                    continue;
                }

                switch ($rule["type"]) {
                    case "title":
                        $match = @preg_match("/$reg_exp/iu", $title);
                        break;
                    case "content":
                        // we don't need to deal with multiline regexps
                        $content = preg_replace("/[\r\n\t]/", "", $content);

                        $match = @preg_match("/$reg_exp/iu", $content);
                        break;
                    case "both":
                        // we don't need to deal with multiline regexps
                        $content = preg_replace("/[\r\n\t]/", "", $content);

                        $match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $content));
                        break;
                    case "link":
                        $match = @preg_match("/$reg_exp/iu", $link);
                        break;
                    case "author":
                        $match = @preg_match("/$reg_exp/iu", $author);
                        break;
                    case "tag":
                        foreach ($tags as $tag) {
                            if (@preg_match("/$reg_exp/iu", $tag)) {
                                $match = true;
                                break;
                            }
                        }
                        break;
                }

                if ($rule_inverse) {
                    $match = !$match;
                }

                if ($match_any_rule) {
                    // any-mode: the first matching rule decides
                    if ($match) {
                        $filter_match = true;
                        break;
                    }
                } else {
                    // all-mode: the first failing rule decides
                    $filter_match = $match;
                    if (!$match) {
                        break;
                    }
                }
            }

            if ($inverse) {
                $filter_match = !$filter_match;
            }

            if ($filter_match) {
                if (is_array($matched_rules) && $last_processed_rule !== null) {
                    array_push($matched_rules, $last_processed_rule);
                }
                if (is_array($matched_filters)) {
                    array_push($matched_filters, $filter);
                }

                foreach ($filter["actions"] as $action) {
                    array_push($matches, $action);

                    // if Stop action encountered, perform no further processing
                    if (isset($action["type"]) && $action["type"] == "stop") {
                        return $matches;
                    }
                }
            }
        }

        return $matches;
    }
1445
1446
    /**
     * Returns the first filter action whose "type" equals the given name.
     *
     * @param array  $filters     filter actions, each with a "type" key
     * @param string $filter_name action type to look for
     * @return array|false the first matching action, or false when there is none
     */
    public static function find_article_filter($filters, $filter_name) {
        foreach ($filters as $action) {
            if ($action["type"] != $filter_name) {
                continue;
            }

            return $action;
        }

        return false;
    }
1454
1455
    /**
     * Returns every filter action whose "type" equals the given name.
     *
     * @param array  $filters     filter actions, each with a "type" key
     * @param string $filter_name action type to look for
     * @return array matching actions in their original order (empty when none match)
     */
    public static function find_article_filters($filters, $filter_name) {
        $found = array();

        foreach ($filters as $action) {
            if ($action["type"] != $filter_name) {
                continue;
            }

            $found[] = $action;
        }

        return $found;
    }
1465
1466
    /**
     * Sums the "param" values of all filter actions of type "score".
     *
     * Parameters are cast to int before being added, so an empty or
     * non-numeric parameter contributes 0 instead of raising a warning or
     * a TypeError on newer PHP versions.
     *
     * @param array $filters filter actions, each with "type" and "param" keys
     * @return int total score adjustment (0 when there are no score actions)
     */
    public static function calculate_article_score($filters) {
        $score = 0;

        foreach ($filters as $f) {
            if ($f["type"] == "score") {
                $score += (int) $f["param"];
            }
        }

        return $score;
    }
1476
1477
    /**
     * Tells whether any label in the list carries the given caption.
     *
     * @param array  $labels  label tuples; index 1 is compared against the caption
     * @param string $caption caption to look for
     * @return bool true when at least one label matches
     */
    public static function labels_contains_caption($labels, $caption) {
        $found = false;

        foreach ($labels as $entry) {
            if ($entry[1] == $caption) {
                $found = true;
                break;
            }
        }

        return $found;
    }
1486
1487
    /**
     * Adds the article to the label named by each "label" filter action,
     * except for captions already present in $article_labels.
     *
     * @param mixed $id             article id handed to Labels::add_article()
     * @param array $filters        filter actions, each with "type" and "param" keys
     * @param mixed $owner_uid      owner handed to Labels::add_article()
     * @param array $article_labels label tuples the article already has (caption at index 1)
     * @return void
     */
    public static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
        foreach ($filters as $action) {
            if ($action["type"] != "label") {
                continue;
            }

            $caption = $action["param"];

            if (self::labels_contains_caption($article_labels, $caption)) {
                continue;
            }

            Labels::add_article($id, $caption, $owner_uid);
        }
    }
1496
1497
    /**
     * Derive a GUID-like string from a title: strip tags, lowercase as UTF-8,
     * then turn spaces, quotes and the punctuation , . : ; into dashes.
     *
     * @param string $title source title
     * @return mixed result of preg_replace() on the normalized title
     */
    public static function make_guid_from_title($title) {
        $plain = strip_tags($title);
        $lowered = mb_strtolower($plain, 'utf-8');

        return preg_replace("/[ \"\',.:;]/", "-", $lowered);
    }
1501
1502
    /**
     * Delete counter cache rows (feeds, then categories) whose feed_id is
     * positive but has no row with the same id and owner in ttrss_feeds /
     * ttrss_feed_categories, and log how many rows were removed.
     */
    public static function cleanup_counters_cache() {
        $pdo = Db::pdo();

        $orphaned_feeds_sql = "DELETE FROM ttrss_counters_cache
			WHERE feed_id > 0 AND
			(SELECT COUNT(id) FROM ttrss_feeds WHERE
				id = feed_id AND
				ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0";

        $feed_rows = $pdo->query($orphaned_feeds_sql)->rowCount();

        $orphaned_cats_sql = "DELETE FROM ttrss_cat_counters_cache
			WHERE feed_id > 0 AND
			(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
				id = feed_id AND
				ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0";

        $cat_rows = $pdo->query($orphaned_cats_sql)->rowCount();

        Debug::log("removed $feed_rows (feeds) $cat_rows (cats) orphaned counter cache entries.");
    }
1523
1524
    /**
     * Run the HOOK_HOUSE_KEEPING hooks on a fresh PluginHost that has been
     * populated through load_user_plugins() for the given user.
     *
     * @param mixed $owner_uid user id handed to load_user_plugins()
     */
    public static function housekeeping_user($owner_uid) {
        // a separate host instance is used rather than PluginHost::getInstance()
        $user_host = new PluginHost();
        load_user_plugins($owner_uid, $user_host);

        $user_host->run_hooks(
            PluginHost::HOOK_HOUSE_KEEPING,
            "hook_house_keeping",
            "");
    }
1531
1532
    /**
     * Run the shared housekeeping steps in a fixed order: disk cache expiry,
     * lock file / error log / feed archive expiry, feed browser cleanup,
     * orphaned article purge, counter cache cleanup, and finally the
     * HOOK_HOUSE_KEEPING hooks of the global PluginHost instance.
     */
    public static function housekeeping_common() {
        DiskCache::expire();

        // expiry and cleanup helpers defined on this class
        self::expire_lock_files();
        self::expire_error_log();
        self::expire_feed_archive();
        self::cleanup_feed_browser();

        Article::purge_orphans();
        self::cleanup_counters_cache();

        $host = PluginHost::getInstance();
        $host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
    }
1545
1546
    /**
     * Fetch and store the favicon of a feed when no icon file exists yet.
     *
     * The icon is looked up via get_favicon_url(), downloaded, and written to
     * ICONS_DIR/<feed>.ico only if its first bytes match a known image
     * signature (ICO, GIF, PNG, JPEG or BMP).
     *
     * @param string $site_url site URL handed to get_favicon_url()
     * @param mixed $feed feed identifier used to build the icon file name
     * @return mixed the icon file path when the file did not exist on entry
     *               (whether or not anything was written); null when the
     *               file already existed
     */
    public static function check_feed_favicon($site_url, $feed) {
        $icon_file = ICONS_DIR."/$feed.ico";

        if (file_exists($icon_file)) {
            // existing icons are left alone; no value is returned in this case
            return;
        }

        $favicon_url = RSSUtils::get_favicon_url($site_url);

        if ($favicon_url) {
            // Limiting to "image" type misses those served with text/plain
            $contents = fetch_file_contents($favicon_url);

            if ($contents) {
                // Crude image type matching.
                // Patterns gleaned from the file(1) source code.
                $signatures = array(
                    '/^\x00\x00\x01\x00/',        // MS Windows icon resource
                    '/^GIF8/',                    // GIF image data
                    '/^\x89PNG\x0d\x0a\x1a\x0a/', // PNG image data
                    '/^\xff\xd8/',                // JPEG image data
                    '/^BM/',                      // PC bitmap (OS2, Windows BMP files)
                );

                $recognized = false;

                foreach ($signatures as $signature) {
                    if (preg_match($signature, $contents)) {
                        $recognized = true;
                        break;
                    }
                }

                if (!$recognized) {
                    // unknown type: discard so nothing gets written below
                    $contents = "";
                }
            }

            if ($contents) {
                $handle = @fopen($icon_file, "w");

                if ($handle) {
                    fwrite($handle, $contents);
                    fclose($handle);
                    chmod($icon_file, 0644);
                }
            }
        }

        return $icon_file;
    }
1595
1596
    /**
     * Tell whether the data starts with the three bytes 0x1f 0x8b 0x08.
     *
     * @param string $feed_data raw data to inspect
     * @return bool true when the first three bytes match, false otherwise
     */
    public static function is_gzipped($feed_data) {
        $prefix = substr($feed_data, 0, 3);

        return $prefix === "\x1f"."\x8b"."\x08";
    }
1600
1601
    /**
     * Load the enabled filters of a user that apply to a given feed.
     *
     * Every row of ttrss_filters2 owned by $owner_uid (ordered by order_id,
     * title) is expanded with its rules and actions. A rule applies either
     * through the SQL feed/category conditions or, when its match_on column
     * is set, when the decoded list contains "0", the feed id, or one of the
     * "CAT:<id>" ids of the feed's category and its parent categories.
     * Unless the filter has match_any_rule set, a single non-applicable
     * match_on rule rejects the whole filter.
     *
     * @param mixed $feed_id feed id (cast to int)
     * @param mixed $owner_uid owner of the filters
     * @return array list of filters, each with the keys "id",
     *               "match_any_rule", "inverse", "rules" (entries with
     *               "reg_exp", "type", "inverse") and "actions" (entries
     *               with "type", "param"); only filters having at least one
     *               rule and one action are returned
     */
    public static function load_filters($feed_id, $owner_uid) {
        $filters = array();

        $feed_id = (int) $feed_id;
        $cat_id = (int) Feeds::getFeedCategory($feed_id);

        if ($cat_id == 0) {
            $null_cat_qpart = "cat_id IS NULL OR";
        } else {
            $null_cat_qpart = "";
        }

        $pdo = Db::pdo();

        $sth = $pdo->prepare("SELECT * FROM ttrss_filters2 WHERE
				owner_uid = ? AND enabled = true ORDER BY order_id, title");
        $sth->execute([$owner_uid]);

        // These ids are interpolated into the rules query below, so force
        // them to integers instead of trusting the helper's return values.
        $check_cats = array_map("intval", array_merge(
            Feeds::getParentCategories($cat_id, $owner_uid),
            [$cat_id]));

        $check_cats_str = join(",", $check_cats);
        $check_cats_fullids = array_map(function($a) { return "CAT:$a"; }, $check_cats);

        while ($line = $sth->fetch()) {
            $filter_id = $line["id"];

            $match_any_rule = sql_bool_to_bool($line["match_any_rule"]);

            $sth2 = $pdo->prepare("SELECT
					r.reg_exp, r.inverse, r.feed_id, r.cat_id, r.cat_filter, r.match_on, t.name AS type_name
					FROM ttrss_filters2_rules AS r,
					ttrss_filter_types AS t
					WHERE
						(match_on IS NOT NULL OR
						  (($null_cat_qpart (cat_id IS NULL AND cat_filter = false) OR cat_id IN ($check_cats_str)) AND
						  (feed_id IS NULL OR feed_id = ?))) AND
						filter_type = t.id AND filter_id = ?");
            $sth2->execute([$feed_id, $filter_id]);

            $rules = array();
            $actions = array();

            while ($rule_line = $sth2->fetch()) {
                // rules without match_on were already narrowed down by the SQL conditions
                $rule_applies = true;

                if ($rule_line["match_on"]) {
                    $match_on = json_decode($rule_line["match_on"], true);

                    if (!is_array($match_on)) {
                        // Malformed or scalar JSON: treat as "matches nothing"
                        // rather than passing a non-array to in_array() /
                        // array_intersect(), which is a TypeError on PHP 8.
                        $match_on = [];
                    }

                    $rule_applies = in_array("0", $match_on)
                        || in_array($feed_id, $match_on)
                        || count(array_intersect($check_cats_fullids, $match_on)) > 0;
                }

                if ($rule_applies) {
                    $rule = array();
                    $rule["reg_exp"] = $rule_line["reg_exp"];
                    $rule["type"] = $rule_line["type_name"];
                    $rule["inverse"] = sql_bool_to_bool($rule_line["inverse"]);

                    array_push($rules, $rule);
                } else if (!$match_any_rule) {
                    // this filter contains a rule that doesn't match to this feed/category combination
                    // thus filter has to be rejected

                    $rules = [];
                    break;
                }
            }

            if (count($rules) > 0) {
                $sth2 = $pdo->prepare("SELECT a.action_param,t.name AS type_name
						FROM ttrss_filters2_actions AS a,
						ttrss_filter_actions AS t
						WHERE
							action_id = t.id AND filter_id = ?");
                $sth2->execute([$filter_id]);

                while ($action_line = $sth2->fetch()) {
                    $action = array();
                    $action["type"] = $action_line["type_name"];
                    $action["param"] = $action_line["action_param"];

                    array_push($actions, $action);
                }
            }

            // filters left without rules or without actions are dropped
            if (count($rules) > 0 && count($actions) > 0) {
                $filter = [];
                $filter["id"] = $filter_id;
                $filter["match_any_rule"] = $match_any_rule;
                $filter["inverse"] = sql_bool_to_bool($line["inverse"]);
                $filter["rules"] = $rules;
                $filter["actions"] = $actions;

                array_push($filters, $filter);
            }
        }

        return $filters;
    }
1711
1712
    /**
     * Try to determine the favicon URL for a feed.
     * adapted from wordpress favicon plugin by Jeff Minard (http://thecodepro.com/)
     * http://dev.wp-plugins.org/file/favatars/trunk/favatars.php
     *
     * The page at $url is fetched and parsed; the first <base href> in the
     * head (if any) replaces the base URL, and the first head <link> whose
     * rel is exactly "shortcut icon" or "icon" supplies the icon. When no
     * icon URL results, "/favicon.ico" is resolved against the base URL.
     *
     * @param string $url A feed or page URL
     * @access public
     * @return mixed The value produced by rewrite_relative_url() for the icon
     *               found in the page, or for "/favicon.ico" as a fallback.
     */
    public static function get_favicon_url($url) {
        $favicon_url = false;

        $html = @fetch_file_contents($url);

        if ($html) {
            $doc = new DOMDocument();

            if ($doc->loadHTML($html)) {
                $xpath = new DOMXPath($doc);

                // only the first <base href> is honoured
                $base_nodes = $xpath->query('/html/head/base[@href]');

                if ($base_nodes->length > 0) {
                    $base_href = $base_nodes->item(0)->getAttribute("href");
                    $url = rewrite_relative_url($url, $base_href);
                }

                // likewise, only the first matching <link> is used
                $icon_nodes = $xpath->query('/html/head/link[@rel="shortcut icon" or @rel="icon"]');

                if ($icon_nodes->length > 0) {
                    $icon_href = $icon_nodes->item(0)->getAttribute("href");
                    $favicon_url = rewrite_relative_url($url, $icon_href);
                }
            }
        }

        if ($favicon_url) {
            return $favicon_url;
        }

        return rewrite_relative_url($url, "/favicon.ico");
    }
1753
1754
}
1755