# -*- encoding: utf-8 -*-
"""
cron: 22 6-22/2 * * *
new Env('RSS 订阅');
"""
|
6
|
|
|
|
|
7
|
|
|
from datetime import datetime, timedelta, timezone |
|
8
|
|
|
from time import mktime |
|
9
|
|
|
|
|
10
|
|
|
import feedparser |
|
11
|
|
|
|
|
12
|
|
|
from notify_mtr import send |
|
13
|
|
|
from utils_models import History, Rss, db |
|
14
|
|
|
|
|
15
|
|
|
|
|
16
|
|
|
class RssRobot:
    """Poll the configured RSS feeds and push newly published entries.

    Feed definitions are read from the ``Rss`` model, links that were already
    notified are tracked through the ``History`` model, and notifications are
    delivered with ``send``.
    """

    # The accumulated message is flushed each time this many entries are queued.
    _BATCH_SIZE = 20
    _SECONDS_PER_DAY = 86400

    def main(self):
        """Run one polling pass over every ``Rss`` row.

        Old history rows are purged first.  Each feed is then parsed and every
        entry that is both unseen and younger than ``rss.before`` days is
        appended to an HTML message.  The message is sent whenever
        ``_BATCH_SIZE`` entries have been queued, and once more at the end for
        any remainder.  The links queued for a feed are written to ``History``
        after that feed has been processed.
        """
        self.remove_old_history()

        today = datetime.now().strftime("%Y-%m-%d")
        # A set gives O(1) membership tests; it is also updated while running
        # so a link that shows up twice in one pass is only queued once.
        seen_urls = {
            record.url
            for record in History.select().where(History.publish_at == today)
        }

        no = 0  # entries queued across all feeds in this pass
        msg = ""
        for rss in Rss.select():
            rss_history_list = []
            feed = feedparser.parse(rss.feed)
            title = True  # the feed header still has to be written
            c_no = 1  # per-feed running number shown in front of each entry
            for entry in feed.entries:
                pub_t = self._published_at(entry)
                if pub_t is None:
                    # No usable publication date: skip this entry instead of
                    # letting one malformed item abort the whole pass.
                    continue

                # These sites need special handling.
                if rss.url == "https://www.foreverblog.cn":
                    pub_t = pub_t.replace(
                        year=datetime.now(timezone.utc).year
                    ) + timedelta(hours=8)
                elif rss.url == "https://www.zhihu.com":
                    entry.link = entry.link.split("/answer")[0]

                if entry.link in seen_urls:
                    continue
                if not self._is_recent(pub_t, rss.before):
                    continue
                seen_urls.add(entry.link)

                if title:
                    msg += f"<b>{rss.title.strip()}</b>\n"
                    title = False
                msg = f'{msg}{c_no}. <a href="{entry.link}">{entry.title}</a>\n'
                no += 1
                c_no += 1
                if no % self._BATCH_SIZE == 0:
                    send("RSS 订阅", msg)
                    # The feed header is not repeated in the next message
                    # when a flush happens in the middle of a feed.
                    msg = ""
                rss_history_list.append(History(url=entry.link))

            if rss_history_list:
                with db.atomic():
                    History.bulk_create(rss_history_list, batch_size=10)

        # msg is only non-empty when entries were queued after the last flush.
        if msg:
            send("RSS 订阅", msg)

    @staticmethod
    def _published_at(entry):
        """Return the entry's publication time as a naive datetime, or None.

        ``None`` is returned when the entry has no ``published_parsed`` value
        (key missing or set to ``None``).
        """
        parsed = entry.get("published_parsed")
        if parsed is None:
            return None
        # NOTE(review): mktime() interprets the tuple as local time, while
        # feedparser documents its *_parsed values as UTC -- confirm whether
        # the resulting offset matters for the age check.
        return datetime.fromtimestamp(mktime(parsed))

    @classmethod
    def _is_recent(cls, pub_t, max_age_days):
        """Tell whether ``pub_t`` is less than ``max_age_days`` days old."""
        age = datetime.now(timezone.utc).timestamp() - pub_t.timestamp()
        return age < max_age_days * cls._SECONDS_PER_DAY

    @staticmethod
    def remove_old_history():
        """Delete ``History`` rows whose ``publish_at`` is over a week old."""
        # Keep only the records of the last week.
        cutoff = datetime.now() - timedelta(days=7)
        History.delete().where(
            History.publish_at < cutoff.strftime("%Y-%m-%d")
        ).execute()
|
79
|
|
|
|
|
80
|
|
|
|
|
81
|
|
|
if __name__ == "__main__":
    # Script entry point: build the robot and run a single polling pass.
    robot = RssRobot()
    robot.main()
|
83
|
|
|
|