| Total Complexity | 13 |
| Total Lines | 76 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | # -*- encoding: utf-8 -*- |
||
| 2 | """ |
||
| 3 | cron: * 6-22/2 * * * |
||
| 4 | new Env('RSS 订阅'); |
||
| 5 | """ |
||
| 6 | |||
| 7 | from datetime import datetime, timedelta |
||
| 8 | from time import mktime |
||
| 9 | |||
| 10 | import feedparser |
||
| 11 | |||
| 12 | from notify_mtr import send |
||
| 13 | from utils_models import History, Rss, db |
||
| 14 | |||
| 15 | |||
class RssRobot:
    """Poll the subscribed RSS feeds and push recent, unseen entries as notifications.

    Feeds are read from the ``Rss`` table, already-reported links from the
    ``History`` table, and messages are delivered through ``send``.
    """

    # Title passed to the notifier for every message.
    NOTIFY_TITLE = "RSS 订阅"
    # Number of reported entries after which the pending message is flushed.
    ENTRIES_PER_MESSAGE = 20
    # Seconds per day, used to turn ``rss.before`` (days) into an age limit.
    SECONDS_PER_DAY = 86400

    def main(self):
        """Run one polling pass over every feed.

        Old history rows are purged first. Then, for each feed, every entry
        that has not been reported yet and was published less than
        ``rss.before`` days ago is appended to the pending message and
        recorded in ``History``. The message is sent every
        ``ENTRIES_PER_MESSAGE`` entries and once more at the end if anything
        is left over.
        """
        self.remove_old_history()

        # Links already recorded today. A set keeps the per-entry membership
        # test O(1) and lets us add the links reported during this run.
        seen_urls = {
            rss_history.url
            for rss_history in History.select().where(
                History.publish_at == datetime.today().strftime("%Y-%m-%d")
            )
        }

        msg = ""
        no = 0  # entries reported during this run, across all feeds
        for rss in Rss.select():
            rss_history_list = []
            feed = feedparser.parse(rss.feed)
            title = True  # the feed header still has to be written into msg
            c_no = 1  # per-feed running number shown in front of each entry
            for entry in feed.entries:
                pub_t = self._entry_time(entry)
                if pub_t is None:
                    # An entry without any parsed date cannot be age-checked;
                    # skip it instead of aborting the whole run.
                    continue

                # These sites are handled specially.
                if rss.url == "https://www.foreverblog.cn":
                    # Force the current year and shift by 8 hours
                    # (presumably corrects the dates this feed reports - confirm).
                    pub_t = pub_t.replace(year=datetime.utcnow().year) + timedelta(hours=8)
                elif rss.url == "https://www.zhihu.com":
                    # Drop the "/answer..." suffix from the link.
                    entry.link = entry.link.split("/answer")[0]

                age = datetime.utcnow().timestamp() - pub_t.timestamp()
                if entry.link in seen_urls or age >= rss.before * self.SECONDS_PER_DAY:
                    continue
                # Remember the link so a duplicate later in this run is not
                # reported (and persisted) a second time.
                seen_urls.add(entry.link)

                if title:
                    msg += f"\n--{rss.title}--\n"
                    title = False
                msg += f"{c_no:02d}.{entry.title}\n{entry.link}\n"
                no += 1
                c_no += 1
                if no % self.ENTRIES_PER_MESSAGE == 0:
                    send(self.NOTIFY_TITLE, msg)
                    msg = ""
                    # The next message starts empty, so the feed header must
                    # be repeated if more entries of this feed follow.
                    title = True
                rss_history_list.append(History(url=entry.link))

            # Only open a transaction when this feed produced new rows.
            if rss_history_list:
                with db.atomic():
                    History.bulk_create(rss_history_list, batch_size=10)

        # msg is emptied on every flush, so anything left is still unsent.
        if msg:
            send(self.NOTIFY_TITLE, msg)

    @staticmethod
    def _entry_time(entry):
        """Return the entry's parsed date as a naive datetime, or None if it has none.

        ``published_parsed`` is preferred; ``updated_parsed`` is used as a
        fallback for entries that only carry an update date.
        """
        parsed = entry.get("published_parsed") or entry.get("updated_parsed")
        if parsed is None:
            return None
        return datetime.fromtimestamp(mktime(parsed))

    def remove_old_history(self):
        """Delete the History rows whose ``publish_at`` is more than seven days old."""
        # Only the records of the last week are kept.
        cutoff = datetime.now() - timedelta(days=7)
        History.delete().where(
            History.publish_at < cutoff.strftime("%Y-%m-%d")
        ).execute()
||
| 72 | |||
| 73 | |||
if __name__ == "__main__":
    # Script entry point: build the robot and run a single polling pass.
    robot = RssRobot()
    res = robot.main()
||
| 76 |