| Total Complexity | 10 |
| Total Lines | 72 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
# -*- encoding: utf-8 -*-
"""
cron: */15 6-22/2 * * *
new Env('RSS 订阅');
"""
||
| 6 | |||
| 7 | from datetime import datetime, timedelta, timezone |
||
| 8 | |||
| 9 | import feedparser |
||
| 10 | |||
| 11 | from notify_mtr import send |
||
| 12 | from utils_models import History, Rss, db |
||
| 13 | |||
| 14 | |||
class RssRobot:
    """Poll every configured RSS feed and push newly published entries.

    Feed definitions are read from ``Rss`` rows, already-pushed links are
    tracked in ``History`` rows, and notifications go out through ``send``
    in batches of ten entries per message.
    """

    @staticmethod
    def _parse_published(raw, date_format):
        """Parse a feed timestamp string into a timezone-aware datetime.

        Args:
            raw: The entry's ``published`` string.
            date_format: ``strptime`` format for this feed.

        Returns:
            The parsed datetime; values without zone information are
            tagged as UTC.

        Raises:
            ValueError: If ``raw`` does not match ``date_format``.
        """
        pub_t = datetime.strptime(raw, date_format)
        if pub_t.tzinfo is None:
            pub_t = pub_t.replace(tzinfo=timezone.utc)
        return pub_t

    def main(self):
        """Run one polling pass over all feeds.

        Prunes old history, then for each feed pushes every entry that has
        not been pushed yet and was published less than ``rss.before`` days
        ago. Pushed links are stored in ``History`` once per feed.
        """
        self.remove_old_history()

        today = datetime.today().strftime("%Y-%m-%d")
        # A set gives O(1) membership tests and also lets us de-duplicate
        # links that show up more than once within this run.
        # NOTE(review): only rows dated today count as "already pushed",
        # although a week of history is retained; with ``before`` > 1 the
        # same entry could presumably be pushed again on a later day --
        # confirm the intended window against the History model.
        pushed_links = {
            record.url
            for record in History.select().where(History.publish_at == today)
        }

        msg = ""
        no = 1  # running 1-based item number across all feeds
        for rss in Rss.select():
            new_history = []
            feed = feedparser.parse(rss.feed)
            for entry in feed.entries:
                pub_t = self._parse_published(entry["published"], rss.date_type)
                age_seconds = datetime.now().timestamp() - pub_t.timestamp()
                if entry.link in pushed_links or age_seconds >= rss.before * 86400:
                    continue

                msg += f"{str(no).zfill(2)}.{entry.title}\n{entry.link}\n\n"
                if no % 10 == 0:
                    # Flush a full batch of ten items.
                    send("RSS 订阅", msg)
                    msg = ""
                no += 1
                pushed_links.add(entry.link)
                new_history.append(History(url=entry.link))

            # Skip the transaction entirely when this feed had nothing new.
            if new_history:
                with db.atomic():
                    History.bulk_create(new_history, batch_size=10)

        # Flush whatever is left. Checking the buffer itself (rather than
        # the counter, which is already one past the last item) avoids both
        # dropping a trailing partial batch and sending an empty message.
        if msg:
            send("RSS 订阅", msg)

    def remove_old_history(self):
        """Delete ``History`` rows whose ``publish_at`` is older than 7 days."""
        # Keep only the most recent week of records.
        cutoff = datetime.now() - timedelta(days=7)
        History.delete().where(
            History.publish_at < cutoff.strftime("%Y-%m-%d")
        ).execute()
||
| 68 | |||
| 69 | |||
if __name__ == "__main__":
    # Run a single polling pass when executed as a script. The result of
    # main() is not bound because nothing reads it.
    RssRobot().main()
||
| 72 |