1
|
|
|
# coding: utf-8 |
2
|
|
|
import arrow |
3
|
|
|
import datetime |
4
|
|
|
import time |
5
|
|
|
from logging import getLogger |
6
|
|
|
|
7
|
|
|
# django classes |
8
|
|
|
from django.conf import settings |
9
|
|
|
from django.core.cache import caches |
10
|
|
|
|
11
|
|
|
# django_th classes |
12
|
|
|
from django_th.services.services import ServicesMgr |
13
|
|
|
# th_rss classes |
14
|
|
|
from th_rss.lib.feedsservice import Feeds |
15
|
|
|
|
16
|
|
|
# module-level logger, registered under the 'django_th.trigger_happy' name
logger = getLogger('django_th.trigger_happy')

# cache backend configured under the 'django_th' alias; read_data() stores
# the retrieved feed entries in it
cache = caches['django_th']
19
|
|
|
|
20
|
|
|
|
21
|
|
|
class ServiceRss(ServicesMgr):
    """
        Service RSS

        Reads the entries of a feed and keeps the ones published between
        the last time the trigger fired and now.
    """
    def __init__(self, token=None, **kwargs):
        super(ServiceRss, self).__init__(token, **kwargs)

    def _get_published(self, entry):
        """
        get the 'published' attribute

        The first of ``published_parsed``, ``created_parsed`` and
        ``updated_parsed`` that is present *and* not None is used, so an
        entry with an unparsable publication date still falls back on
        its creation / update date.

        :param entry: one entry of the parsed feed
        :return: a naive UTC datetime, or '' when the entry carries no
                 usable date
        """
        # function-scope import: 'calendar' is not among the module imports
        import calendar

        date_attrs = ('published_parsed', 'created_parsed', 'updated_parsed')
        for attr in date_attrs:
            parsed = getattr(entry, attr, None)
            if parsed is None:
                continue
            # NOTE(review): assumes the *_parsed values are UTC struct_time
            # (as feedparser documents) - confirm against
            # th_rss.lib.feedsservice.
            # calendar.timegm() reads the tuple as UTC, whereas time.mktime()
            # reads it as local time and would shift the result by the
            # server's UTC offset.
            return datetime.datetime.utcfromtimestamp(calendar.timegm(parsed))
        # '' is the "no date found" marker expected by read_data()
        return ''

    def read_data(self, **kwargs):
        """
        get the data from the service

        Entries are kept when ``date_triggered <= published <= now``; an
        entry without any usable date is considered published "now".
        The kept entries are also stored in the cache under the keys
        ``th_rss_<trigger_id>`` and ``th_rss_uuid_<uuid>``.

        :param kwargs: contain keyword args : trigger_id, date_triggered
                       and model name
        :type kwargs: dict
        :return: the entries published since the trigger last fired
        :rtype: list
        """
        date_triggered = kwargs.get('date_triggered')
        trigger_id = kwargs.get('trigger_id')
        kwargs['model_name'] = 'Rss'
        kwargs['app_label'] = 'django_th'
        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data(**kwargs)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        # converted once, before the loop; without a trigger date nothing
        # can match, and arrow must not be asked to parse the string 'None'
        if date_triggered is not None:
            date_triggered = arrow.get(
                str(date_triggered)).to(settings.TIME_ZONE)

        my_feeds = []

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        for entry in feeds.entries:
            # entry.*_parsed may be None when the date in a RSS Feed is invalid
            # so will have the "now" date as default
            published = self._get_published(entry)

            if published == '':
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            if date_triggered is not None and \
                    now >= published >= date_triggered:
                my_feeds.append(entry)

                # digester
                self.send_digest_event(trigger_id, entry.title, entry.link)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds
97
|
|
|
|