|
1
|
|
|
# coding: utf-8 |
|
2
|
|
|
import arrow |
|
3
|
|
|
import datetime |
|
4
|
|
|
import time |
|
5
|
|
|
from logging import getLogger |
|
6
|
|
|
|
|
7
|
|
|
# django classes |
|
8
|
|
|
from django.conf import settings |
|
9
|
|
|
from django.core.cache import caches |
|
10
|
|
|
|
|
11
|
|
|
# django_th classes |
|
12
|
|
|
from django_th.services.services import ServicesMgr |
|
13
|
|
|
# th_rss classes |
|
14
|
|
|
from th_rss.lib.feedsservice import Feeds |
|
15
|
|
|
|
|
16
|
|
|
# Module-wide logger; records are emitted on the 'django_th.trigger_happy'
# logging channel.
logger = getLogger('django_th.trigger_happy')

# Handle on the Django cache registered under the 'django_th' alias; the
# service stores the feed entries it has selected in it.
cache = caches['django_th']
|
19
|
|
|
|
|
20
|
|
|
|
|
21
|
|
|
class ServiceRss(ServicesMgr):
    """
        Service RSS

        Reads the entries of an RSS feed and keeps the ones published
        between the date the trigger last ran and now.
    """
    def __init__(self, token=None, **kwargs):
        super(ServiceRss, self).__init__(token, **kwargs)

    def _get_published(self, entry):
        """
        get the 'published' date of a feed entry

        The attributes ``published_parsed``, ``created_parsed`` and
        ``updated_parsed`` are tried in that order; the first one that is
        present and not None is used.

        :param entry: a feed entry that may expose the ``*_parsed``
                      attributes as time tuples
        :return: a naive datetime built from the time tuple, or '' when
                 the entry carries no usable date
        """
        candidates = ('published_parsed', 'created_parsed', 'updated_parsed')
        for attribute in candidates:
            # an attribute can exist and still be None (invalid date in the
            # feed): keep looking instead of giving up on the first match
            parsed = getattr(entry, attribute, None)
            if parsed is None:
                continue
            # NOTE(review): feedparser documents the *_parsed tuples as UTC,
            # so the fields are used as they are. Going through
            # time.mktime() would read them as local time and shift the
            # result by the server UTC offset.
            year, month, day, hour, minute, second = parsed[:6]
            # a time tuple may carry a leap second (60/61) that datetime
            # refuses
            return datetime.datetime(year, month, day, hour, minute,
                                     min(second, 59))
        return ''

    def read_data(self, **kwargs):
        """
        get the data from the service

        The selected entries are also stored in the cache under the keys
        'th_rss_<trigger_id>' and 'th_rss_uuid_<rss.uuid>'.

        :param kwargs: contain keyword args : trigger_id, date_triggered
                       and model name
        :type kwargs: dict
        :return: the feed entries published between date_triggered and now
        :rtype: list
        """
        date_triggered = kwargs.get('date_triggered')
        trigger_id = kwargs.get('trigger_id')
        kwargs['model_name'] = 'Rss'
        kwargs['app_label'] = 'django_th'
        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data(**kwargs)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        # the lower bound does not depend on the entries: convert it once,
        # and only when there is a date to convert
        if date_triggered is not None:
            date_triggered = arrow.get(
                str(date_triggered)).to(settings.TIME_ZONE)
        my_feeds = []

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        for entry in feeds.entries:
            # entry.*_parsed may be None when the date in a RSS Feed is
            # invalid so will have the "now" date as default
            published = self._get_published(entry)

            if published == '':
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            # without a trigger date there is no window to compare against,
            # so no entry is selected
            if date_triggered is not None and\
               now >= published >= date_triggered:
                my_feeds.append(entry)

                # digester
                # NOTE(review): assumed to be sent only for the selected
                # entries - confirm against the original indentation
                self.send_digest_event(trigger_id, entry.title, entry.link)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds
|
97
|
|
|
|