ServiceRss.read_data()   B
last analyzed

Complexity

Conditions 6

Size

Total Lines 48

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
dl 0
loc 48
rs 7.6528
c 0
b 0
f 0
1
# coding: utf-8
2
import arrow
3
import datetime
4
import time
5
from logging import getLogger
6
7
# django classes
8
from django.conf import settings
9
from django.core.cache import caches
10
11
# django_th classes
12
from django_th.services.services import ServicesMgr
13
# th_rss classes
14
from th_rss.lib.feedsservice import Feeds
15
16
# module-level logger registered under the 'django_th.trigger_happy' name
logger = getLogger('django_th.trigger_happy')
17
18
# Django cache configured under the 'django_th' alias; read_data() stores
# the list of matching feed entries in it
cache = caches['django_th']
19
20
21
class ServiceRss(ServicesMgr):
    """
        Service RSS

        Reads the entries of the RSS feed attached to a trigger, keeps the
        ones published since the trigger was last run and stores them in
        the 'django_th' cache.
    """
    def __init__(self, token=None, **kwargs):
        """
            :param token: passed through unchanged to ServicesMgr
            :param kwargs: passed through unchanged to ServicesMgr
        """
        super(ServiceRss, self).__init__(token, **kwargs)

    def _get_published(self, entry):
        """
        get the 'published' date of a feed entry

        The ``published_parsed``, ``created_parsed`` and ``updated_parsed``
        attributes are tried in that order and the first one that is
        present and not None is used.

        :param entry: a feed entry exposing the ``*_parsed`` time tuples
        :return: a naive datetime, or '' when the entry carries no usable
                 date
        """
        for attr in ('published_parsed', 'created_parsed', 'updated_parsed'):
            # an attribute may be missing, or present but None when the
            # date of the feed could not be parsed: in both cases try the
            # next one instead of giving up
            parsed = getattr(entry, attr, None)
            if parsed is not None:
                # NOTE(review): assumes feedparser-style entries, whose
                # *_parsed tuples are expressed in UTC. The fields are
                # used as they are: going through time.mktime() would
                # read them as local time and shift the result by the
                # UTC offset of the process.
                return datetime.datetime(*parsed[:6])
        return ''

    def read_data(self, **kwargs):
        """
            get the data from the service

            An entry is kept when its publication date lies between
            ``date_triggered`` and now (both bounds included). For each
            entry kept, ``self.send_digest_event`` is called with its title
            and link. The resulting list is stored in the cache under
            ``th_rss_<trigger_id>`` and ``th_rss_uuid_<rss.uuid>``.

            :param kwargs: contain keyword args : trigger_id and model name,
                           plus date_triggered, the lower bound of the
                           publication date; when it is missing or None no
                           entry is kept
            :type kwargs: dict
            :return: the feed entries that have been kept
            :rtype: list
        """
        date_triggered = kwargs.get('date_triggered')
        trigger_id = kwargs.get('trigger_id')
        kwargs['model_name'] = 'Rss'
        kwargs['app_label'] = 'django_th'
        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data(**kwargs)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        # the lower bound is the same for every entry: convert it once,
        # and only when there is something to convert
        if date_triggered is not None:
            date_triggered = arrow.get(
                str(date_triggered)).to(settings.TIME_ZONE)

        my_feeds = []

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        for entry in feeds.entries:
            # entry.*_parsed may be None when the date in a RSS Feed is invalid
            # so will have the "now" date as default
            published = self._get_published(entry)

            if published == '':
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            if date_triggered is not None and\
               now >= published >= date_triggered:
                my_feeds.append(entry)

                # digester
                self.send_digest_event(trigger_id, entry.title, entry.link)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds
97