Feeds.__init__()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
c 0
b 0
f 0
dl 0
loc 6
rs 9.4285
1
# -*- coding: utf-8 -*-
2
import feedparser
3
4
__all__ = ['Feeds']
5
6
7
class Feeds(object):
    """
        Thin wrapper around ``feedparser.parse`` for one feed location.

        The location (a URL or a path to a local file) is given once at
        construction time through the ``url_to_parse`` keyword argument
        and is read later by :meth:`datas`.
    """

    # location of the feed; overridden per instance in __init__
    URL_TO_PARSE = ''
    # value passed as ``agent`` to feedparser.parse
    USER_AGENT = 'TriggerHappy/1.0 +http://trigger-happy.eu/'

    def __init__(self, **kwargs):
        """
            store the location of the feed to parse

            :param kwargs: must contain ``url_to_parse``, a non-empty
                URL or path to a local file
            :raises KeyError: if ``url_to_parse`` is missing, ``None``
                or an empty string
        """
        url_to_parse = kwargs.get('url_to_parse')

        # A missing key and an explicit None are rejected the same way as
        # an empty string, so an unusable location fails here instead of
        # later inside feedparser.
        if url_to_parse is None or url_to_parse == '':
            raise KeyError('Missing argument "url_to_parse" eg.'
                           ' url_to_parse="/path/to/local/file.rss" or'
                           ' url_to_parse="http://domain.com/file.rss"')

        self.URL_TO_PARSE = url_to_parse

    def datas(self):
        """
            read the data from a given URL or path to a local file

            :return: the object returned by ``feedparser.parse``; when
                feedparser flags the feed (``bozo == 1``) its ``entries``
                attribute is replaced by an empty string
        """
        data = feedparser.parse(self.URL_TO_PARSE, agent=self.USER_AGENT)

        # Note from the original author: feedparser may flag a feed even
        # when chardet is confident about its encoding, e.g.
        # >>> chardet.detect(data)
        # {'confidence': 0.99, 'encoding': 'utf-8'}
        # >>> data.bozo_exception
        # CharacterEncodingOverride('document declared as us-ascii, but parsed as utf-8', )
        # The mismatch comes from the HTTP server returning a charset that
        # differs from the one of the feed; it is not a feedparser issue.
        # NOTE(review): the author mentions having removed "this detection";
        # presumably a chardet based check - confirm against history.
        # Any flagged feed is still treated as invalid below.
        if data.bozo == 1:
            # '' (not a list) is kept on purpose: callers may rely on it
            data.entries = ''

        return data
40