Total Complexity | 5 |
Total Lines | 33 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 1 | Features | 1 |
1 | # -*- coding: utf-8 -*- |
||
class Feeds(object):
    """Read an RSS/Atom feed from a URL or a local file path with feedparser.

    The location to read is given once, at construction time, through the
    ``url_to_parse`` keyword argument; ``datas()`` then parses it.
    """

    # Location of the feed; overridden per instance by __init__.
    URL_TO_PARSE = ''
    # User-Agent string handed to feedparser as its ``agent`` argument.
    USER_AGENT = 'TriggerHappy/1.0 +http://trigger-happy.eu/'

    def __init__(self, **kwargs):
        """Store the feed location.

        :param url_to_parse: URL or path of a local file to parse
            (keyword argument, required, must not be empty).
        :raises KeyError: if ``url_to_parse`` is missing or empty.
        """
        url_to_parse = kwargs.get('url_to_parse')
        # Reject every empty value (missing key, '' and None alike) up front,
        # so that a bad location fails here and not later inside the parser.
        if not url_to_parse:
            raise KeyError('Missing argument "url_to_parse" eg.'
                           ' url_to_parse="/path/to/local/file.rss" or'
                           ' url_to_parse="http://domain.com/file.rss"')
        self.URL_TO_PARSE = url_to_parse

    def datas(self):
        """Read the data from the configured URL or local file path.

        :return: the object returned by ``feedparser.parse``; when the
            parser flagged the feed (``bozo == 1``) its ``entries``
            attribute is replaced by an empty string.
        """
        data = feedparser.parse(self.URL_TO_PARSE, agent=self.USER_AGENT)

        # Author's note on the bozo flag: chardet may report
        #   >>> chardet.detect(data)
        #   {'confidence': 0.99, 'encoding': 'utf-8'}
        # while feedparser still flags the feed with
        #   >>> data.bozo_exception
        #   CharacterEncodingOverride('document declared as us-ascii,
        #                              but parsed as utf-8', )
        # According to that note the mismatch comes from the HTTP server
        # announcing a charset different from the one used by the feed; it
        # is not a feedparser problem.
        # NOTE(review): every flagged feed, including the encoding-override
        # case described above, loses its entries here, and ``entries``
        # becomes '' (a str, not a list). Kept as is for backward
        # compatibility - confirm with callers before changing.
        if data.bozo == 1:
            data.entries = ''

        return data
||
40 |