| Total Complexity | 5 |
| Total Lines | 33 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 1 | Features | 1 |
| 1 | # -*- coding: utf-8 -*- |
||
class Feeds(object):
    """
    Read a feed from a URL or from a path to a local file.

    The location is given once, at construction time, through the
    ``url_to_parse`` keyword argument; ``datas()`` then parses it.
    """

    # location of the feed; overridden per instance by __init__
    URL_TO_PARSE = ''
    # passed to feedparser.parse() as its ``agent`` argument
    USER_AGENT = 'TriggerHappy/1.0 +http://trigger-happy.eu/'

    def __init__(self, **kwargs):
        """
        Store the location of the feed to parse.

        :param url_to_parse: non-empty URL or local path of the feed
        :raises KeyError: when ``url_to_parse`` is missing or is ''
        """
        url_to_parse = kwargs.get('url_to_parse', '')
        if url_to_parse == '':
            raise KeyError('Missing argument "url_to_parse" eg.'
                           ' url_to_parse="/path/to/local/file.rss" or'
                           ' url_to_parse="http://domain.com/file.rss"')
        self.URL_TO_PARSE = url_to_parse

    def datas(self):
        """
        Read the data from the given URL or path to a local file.

        :return: the object returned by ``feedparser.parse()``; its
                 ``entries`` is replaced by '' when the feed is flagged
                 as malformed (bozo) for any reason other than a
                 character encoding override
        """
        data = feedparser.parse(self.URL_TO_PARSE, agent=self.USER_AGENT)

        # When chardet says
        # >>> chardet.detect(data)
        # {'confidence': 0.99, 'encoding': 'utf-8'}
        # bozo sometimes still reports
        # >>> data.bozo_exception
        # CharacterEncodingOverride('document declared as us-ascii,
        #                            but parsed as utf-8', )
        # The issue comes from the HTTP server, which returns a charset
        # different from the one used by the feed; it is not related to
        # feedparser, and the feed itself has been parsed.
        # So that single condition is not treated as an invalid feed:
        # entries are only dropped for the other bozo conditions.
        if data.bozo == 1 and not isinstance(
                data.get('bozo_exception'),
                feedparser.CharacterEncodingOverride):
            data.entries = ''

        return data
||
| 40 |