Conditions | 2 |
Total Lines | 19 |
Lines | 0 |
Ratio | 0 % |
Changes | 1 | ||
Bugs | 1 | Features | 1 |
1 | # -*- coding: utf-8 -*- |
||
def datas(self):
    """
    Read the feed data from a given URL or path to a local file.

    Parses ``self.URL_TO_PARSE`` with feedparser, passing
    ``self.USER_AGENT`` as the ``agent`` argument.

    Returns the object produced by ``feedparser.parse``. When feedparser
    flags the feed (``bozo == 1``), its ``entries`` attribute is replaced
    by an empty list, so callers receive no entries from that feed.
    """
    data = feedparser.parse(self.URL_TO_PARSE, agent=self.USER_AGENT)

    # chardet may report the payload as utf-8 with high confidence:
    #   >>> chardet.detect(data)
    #   {'confidence': 0.99, 'encoding': 'utf-8'}
    # while feedparser still flags the very same feed:
    #   >>> data.bozo_exception
    #   CharacterEncodingOverride('document declared as us-ascii, but parsed as utf-8', )
    # The mismatch comes from the HTTP server returning a charset that
    # differs from the one used by the feed; it is not a feedparser issue.
    # NOTE(review): the original comment stated that this detection had
    # been removed, yet the check below is still active -- confirm which
    # behaviour is intended.
    if data.bozo == 1:
        # Use an empty list (not an empty string) so that ``entries`` keeps
        # the same sequence type whether or not the feed was flagged.
        data.entries = []

    return data
||
40 |