Total Complexity | 8 |
Total Lines | 39 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | # coding: utf-8 |
||
class HtmlEntities:
    """Rewrite ``&#...;`` references in a string as named HTML entities.

    The instance wraps one string; reading the ``html_entity_decode``
    property returns the converted text. The wrapped string itself is never
    modified.
    """

    def __init__(self, my_string):
        """Store the text to process.

        :param my_string: text that may contain ``&#...;`` references.
        """
        self.my_string = my_string

    def html_entity_decode_char(self, m, defs=htmlentities.entitydefs):
        """Substitute a ``&#name;`` match using a name-keyed table.

        :param m: regex match whose group 1 is the text between ``&#`` and
            ``;`` and whose group 0 is the whole reference.
        :param defs: mapping looked up with group 1 as the key.
        :returns: ``"&<value>;"`` where ``<value>`` is ``defs[group 1]``, or
            the matched text unchanged when the lookup fails.
        """
        # NOTE(review): the looked-up value (not the key) is wrapped in
        # "&...;" -- behaviour kept exactly as found; confirm this is intended.
        try:
            char = defs[m.group(1)]
            return "&{char};".format(char=char)
        except (KeyError, ValueError):
            # Unknown name (or a value that cannot be formatted): leave the
            # reference exactly as it appeared in the input.
            return m.group(0)

    def html_entity_decode_codepoint(self, m,
                                     defs=htmlentities.codepoint2name):
        """Substitute a numeric ``&#NNN;`` / ``&#xHH;`` match by entity name.

        :param m: regex match whose group 1 is the text between ``&#`` and
            ``;`` and whose group 0 is the whole reference.
        :param defs: mapping from integer code point to entity name.
        :returns: ``"&<name>;"`` when the code point has an entry in
            ``defs``; otherwise the matched text unchanged.
        """
        ref = m.group(1)
        try:
            # The table is keyed by integers, so the captured text must be
            # parsed first; looking it up as a string can never succeed.
            if ref[:1] in ("x", "X"):
                codepoint = int(ref[1:], 16)
            else:
                codepoint = int(ref)
            char = defs[codepoint]
            return "&{char};".format(char=char)
        except (KeyError, ValueError):
            # Not a number, or a code point without a named entity: leave
            # the reference exactly as it appeared in the input.
            return m.group(0)

    @property
    def html_entity_decode(self):
        """Return ``my_string`` with ``&#...;`` references rewritten.

        Two passes run over the text with the same pattern: first the
        name-keyed substitution, then the code-point substitution on the
        result of the first pass. References neither pass recognises are
        left untouched.
        """
        pattern = re.compile(r"&#(\w+?);")
        string = pattern.sub(self.html_entity_decode_char, self.my_string)
        return pattern.sub(self.html_entity_decode_codepoint, string)
||
45 |