| @@ 17-39 (lines=23) @@ | ||
| 14 | ||
| 15 | ||
| 16 | # from http://stackoverflow.com/questions/1197981/convert-html-entities |
|
def asciify2(s: str) -> str:
    """Decode or strip the HTML character references found in *s*.

    Three rewrites are applied, in this order:

    1. Decimal references such as ``&#233;`` are replaced by the character
       with that code point. References whose number is not a valid code
       point are left in the text untouched.
    2. Named references other than ``&amp;`` whose name is a key of
       ``html.entities.name2codepoint`` (for example ``&nbsp;``) are removed.
       Names missing from that table are left untouched.
    3. ``&amp;`` is replaced by a literal ``&``.

    Args:
        s: Text that may contain HTML character references.

    Returns:
        The rewritten text.
    """
    # Pass 1: decimal numeric references. Deduplicate first, because each
    # str.replace call already rewrites every occurrence of that reference.
    for hit in set(re.findall(r"&#\d+;", s)):
        try:
            # hit looks like "&#NNN;", so hit[2:-1] is just the digits.
            s = s.replace(hit, chr(int(hit[2:-1])))
        except (ValueError, OverflowError):
            # chr() rejects numbers outside the valid code point range
            # (ValueError) and numbers too large for a C int (OverflowError).
            # Leave such references as they are rather than failing.
            pass

    # Pass 2: named references. "&amp;" is set aside so that it is decoded
    # to "&" in pass 3 instead of being removed like the other known names.
    amp = "&amp;"
    hits = set(re.findall(r"&\w+;", s))
    hits.discard(amp)
    for hit in hits:
        # hit looks like "&name;", so hit[1:-1] is the bare entity name.
        if hit[1:-1] in html.entities.name2codepoint:
            s = s.replace(hit, "")

    # Pass 3: decode the ampersand last, so a "&" produced here cannot be
    # combined with following text and mistaken for a reference in pass 2.
    return s.replace(amp, "&")
|
| 40 | ||
| 41 | ||
| 42 | def opensoup(url): |
|
| @@ 16-38 (lines=23) @@ | ||
| 13 | ||
| 14 | ||
| 15 | # from http://stackoverflow.com/questions/1197981/convert-html-entities |
|
def asciify2(s: str) -> str:
    """Decode or strip the HTML character references found in *s*.

    Three rewrites are applied, in this order:

    1. Decimal references such as ``&#233;`` are replaced by the character
       with that code point. References whose number is not a valid code
       point are left in the text untouched.
    2. Named references other than ``&amp;`` whose name is a key of
       ``html.entities.name2codepoint`` (for example ``&nbsp;``) are removed.
       Names missing from that table are left untouched.
    3. ``&amp;`` is replaced by a literal ``&``.

    Args:
        s: Text that may contain HTML character references.

    Returns:
        The rewritten text.
    """
    # Pass 1: decimal numeric references. Deduplicate first, because each
    # str.replace call already rewrites every occurrence of that reference.
    for hit in set(re.findall(r"&#\d+;", s)):
        try:
            # hit looks like "&#NNN;", so hit[2:-1] is just the digits.
            s = s.replace(hit, chr(int(hit[2:-1])))
        except (ValueError, OverflowError):
            # chr() rejects numbers outside the valid code point range
            # (ValueError) and numbers too large for a C int (OverflowError).
            # Leave such references as they are rather than failing.
            pass

    # Pass 2: named references. "&amp;" is set aside so that it is decoded
    # to "&" in pass 3 instead of being removed like the other known names.
    amp = "&amp;"
    hits = set(re.findall(r"&\w+;", s))
    hits.discard(amp)
    for hit in hits:
        # hit looks like "&name;", so hit[1:-1] is the bare entity name.
        if hit[1:-1] in html.entities.name2codepoint:
            s = s.replace(hit, "")

    # Pass 3: decode the ampersand last, so a "&" produced here cannot be
    # combined with following text and mistaken for a reference in pass 2.
    return s.replace(amp, "&")
|
| 39 | ||
| 40 | ||
| 41 | def opensoup(url): |
|