|
1
|
|
|
# coding=utf-8 |
|
2
|
|
|
""" |
|
3
|
|
|
etymology.py - Sopel Etymology Module
Copyright 2007-9, Sean B. Palmer, inamidst.com
Copyright 2018-9, Sopel contributors
Licensed under the Eiffel Forum License 2.

https://sopel.chat
|
9
|
|
|
""" |
|
10
|
|
|
from __future__ import unicode_literals, absolute_import, print_function, division |
|
11
|
|
|
|
|
12
|
|
|
from re import sub |
|
13
|
|
|
|
|
14
|
|
|
from requests import get |
|
15
|
|
|
|
|
16
|
|
|
from sopel.module import commands, example, NOLIMIT |
|
17
|
|
|
from sopel.tools import web |
|
18
|
|
|
|
|
19
|
|
|
# Bind ``unescape`` to an HTML-entity decoder on whichever Python is running.
# The fallbacks are tried in order; each one only runs if the previous import
# raised ImportError.
try:
    # Python 2.7
    from HTMLParser import HTMLParser
    h = HTMLParser()
    unescape = h.unescape
except ImportError:
    try:
        # Python 3.4+
        from html import unescape  # https://stackoverflow.com/a/2087433
    except ImportError:
        # Python 3.3... sigh
        from html.parser import HTMLParser
        h = HTMLParser()
        unescape = h.unescape
|
33
|
|
|
|
|
34
|
|
|
|
|
35
|
|
|
# URL template for a single word's entry page; ``%s`` is filled with the
# URL-quoted word.
ETYURI = 'https://www.etymonline.com/word/%s'
# URL template for the site search, offered as a fallback when no entry is
# found; ``%s`` is filled with the URL-quoted word.
ETYSEARCH = 'https://www.etymonline.com/search?q=%s'
|
37
|
|
|
|
|
38
|
|
|
|
|
39
|
|
|
def etymology(word):
    """Fetch the first etymology paragraph for ``word`` from etymonline.com.

    :param word: the term to look up
    :return: the paragraph text wrapped in double quotes (truncated with
             ``[…]`` when longer than 275 characters), followed by ``' - '``
             and the URL of the word's page; ``None`` when the page does not
             answer with HTTP 200 or no definition paragraph can be located
    :raises ValueError: if ``word`` is empty or longer than 25 characters
    """
    # @@ <nsh> sbp, would it be possible to have a flag for .ety to get 2nd/etc
    # entries? - http://swhack.com/logs/2006-07-19#T15-05-29

    if not word:
        raise ValueError("No word to look for.")

    if len(word) > 25:
        raise ValueError("Word too long: %s[…]" % word[:10])

    # The same URL is fetched here and appended to the reply below.
    uri = ETYURI % web.quote(word)

    # Without a timeout an unresponsive server would block the caller
    # indefinitely. NOTE(review): a timeout surfaces as a requests exception,
    # which is expected to derive from IOError — confirm against the
    # installed requests version.
    ety = get(uri, timeout=10)
    if ety.status_code != 200:
        return None

    page = ety.text

    # Let's find it: the first <p>...</p> after the definition marker.
    # The marker's spelling is kept exactly as-is; presumably it mirrors the
    # site's own markup.
    # str.find() returns -1 on a miss; slicing with that would return an
    # arbitrary chunk of the page, so every lookup is checked.
    marker = page.find("word__defination")
    if marker == -1:
        return None
    start = page.find("<p>", marker)
    if start == -1:
        return None
    stop = page.find("</p>", start)
    if stop == -1:
        return None
    sentence = page[start + 3:stop]  # + 3 skips the "<p>" tag itself

    # Clean up: decode HTML entities, then strip any remaining tags.
    sentence = unescape(sentence)
    sentence = sub('<[^<]+?>', '', sentence)

    maxlength = 275
    if len(sentence) > maxlength:
        sentence = sentence[:maxlength]
        # Reserve room for the ellipsis, then drop the last (probably cut)
        # word so the text ends on a word boundary.
        words = sentence[:-5].split(' ')
        words.pop()
        sentence = ' '.join(words) + ' […]'

    # Inner double quotes become single quotes so the outer pair stays
    # unambiguous.
    sentence = '"' + sentence.replace('"', "'") + '"'
    return sentence + ' - ' + uri
|
71
|
|
|
|
|
72
|
|
|
|
|
73
|
|
|
@commands('ety')
@example('.ety word')
def f_etymology(bot, trigger):
    """Look up the etymology of a word"""
    # The word is whatever followed the command name.
    word = trigger.group(2)

    try:
        result = etymology(word)
    except IOError:
        # Network-level failure while fetching the page.
        msg = "Can't connect to etymonline.com (%s)" % (ETYURI % web.quote(word))
        bot.say(msg, trigger.sender)
        # NOTE(review): NOLIMIT presumably tells Sopel not to count this
        # call against rate limits — confirm against sopel.module.
        return NOLIMIT
    except (AttributeError, TypeError):
        # Treat an unparsable response as "nothing found".
        result = None
    except ValueError as ve:
        # Validation errors from etymology() carry a user-facing message.
        # Read it from ``args`` instead of str(ve): on Python 2, str() of an
        # exception with a non-ASCII unicode message (the "[…]" in "Word too
        # long") raises UnicodeEncodeError.
        if len(ve.args) == 1:
            result = '%s' % (ve.args[0],)
        else:
            result = str(ve)

    if result is not None:
        bot.say(result, trigger.sender)
    else:
        # Nothing found: point the user at the site's search instead.
        uri = ETYSEARCH % web.quote(word)
        msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
        bot.say(msg, trigger.sender)
        return NOLIMIT
|
97
|
|
|
|