|
1
|
|
|
#! /usr/bin/env python
|
|
2
|
|
|
# -*- coding: utf-8 -*-
|
|
3
|
|
|
|
|
4
|
|
|
"""
|
|
5
|
|
|
Module that can be used to get a string containing a short description
|
|
6
|
|
|
from the first search result on 'developer.mozilla.org'. It also adds
|
|
7
|
|
|
the URL to the details page.
|
|
8
|
|
|
|
|
9
|
|
|
Right now it is only made with JavaScript in mind, but searching
|
|
10
|
|
|
in other categories can easily be added later.
|
|
11
|
|
|
|
|
12
|
|
|
It was created to be used with the IRC bot marvin:
|
|
13
|
|
|
https://github.com/mosbth/irc2phpbb
|
|
14
|
|
|
|
|
15
|
|
|
Created by Andreas 'thebiffman' Andersson ([email protected])
|
|
16
|
|
|
"""
|
|
17
|
|
|
|
|
18
|
|
|
import urllib2
|
|
19
|
|
|
from bs4 import BeautifulSoup, SoupStrainer
|
|
20
|
|
|
import os
|
|
21
|
|
|
|
|
22
|
|
|
# Base of the MDN search URL; the search term is appended after 'q='.
BASE_URL = 'https://developer.mozilla.org/en-US/search?q='

# Query-string suffix appended to the search URL to limit the search to the
# 'api' and 'js' topics.
URL_TOPIC_JS = '&topic=api&topic=js'
|
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
def getResultString(function, filter='js'):
    """
    Search the Mozilla Developer Network for ``function`` and describe the
    first search result.

    Parameters:
        function: The search term, e.g. 'getElementById'. It is URL-encoded
            before being appended to BASE_URL.
        filter: Category selector. When it contains 'js' or 'javascript',
            URL_TOPIC_JS is appended to the search URL; any other value
            searches without a topic restriction.

    Returns:
        The UTF-8 encoded string
        'MDN: <name> - <description> - <link>' built from the first search
        result, or 'Found nothing.' when the page could not be fetched or
        did not contain a usable first result.

    HTTP and URL errors are not propagated to the caller: the error code
    (HTTPError) or the exception arguments (URLError) are printed and
    'Found nothing.' is returned.
    """
    # Imported locally because only this function needs quote_plus; the
    # fallback keeps the lookup working where the name lives in urllib.parse.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    # This is the default value that will be returned if nothing is found.
    result = 'Found nothing.'

    # On Python 2 quote_plus() expects a byte string, so unicode input is
    # encoded as UTF-8 first (a plain str is passed through untouched).
    if not isinstance(function, str):
        function = function.encode('utf-8')

    # Assemble the search URL. The term is URL-encoded so that spaces,
    # '&', '#' and similar characters cannot corrupt the query string.
    url = BASE_URL + quote_plus(function)
    if 'js' in filter or 'javascript' in filter:
        url = url + URL_TOPIC_JS

    # Try to fetch the page. On failure, report the error and fall back to
    # the default answer instead of raising.
    try:
        siteData = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        print(e.code)
        return result
    except urllib2.URLError as e:
        print(e.args)
        return result

    try:
        # Use SoupStrainer so that only the <li class="result-1"> element
        # (the first search result) is parsed.
        strainer = SoupStrainer('li', {'class': 'result-1'})
        soup = BeautifulSoup(siteData, "lxml", parse_only=strainer)
    finally:
        # Always release the connection, even if parsing blows up.
        siteData.close()

    linkTags = soup.find_all("a")
    descriptionTag = soup.find("p")

    # Two <a> tags (title and address) and a <p> (description) are needed;
    # without all of them there is no usable result.
    if len(linkTags) < 2 or descriptionTag is None:
        return result

    # First <a> tag holds the title/name of the result.
    resultName = linkTags[0].get_text()

    # The text of the second <a> tag is used as the result's address; the
    # scheme is prepended to it.
    resultLink = 'https://' + linkTags[1].get_text()

    # The <p> tag contains the description.
    resultDescription = descriptionTag.get_text().rstrip()

    # Put the text, free of HTML tags, into the final string.
    result = ('MDN: ' + resultName + ' - ' + resultDescription +
              ' - ' + resultLink)

    return result.encode('utf-8')
|
|
98
|
|
|
|
|
99
|
|
|
# Used for testing
|
|
100
|
|
|
#print(getResultString('getElementById'))
|
|
101
|
|
|
|