Passed
Branch master (ed257a)
by Mikael
03:02
created

dev_mozilla   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 98
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 31
dl 0
loc 98
rs 10
c 0
b 0
f 0
wmc 7
#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""
Module that can be used to get a string containing a short description
from the first search result on 'developer.mozilla.org'. It also adds
the URL to the details page.

Right now it is only made with JavaScript in mind, but searching
in other categories can easily be added later.

It was created to be used with the irc bot marvin:
https://github.com/mosbth/irc2phpbb

Created by Andreas 'thebiffman' Andersson ([email protected])
"""
# Standard library.
import os
import urllib2

# Third party.
from bs4 import BeautifulSoup, SoupStrainer

# Used to create the complete URL for a search query.
BASE_URL = 'https://developer.mozilla.org/en-US/search?q='
# Appended to restrict the search to the JavaScript/API topics.
URL_TOPIC_JS = '&topic=api&topic=js'
def getResultString(function, filter='js'):
    """
    Search for the given function name on the Mozilla Developer Network.

    Parameters:
        function: name to search for (appended to the search URL).
        filter: topic filter; when it contains 'js' or 'javascript' the
            search is restricted to the JavaScript/API topics.
            NOTE: the name shadows the builtin `filter`, but it is kept
            to preserve the public keyword-argument interface.

    Returns:
        The UTF-8 encoded string
        'MDN: <name> - <description> - <link>' for the first search
        result, or 'Found nothing.' when the fetch fails or the result
        page does not contain the expected tags.
    """
    # Assemble the basic search URL.
    url = BASE_URL + function

    # Only the JavaScript topic is supported right now; other
    # categories can be added here later.
    if 'js' in filter or 'javascript' in filter:
        url = url + URL_TOPIC_JS

    # Try to fetch the site. On failure, print the error and fall
    # through with siteData still None (best-effort behaviour kept).
    siteData = None
    try:
        siteData = urllib2.urlopen(url)
    # 'except X as e' replaces the Python-2-only 'except X, e' comma
    # form; 'as' works on Python 2.6+ and is valid Python 3 syntax.
    except urllib2.HTTPError as e:
        print(e.code)
    except urllib2.URLError as e:
        print(e.args)

    # Default value returned when nothing (usable) is found.
    result = 'Found nothing.'

    # Actually parse the page and extract the text.
    if siteData is not None:

        # Use SoupStrainer so only the first search result is parsed.
        strainer = SoupStrainer('li', {'class': 'result-1'})
        soup = BeautifulSoup(siteData, "lxml", parse_only=strainer)

        # First <a> tag is the title/name of the result, the second
        # one holds its URL; the <p> tag holds the description.
        linkTags = soup.find_all("a")
        descriptionTag = soup.find("p")

        # Guard against missing tags: too few links, or no <p> at all
        # (the original would raise AttributeError on a missing <p>).
        if len(linkTags) < 2 or descriptionTag is None:
            return result

        resultName = linkTags[0].get_text()
        resultLink = 'https://' + linkTags[1].get_text()
        resultDescription = descriptionTag.get_text().rstrip()

        # Put the text without html tags in the final display string.
        result = 'MDN: ' + resultName + ' - ' + resultDescription + ' - ' + resultLink
        result = result.encode('utf-8')

    # Return the result.
    return result
# Used for manual testing:
#print(getResultString('getElementById'))
101