Passed
Branch master (ed257a)
by Mikael
03:02
created

phpmanual.cacheLookup()   A

Complexity

Conditions 5

Size

Total Lines 16
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 12
nop 1
dl 0
loc 16
rs 9.3333
c 0
b 0
f 0
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
""" 
5
Module that can be used to get a string containing a really short
6
description of a function in PHP. It parses the PHP manual website to
7
get the information. 
8
9
It was created to be used with the irc bot marvin:
10
https://github.com/mosbth/irc2phpbb
11
12
Created by Andreas 'thebiffman' Andersson ([email protected])
13
"""
14
15
import urllib2
16
from bs4 import BeautifulSoup, SoupStrainer
17
import os
18
19
# Used to create the complete URL
20
BASE_URL = 'http://php.net/manual/en/function.'
21
ENDING_URL = '.php'
22
23
# File used to cache the function description strings
24
CACHE_FILE = 'phpmanual_cache.txt'
25
26
27
def cacheLookup(function):
    """
    Look up a previously cached description for a PHP function.

    Each cache line has the form written by getShortDescr:
    'PHP-manualen: <description> - <manual url>'. An entry matches when
    it ends with the manual URL built from the given function name, so
    a name that merely occurs inside another entry's description text
    (e.g. 'str' inside the entry for 'substr') is not a hit.

    function -- name of the PHP function, e.g. 'str_replace'.

    Returns the cached line without its trailing newline, or None if
    there is no matching entry or the cache file cannot be read.
    """
    # Build the URL the same way getShortDescr does ('_' becomes '-').
    url = BASE_URL + function.replace('_', '-') + ENDING_URL

    try:
        # 'with' closes the file on every path, including the early return.
        with open(CACHE_FILE, 'r') as cacheFile:
            for line in cacheFile:
                entry = line.rstrip('\n')
                # Lines that do not end with the URL (including malformed
                # ones) are skipped instead of aborting the whole lookup.
                if entry.endswith(url):
                    return entry
    except (IOError, OSError, UnicodeError):
        # A missing or unreadable cache is treated as a cache miss.
        return None
    return None
43
44
45
def saveToCache(description):
    """
    Append one prepared description line to the cache file.

    description -- the string to store; a newline is written after it.

    Errors raised while opening or writing the file are not handled
    here and propagate to the caller.
    """
    # 'with' guarantees the file is closed even if the write fails.
    with open(CACHE_FILE, 'a') as cacheFile:
        cacheFile.write(description + '\n')
55
56
57
def getShortDescr(function):
    """
    Return a short description of a PHP function from the PHP manual.

    The cache file is consulted first through cacheLookup. On a miss the
    manual page BASE_URL + name + ENDING_URL is fetched (with '_' in the
    name replaced by '-') and the text of its <p class="refpurpose">
    element is extracted.

    function -- name of the PHP function, e.g. 'str_replace'.

    Returns 'Found nothing.' if the page could not be fetched or holds
    no such element. Otherwise returns a UTF-8 encoded string of the
    form 'PHP-manualen: <description> - <url>', which is also appended
    to the cache through saveToCache.
    """
    notFound = 'Found nothing.'

    # If the function description is cached, return it.
    cached = cacheLookup(function)
    if cached is not None:
        return cached

    # Complete URL to the manual page (if it exists); the URL is built
    # with '-' in place of '_'.
    url = BASE_URL + function.replace('_', '-') + ENDING_URL

    # Try to fetch the page. If an incorrect function name is used this
    # fails; the error code/arguments are printed and nothing is found.
    try:
        siteData = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        print(e.code)
        return notFound
    except urllib2.URLError as e:
        print(e.args)
        return notFound

    try:
        # Use SoupStrainer to parse only the tag that is needed. Creating
        # the soup is what takes the most time (hence the cache file).
        tagsWithClass = SoupStrainer('p', {'class': 'refpurpose'})
        soup = BeautifulSoup(siteData, "lxml", parse_only=tagsWithClass)
    finally:
        # Release the HTTP response whether or not parsing succeeded.
        siteData.close()

    shortDescrPtag = soup.find("p", {"class": "refpurpose"})
    if shortDescrPtag is None:
        return notFound

    # Put the text, stripped of html tags and newlines, in the result.
    result = 'PHP-manualen: ' + shortDescrPtag.get_text() + ' - ' + url
    result = result.replace('\n', '').encode('utf-8')

    try:
        saveToCache(result)
    except (IOError, OSError) as e:
        # The cache is only an optimisation: a failed write must not
        # discard a description that was fetched successfully.
        print(e)

    return result
119
120
# Used for testing
121
#print(getShortDescr('substr'))
122