|
1
|
|
|
#! /usr/bin/env python
|
|
2
|
|
|
# -*- coding: utf-8 -*-
|
|
3
|
|
|
|
|
4
|
|
|
"""
|
|
5
|
|
|
Module that can be used to get a string containing a really short
|
|
6
|
|
|
description of a function in PHP. It parses the PHP manual website to
|
|
7
|
|
|
get the information.
|
|
8
|
|
|
|
|
9
|
|
|
It was created to be used with the irc bot marvin:
|
|
10
|
|
|
https://github.com/mosbth/irc2phpbb
|
|
11
|
|
|
|
|
12
|
|
|
Created by Andreas 'thebiffman' Andersson ([email protected])
|
|
13
|
|
|
"""
|
|
14
|
|
|
|
|
15
|
|
|
import urllib2
|
|
16
|
|
|
from bs4 import BeautifulSoup, SoupStrainer
|
|
17
|
|
|
import os
|
|
18
|
|
|
|
|
19
|
|
|
# Used to create the complete URL
|
|
20
|
|
|
# Prefix of every manual page URL; the function name (with '_' replaced
# by '-') is inserted between BASE_URL and ENDING_URL.
BASE_URL = 'http://php.net/manual/en/function.'
|
|
21
|
|
|
# Suffix appended after the function name to complete the manual page URL.
ENDING_URL = '.php'
|
|
22
|
|
|
|
|
23
|
|
|
# File used to cache the function description strings
|
|
24
|
|
|
# Bare file name passed straight to open(), so it is resolved relative to
# the current working directory of the running process.
CACHE_FILE = 'phpmanual_cache.txt'
|
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
def cacheLookup(function):
    """
    Look up the cached description line for a PHP function.

    Every line in the cache file has the form
    'PHP-manualen: <description> - <manual url>', where the URL is built
    from BASE_URL, the function name (with '_' replaced by '-') and
    ENDING_URL. A line matches only when it ends with the exact URL for
    the requested function, so asking for 'str' does not return the entry
    for 'substr' just because the name appears in the description text.

    Returns the matching line without its trailing newline, or None when
    there is no match or the cache file is missing or unreadable.
    """
    # The manual URL is the only part of a cached line that identifies the
    # function unambiguously, so match on it instead of the free text.
    wantedEnding = ' - ' + BASE_URL + function.replace('_', '-') + ENDING_URL

    try:
        # 'with' closes the file even when we return from inside the loop.
        with open(CACHE_FILE, 'r') as cacheFile:
            for line in cacheFile:
                entry = line.rstrip('\n')
                # Malformed lines simply do not match and are skipped;
                # they no longer abort the whole lookup.
                if entry.endswith(wantedEnding):
                    return entry
    except (IOError, OSError, UnicodeError):
        # The cache is best effort: a missing or unreadable file is
        # treated as a cache miss.
        return None
    return None
|
|
43
|
|
|
|
|
44
|
|
|
|
|
45
|
|
|
def saveToCache(description):
    """
    Append a prepared description string to the cache file.

    The string is written as one line (a newline is added here). The
    cache file is created if it does not exist yet. Errors raised while
    opening or writing the file propagate to the caller.
    """
    # 'with' guarantees the file is closed even if the write fails.
    with open(CACHE_FILE, 'a') as cacheFile:
        cacheFile.write(description + '\n')
|
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
def getShortDescr(function):
    """
    Return a short description of a PHP function from the PHP manual.

    The cache file is consulted first. On a cache miss the manual page
    for the function is fetched and the text of its
    <p class="refpurpose"> tag is used as the description.

    Returns the string 'Found nothing.' when the page cannot be fetched
    or holds no description, otherwise a UTF-8 encoded string of the form
    'PHP-manualen: <description> - <manual url>'. A freshly fetched
    description is also appended to the cache file.
    """
    # If the function description is cached, return it
    cached = cacheLookup(function)
    if cached is not None:
        return cached

    # The manual's URLs use '-' where PHP function names use '_'.
    url = BASE_URL + function.replace('_', '-') + ENDING_URL

    # Value returned whenever no description can be produced.
    notFound = 'Found nothing.'

    # Try to fetch the page. If an incorrect function name is used this
    # fails, and the error code/arguments are printed.
    try:
        siteData = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        print(e.code)
        return notFound
    except urllib2.URLError as e:
        print(e.args)
        return notFound

    try:
        # Use SoupStrainer to only parse the tags that are needed.
        # Creating the soup is what takes the most time (hence the
        # .txt-file cache).
        tagsWithClass = SoupStrainer('p', {'class': 'refpurpose'})
        soup = BeautifulSoup(siteData, "lxml", parse_only=tagsWithClass)
    finally:
        # Release the HTTP connection whether or not parsing succeeded.
        siteData.close()

    # Get the specific tag that holds the short description
    shortDescrPtag = soup.find("p", {"class": "refpurpose"})
    if shortDescrPtag is None:
        return notFound

    # Put the text without html tags in the result string, on one line
    result = 'PHP-manualen: ' + shortDescrPtag.get_text() + ' - ' + url
    result = result.replace('\n', '').encode('utf-8')

    # Cache the result (i.e. save it to the cache txt-file). Caching is
    # best effort: a failed write must not hide a description that was
    # fetched successfully.
    try:
        saveToCache(result)
    except (IOError, OSError):
        pass

    return result
|
|
119
|
|
|
|
|
120
|
|
|
# Used for testing
|
|
121
|
|
|
#print(getShortDescr('substr'))
|
|
122
|
|
|
|