1 import urllib2
, urllib
, re
2 from HTMLParser
import HTMLParser
4 MAX_DOWNLOAD
= 10240 # KB
9 class TitleParser(HTMLParser
):
11 def handle_starttag(self
, tag
, args
):
14 def handle_endtag(self
, tag
):
18 def handle_data(self
, data
):
22 class FirstGoogleParser(HTMLParser
):
24 def handle_starttag(self
, tag
, args
):
27 if args
.get("class") == "l":
28 self
.first_google
= args
['href']
31 class GoogleCalcParser(HTMLParser
):
34 def handle_starttag(self
, tag
, args
):
37 if args
.get("size") == "+1":
39 elif args
.get("size") == "-1":
43 def handle_data(self
, data
):
44 if self
.in_calc
and data
!= "Web":
46 def handle_charref(self
, char
):
50 end_of
= {"'": "'", '"': '"', '[': '\\]', '(': ')', '<': '>'}
51 def extract_url(message
):
53 start
= message
.index('http://')
55 end_chars
+= end_of
.get(message
[start
-1], "")
56 url
= re
.split("[%s]" % end_chars
, message
[start
:], 1)[0]
61 whitespace
= re
.compile(r
"\s+")
65 html
= urllib2
.urlopen(url
).read(MAX_DOWNLOAD
)
68 title
= whitespace
.sub(" ", p
.title
)
71 def first_google(query
):
72 url
= "http://www.google.com/search?q=" + urllib
.quote_plus(query
)
73 p
= FirstGoogleParser()
74 html
= urllib2
.urlopen(url
).read(MAX_DOWNLOAD
)
79 def google_calc(query
):
80 url
= "http://www.google.com/search?q=" + urllib
.quote_plus(query
)
81 p
= GoogleCalcParser()
82 html
= urllib2
.urlopen(url
).read(MAX_DOWNLOAD
)