1 import urllib2
, urllib
, re
2 from HTMLParser
import HTMLParser
7 class TitleParser(HTMLParser
):
9 def handle_starttag(self
, tag
, args
):
12 def handle_endtag(self
, tag
):
16 def handle_data(self
, data
):
20 class FirstGoogleParser(HTMLParser
):
22 def handle_starttag(self
, tag
, args
):
24 if tag
== 'a' and args
.get("class") == "l":
25 self
.first_google
= args
['href']
28 end_of
= {"'": "'", '"': '"', '[': '\\]', '(': ')', '<': '>'}
29 def extract_url(message
):
31 start
= message
.index('http://')
33 end_chars
+= end_of
.get(message
[start
-1], "")
34 url
= re
.split("[%s]" % end_chars
, message
[start
:], 1)[0]
39 whitespace
= re
.compile(r
"\s+")
43 html
= urllib2
.urlopen(url
).read(10240)
46 title
= whitespace
.sub(" ", p
.title
)
49 def first_google(query
):
50 url
= "http://www.google.com/search?q=" + urllib
.quote_plus(query
)
51 p
= FirstGoogleParser()
52 html
= urllib2
.urlopen(url
).read(10240)