1 import urllib2
, urllib
, re
2 from HTMLParser
import HTMLParser
# Upper bound on how much of a response body is read.  do_parse hands this
# value to read(), so the unit is bytes: 10240 * 2 = 20480 bytes (20 KiB).
MAX_DOWNLOAD = 10240 * 2
9 class TitleParser(HTMLParser
):
11 def handle_starttag(self
, tag
, args
):
14 def handle_endtag(self
, tag
):
18 def handle_data(self
, data
):
class FirstONRParser(HTMLParser):
    """Pick out the link of tags marked ``class="searchlink"``.

    When such a tag is seen, its ``href`` value is stored on ``self.result``.
    """

    def handle_starttag(self, tag, args):
        """Record the ``href`` of a start tag whose class is ``searchlink``.

        HTMLParser supplies ``args`` as a list of ``(name, value)`` pairs,
        which has no ``.get``; it is converted to a dict before the lookup.
        A dict passed in directly is accepted as well.

        Raises:
            KeyError: if a matching tag carries no ``href`` attribute.
        """
        attrs = dict(args)
        if attrs.get("class") == "searchlink":
            # NOTE(review): each matching tag overwrites ``result``; the
            # class name suggests first-match semantics -- confirm how the
            # caller stops after the first hit.
            self.result = attrs['href']
class FirstGoogleParser(HTMLParser):
    """Pick out the link of tags marked ``class="l"``.

    When such a tag is seen, its ``href`` value is stored on ``self.result``.
    """

    def handle_starttag(self, tag, args):
        """Record the ``href`` of a start tag whose class is ``l``.

        HTMLParser supplies ``args`` as a list of ``(name, value)`` pairs,
        which has no ``.get``; it is converted to a dict before the lookup.
        A dict passed in directly is accepted as well.

        Raises:
            KeyError: if a matching tag carries no ``href`` attribute.
        """
        attrs = dict(args)
        if attrs.get("class") == "l":
            # NOTE(review): each matching tag overwrites ``result``; the
            # class name suggests first-match semantics -- confirm how the
            # caller stops after the first hit.
            self.result = attrs['href']
40 class GoogleCalcParser(HTMLParser
):
43 def handle_starttag(self
, tag
, args
):
46 if args
.get("size") == "+1":
48 elif self
.in_calc
and args
.get("size") == "-1":
52 def handle_data(self
, data
):
53 if self
.in_calc
and data
== "Web":
57 def handle_charref(self
, char
):
# Closing delimiter expected for each character that may sit directly in
# front of a URL.  extract_url splices the value into a regex character
# class, which is why "]" is stored backslash-escaped.
end_of = {
    "'": "'",
    '"': '"',
    '[': '\\]',
    '(': ')',
    '<': '>',
}
62 def extract_url(message
):
64 start
= message
.index('http://')
66 end_chars
+= end_of
.get(message
[start
-1], "")
67 url
= re
.split("[%s]" % end_chars
, message
[start
:], 1)[0]
72 def do_parse(url
, parser
):
73 html
= urllib2
.urlopen(url
).read(MAX_DOWNLOAD
)
80 whitespace
= re
.compile(r
"\s+")
83 raw_title
= do_parse(url
, TitleParser())
84 safe_title
= whitespace
.sub(" ", raw_title
)
85 title
= safe_title
.strip()
def first_onr(query, comic=None):
    """Search ohnorobot.com for ``query`` and parse the result page.

    Args:
        query: Search text; URL-encoded with ``urllib.quote_plus``.
        comic: Optional integer comic id, appended to the URL as
            ``&comic=<id>``.  Left out of the URL when ``None``.

    Returns:
        Whatever ``do_parse`` returns for a ``FirstONRParser``.
    """
    url = 'http://ohnorobot.com/index.pl?s=' + urllib.quote_plus(query)
    # "%d" % None raises TypeError, so the filter is added only when given.
    if comic is not None:
        url += "&comic=%d" % comic
    return do_parse(url, FirstONRParser())
def first_google(query):
    """Run a Google web search for ``query`` and parse the result page.

    Args:
        query: Search text; URL-encoded with ``urllib.quote_plus``.

    Returns:
        Whatever ``do_parse`` returns for a ``FirstGoogleParser``.
    """
    encoded_query = urllib.quote_plus(query)
    search_url = "http://www.google.com/search?q=" + encoded_query
    return do_parse(search_url, FirstGoogleParser())
def google_calc(query):
    """Send ``query`` to Google search and parse the page as a calculation.

    Args:
        query: Expression text; URL-encoded with ``urllib.quote_plus``.

    Returns:
        Whatever ``do_parse`` returns for a ``GoogleCalcParser``.
    """
    search_url = "http://www.google.com/search?q=" + urllib.quote_plus(query)
    calc_parser = GoogleCalcParser()
    return do_parse(search_url, calc_parser)