Added $calc, dummy for $weather.
[halbot.git] / html.py
blob a5943dd20a6b649f27a17da27c3b0c2cd1b32f7c
import urllib2, urllib, re
from HTMLParser import HTMLParser

MAX_DOWNLOAD = 10240  # bytes (10 KB) read from each fetched page

# Raised by the parsers below to stop parsing early once the data of
# interest has been collected.
class Done(Exception):
    pass

# Collects the text inside a page's <title> element and raises Done as
# soon as the closing </title> tag is seen.
class TitleParser(HTMLParser):
    is_title = False
    title = ""
    def handle_starttag(self, tag, args):
        if tag == "title":
            self.is_title = True
    def handle_endtag(self, tag):
        if tag == "title":
            raise Done
    def handle_data(self, data):
        if self.is_title:
            self.title += data
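
# A minimal usage sketch (not part of the original module) of the
# early-exit pattern shared by all parsers here: feed() is wrapped in
# try/except Done so parsing stops once the wanted data is collected.
#
#     p = TitleParser()
#     try:
#         p.feed("<html><head><title>Example Page</title></head>...")
#     except Done:
#         pass
#     # p.title == "Example Page"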

# Pulls the href of the first result link on a Google results page;
# result links were marked with class="l" in the markup of the time.
class FirstGoogleParser(HTMLParser):
    first_google = ""
    def handle_starttag(self, tag, args):
        if tag == 'a':
            args = dict(args)
            if args.get("class") == "l":
                self.first_google = args['href']
                raise Done

# Extracts the answer line from Google's calculator box: the result sits
# in a <font size="+1"> element, and the following <font size="-1">
# marks the end of the box.
class GoogleCalcParser(HTMLParser):
    result = ""
    in_calc = False
    def handle_starttag(self, tag, args):
        if tag == "font":
            args = dict(args)
            if args.get("size") == "+1":
                self.in_calc = True
            elif args.get("size") == "-1":
                raise Done
        elif tag == "sup":
            self.result += "^"  # exponents are rendered as <sup>
    def handle_data(self, data):
        if self.in_calc and data != "Web":
            self.result += data
    def handle_charref(self, char):
        if char == "215":
            self.result += "x"  # &#215; is the multiplication sign
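
# Illustrative trace over the markup Google's calculator box used at the
# time (an assumption, not verified against the live site): feeding
# '<font size="+1">2 &#215; 2 = 4</font>' sets in_calc at the opening tag,
# turns the &#215; charref into "x", and leaves result == "2 x 2 = 4";
# the later <font size="-1"> on the results page raises Done.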

# Maps a character that may open a quoted/bracketed URL to the character
# (escaped for use inside a regex character class) that would close it.
end_of = {"'": "'", '"': '"', '[': '\\]', '(': ')', '<': '>'}
def extract_url(message):
    end_chars = " ,!"
    start = message.index('http://')
    if start != 0:
        # If the URL is quoted or bracketed, its closing character also
        # terminates it.
        end_chars += end_of.get(message[start-1], "")
    url = re.split("[%s]" % end_chars, message[start:], 1)[0]
    # Trailing sentence punctuation is not part of the URL.
    if url[-1] in '.?':
        url = url[:-1]
    return url
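
# Worked examples (not in the original source) of what extract_url returns;
# the trailing comma and the closing quote are stripped via end_chars/end_of:
#
#     extract_url("see http://example.com/page, thanks")
#         -> "http://example.com/page"
#     extract_url('link: "http://example.com/a?b=1" here')
#         -> "http://example.com/a?b=1"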

whitespace = re.compile(r"\s+")

# Fetch up to MAX_DOWNLOAD bytes of the page at url and return its <title>,
# with runs of whitespace collapsed.
def get_title(url):
    p = TitleParser()
    html = urllib2.urlopen(url).read(MAX_DOWNLOAD)
    try: p.feed(html)
    except Done: pass
    title = whitespace.sub(" ", p.title)
    return title.strip()

# Return the URL of the first Google result for query ("" if none found).
def first_google(query):
    url = "http://www.google.com/search?q=" + urllib.quote_plus(query)
    p = FirstGoogleParser()
    html = urllib2.urlopen(url).read(MAX_DOWNLOAD)
    try: p.feed(html)
    except Done: pass
    return p.first_google

# Return the text of Google's calculator answer for query ("" if Google
# shows no calculator box).
def google_calc(query):
    url = "http://www.google.com/search?q=" + urllib.quote_plus(query)
    p = GoogleCalcParser()
    html = urllib2.urlopen(url).read(MAX_DOWNLOAD)
    try: p.feed(html)
    except Done: pass
    return p.result
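
# Rough usage sketch (not part of the original module).  The Google helpers
# scrape google.com's markup as it was at the time of this commit, so against
# the current site they may simply return "".
if __name__ == "__main__":
    print get_title("http://www.python.org/")
    print first_google("python htmlparser")
    print google_calc("2+2")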