Tweak stop and station regexes to catch more cases
[ottawa-travel-planner.git] / BusStopMashup.py
blobe4efbd2ff9de0e9811219eccca906f9d4ab3c63e
2 # vi: set softtabstop=4 shiftwidth=4 tabstop=8 expandtab:
4 """Downloads Google Maps bus stop locations using the official mashup."""
6 import urllib
7 import urllib2
8 import sys
9 import re
class StopRoute(object):
    """A bus route that serves a particular stop."""

    def __init__(self, number, direction):
        # Route number as taken from the page markup.
        self.number = number
        # Route direction; seems to be 10 or 11.
        self.direction = direction

    def __repr__(self):
        """Return a compact ``route(number,direction)`` description."""
        fields = (self.number, self.direction)
        return "route(%s,%s)" % fields
class StopLocation(object):
    """A latitude/longitude pair, stored as floats."""

    def __init__(self, latitude, longitude):
        # Both coordinates are passed through float(), so numeric strings
        # are accepted and anything float() rejects raises here.
        self.latitude, self.longitude = float(latitude), float(longitude)

    def __repr__(self):
        """Return a ``loc (latitude,longitude)`` description."""
        coords = (self.latitude, self.longitude)
        return "loc (%s,%s)" % coords
class BaseStop(object):
    """Fields shared by every kind of stop; all of them start out as None."""

    def __init__(self):
        # 560-1000 number.  This is not necessarily unique - there are
        # multiple stops #3034, for each platform at Billings Bridge.
        self.number = None
        # e.g. WD360 - looks to be unique.
        self.code = None
        # Usually an intersection.
        self.name = None
        self.location = None

    def basestr(self):
        """Return the shared fields as one comma-separated string."""
        fields = (self.number, self.code, self.name, self.location)
        return "560 %s,%s,%s,%s" % fields

    def __repr__(self):
        """Return ``basestop (...)`` wrapped around :meth:`basestr`."""
        summary = self.basestr()
        return "basestop (%s)" % summary
class Stop(BaseStop):
    """A regular bus stop."""

    def __init__(self):
        super(Stop, self).__init__()
        # StopRoute objects for the routes serving this stop.
        self.routes = []

    def __repr__(self):
        """Return the base description followed by the list of routes."""
        route_text = ", ".join(str(route) for route in self.routes)
        return "stop (%s): %s" % (self.basestr(), route_text)
class Station(BaseStop):
    """A Transitway or O-Train station.

    ``code`` will be the short (3- or 4-letter) station identifier and
    ``name`` will be the station name.
    """

    def __init__(self):
        super(Station, self).__init__()

    def __repr__(self):
        """Return ``station (...)`` wrapped around the base description."""
        summary = self.basestr()
        return "station (%s)" % summary
class UnknownStopType(BaseStop):
    """A map marker whose HTML matched none of the known marker patterns.

    The raw marker HTML is kept so the unrecognised markup can be inspected.
    """

    def __init__(self, location, html):
        # Initialise the inherited fields (number, code, name) to None so
        # that inherited helpers such as basestr() work on this object
        # instead of raising AttributeError.
        super(UnknownStopType, self).__init__()
        self.location = location
        self.html = html

    def __repr__(self):
        """Return ``UNKNOWN (location, html)``."""
        return "UNKNOWN (%s, %s)" % (self.location, self.html)
class HomeLocation(BaseStop):
    """The marker for the searched address itself rather than a stop.

    Records both the address that was asked for and the address text found
    in the marker.
    """

    def __init__(self, location, requestedAddress, respondedAddress):
        # Initialise the inherited fields (number, code, name) to None so
        # that inherited helpers such as basestr() work on this object
        # instead of raising AttributeError.
        super(HomeLocation, self).__init__()
        self.location = location
        self.requestedAddress = requestedAddress
        self.respondedAddress = respondedAddress

    def __repr__(self):
        """Return ``HOME: (asked ..., got ..., location)``."""
        return "HOME: (asked %s, got %s, %s)" % (
            self.requestedAddress, self.respondedAddress, self.location)
class Marker(object):
    """A raw map marker: a location plus the HTML attached to it."""

    def __init__(self, location, html):
        self.location, self.html = location, html

    def __repr__(self):
        """Return ``marker (location, html)``."""
        fields = (self.location, self.html)
        return "marker (%s, %s)" % fields
class InvalidAddressException(Exception):
    """Raised when the response reports the address as unknown."""

    def __init__(self, address):
        # The offending address becomes the exception's only argument, so
        # it is what str(exc) and exc.args expose.
        super(InvalidAddressException, self).__init__(address)
class Client(object):
    """Queries the bus stop map page and parses the markers it returns."""

    def findStops(self, address):
        """Yield one object per map marker found for ``address``.

        Each marker's HTML is tried against the station, stop and home
        patterns, in that order; the first pattern that matches decides how
        the marker is parsed.  Markers matching no pattern are yielded as
        UnknownStopType.

        This is a generator: nothing is fetched, and InvalidAddressException
        is not raised, until iteration starts.
        """
        html = self._grabHTML(address)
        self._checkForErrors(html, address)

        parsers = (
            (_station_rx, self._parseStation),
            (_stop_rx, self._parseStop),
            (_home_rx, self._parseHome),
        )
        for marker in self._findMarkers(html):
            for rx, func in parsers:
                match = rx.search(marker.html)
                if match is not None:
                    ret = func(address, marker, match)
                    if ret is not None:
                        yield ret
                    break
            else:
                # No pattern matched this marker.
                yield UnknownStopType(marker.location, marker.html)

    def _grabHTML(self, address):
        """Submit ``address`` to URL and return the whole response body."""
        params = {'address': address}
        params.update(FIXED_PARAMS)
        f = urllib2.urlopen(URL, urllib.urlencode(params))
        try:
            # One read() replaces line-by-line string concatenation.
            return f.read()
        finally:
            # Close the response even when reading fails.
            f.close()

    def _checkForErrors(self, html, address):
        """Raise InvalidAddressException if the page carries the error marker."""
        if _error_rx.search(html) is not None:
            raise InvalidAddressException(address)

    def _findMarkers(self, html):
        """Yield a Marker for every marker-creation call found in ``html``."""
        for m in _marker_rx.finditer(html):
            location = StopLocation(m.group("latitude"), m.group("longitude"))
            yield Marker(location, m.group("html"))

    def _fillBaseStop(self, marker, match, stop):
        """Copy the number, code, name and location onto ``stop``."""
        stop.number = match.group("stopnum")
        stop.code = match.group("code")
        stop.name = match.group("name").strip()
        stop.location = marker.location

    def _parseStation(self, address, marker, match):
        """Build a Station from a marker that matched the station pattern."""
        stop = Station()
        self._fillBaseStop(marker, match, stop)
        return stop

    def _parseStop(self, address, marker, match):
        """Build a Stop, including its routes, from a regular stop marker."""
        stop = Stop()
        self._fillBaseStop(marker, match, stop)

        # Every route link in the marker HTML becomes one StopRoute.
        for m in _stop_route_rx.finditer(marker.html):
            stop.routes.append(
                StopRoute(m.group("routenum"), m.group("direction")))
        return stop

    def _parseHome(self, address, marker, match):
        """Build a HomeLocation from the marker for the searched address."""
        return HomeLocation(marker.location, address, match.group("homeaddr"))
158 URL = "http://www.octranspo.com/maps/busstops/imap.asp"
159 FIXED_PARAMS = { 'page': 'search' }
161 # Any type of marker
162 # var marker = createMarker(new GPoint(..., ...), "blah blah");
163 # note: gmaps api v1 uses GPoint(long, lat) instead of GLatLng(lat, long)
164 _marker_re = (r'createMarker\(new GPoint\('
165 r'(?P<longitude>[^,]+),\s*(?P<latitude>[^,]+)\),\s*'
166 # ?s: DOTALL: . matches \n
167 # *? is non-greedy
168 r'"(?s)(?P<html>.*?)"\);\r?\n')
169 _marker_rx = re.compile(_marker_re)
172 # Marker for a transitway station
173 # <span><strong><b>613-560-1000 plus <a href='iframe.asp?route=busstop&INFO_PHONE=3034' target='iframe'>3034</a><br><a href='iframe.asp?route=bus_station&INFO_PHONE=3034&station_name=BILLINGS BRIDGE&station_id=BIB' target='iframe'>BILLINGS BRIDGE</b></strong></span>
174 # (?i): Make it case-insensitive
175 # For station name, assume they'll URL-encode it properly someday
176 _station_re = (r'(?i)INFO_PHONE=(?P<stopnum>\w+)'
177 r'.*station_name=(?P<name>[^<>&]+)'
178 r'.*station_id=(?P<code>\w+)')
179 _station_rx = re.compile(_station_re)
181 # Marker for a regular stop
182 # <span><strong><b>613-560-1000 plus <a href='iframe.asp?route=busstop&INFO_PHONE=4897' target='iframe'>4897</a></b></strong><small> (RA040)</small><br>BANK / TRANSITWAY<br><a href='iframe.asp?route=1&dir=10' target='iframe'>1</a> <a href='iframe.asp?route=5&dir=10' target='iframe'>5</a> <a href='iframe.asp?route=111&dir=10' target='iframe'>111</a> <a href='iframe.asp?route=141&dir=11' target='iframe'>141</a> <a href='iframe.asp?route=148&dir=10' target='iframe'>148</a> </span>
183 _stop_re = (r'(?i)INFO_PHONE=(?P<stopnum>\w*)'
184 r'.*?\<small\>\s*\((?P<code>[^)]+)\)'
185 r'.*\<br\>(?P<name>[^<>]+)\<br\>')
186 _stop_rx = re.compile(_stop_re)
188 _stop_route_re = (r'(?i)route=(?P<routenum>\w+)&dir=(?P<direction>\w+)')
189 _stop_route_rx = re.compile(_stop_route_re)
191 # Marker for the current location
192 # <span><strong>HERON RD & DATA CENTRE RD, OTTAWA, ON, CANADA</strong></span>"
193 _home_re = (r'(?i)(?P<homeaddr>[^<>]+), CANADA')
194 _home_rx = re.compile(_home_re)
196 _error_re = (r'G_GEO_UNKNOWN_ADDRESS')
197 _error_rx = re.compile(_error_re)
def main(argv=None):
    """Look up the address given on the command line and print each result.

    ``argv`` defaults to ``sys.argv``; every element after the first is
    joined with single spaces to form the address.  Returns None.
    """
    args = sys.argv if argv is None else argv
    address = " ".join(args[1:])
    for stop in Client().findStops(address):
        # Parenthesised single-argument form: prints the same text as the
        # bare Python 2 print statement.
        print(stop)


if __name__ == '__main__':
    sys.exit(main())