Script to grab stop codes from allstops.xml and sort
[ottawa-travel-planner.git] / BusStopMashup.py
blob11c25635d95d4e3673de278a4a9d3163571686bb
2 # vi: set softtabstop=4 shiftwidth=4 tabstop=8 expandtab:
4 """Downloads Google Maps bus stop locations using the official mashup."""
6 import urllib
7 import urllib2
8 import sys
9 import re
class StopRoute(object):
    """One route serving a stop: a route number plus a direction code."""

    def __init__(self, number, direction):
        # route number
        self.number = number
        # route direction, seems to be 10 or 11
        self.direction = direction

    def __repr__(self):
        fields = (self.number, self.direction)
        return "route(%s,%s)" % fields
class StopLocation(object):
    """A latitude/longitude pair; both values are converted with float()."""

    def __init__(self, latitude, longitude):
        # float() raises ValueError/TypeError for values it cannot convert.
        self.latitude = float(latitude)
        self.longitude = float(longitude)

    def __repr__(self):
        coords = (self.latitude, self.longitude)
        return "loc (%s,%s)" % coords
class BaseStop(object):
    """Attributes shared by every kind of stop; all of them start as None."""

    def __init__(self):
        # 560-1000 number.  This is not necessarily unique - there are
        # multiple stops #3034, one for each platform at Billings Bridge.
        self.number = None
        # e.g. WD360 - looks to be unique
        self.code = None
        # usually an intersection
        self.name = None
        self.location = None
        # search address that returned this stop
        self.requestedAddress = None

    def basestr(self):
        """Return the "560 number,code,name,location" summary text.

        code and name are rendered with repr(); number and location with
        their plain string form.
        """
        parts = (self.number, repr(self.code), repr(self.name), self.location)
        return "560 %s,%s,%s,%s" % parts

    def __repr__(self):
        summary = self.basestr()
        return "basestop (%s)" % summary
class Stop(BaseStop):
    """A regular bus stop."""

    def __init__(self):
        super(Stop, self).__init__()
        # list of StopRoutes at this stop
        self.routes = []

    def __repr__(self):
        summary = self.basestr()
        route_text = ", ".join(str(route) for route in self.routes)
        return "stop (%s): %s" % (summary, route_text)
class Station(BaseStop):
    """A Transitway or O-Train station."""

    def __init__(self):
        # No extra attributes.  For a station:
        #   code will be short (3- or 4-letter) station identifier
        #   name will be station name
        super(Station, self).__init__()

    def __repr__(self):
        summary = self.basestr()
        return "station (%s)" % summary
class UnknownStopType(BaseStop):
    """A marker that could not be classified; keeps its location and raw HTML."""

    def __init__(self, location, html):
        super(UnknownStopType, self).__init__()
        # Overrides the None set by BaseStop.__init__.
        self.location = location
        # The marker HTML, kept as-is.
        self.html = html

    def __repr__(self):
        fields = (self.location, repr(self.html))
        return "UNKNOWN (%s, %s)" % fields
class HomeLocation(BaseStop):
    """The marker for the searched-for location itself.

    Stores both the address that was asked for (requestedAddress) and the
    address that came back (respondedAddress).
    """

    def __init__(self, location, requestedAddress, respondedAddress):
        super(HomeLocation, self).__init__()
        # These two replace the None defaults from BaseStop.__init__.
        self.location = location
        self.requestedAddress = requestedAddress
        self.respondedAddress = respondedAddress

    def __repr__(self):
        asked = repr(self.requestedAddress)
        got = repr(self.respondedAddress)
        return "HOME: (asked %s, got %s, %s)" % (asked, got, self.location)
class Marker(object):
    """A raw map marker: a location together with its HTML text."""

    def __init__(self, location, html):
        self.location = location
        self.html = html

    def __repr__(self):
        fields = (self.location, repr(self.html))
        return "marker (%s, %s)" % fields
class InvalidAddressException(Exception):
    """Error carrying the address that could not be looked up."""

    def __init__(self, address):
        # Pass the address on to Exception so it is stored in .args.
        Exception.__init__(self, address)
class Client(object):
    """Fetches the bus stop map page for an address and parses its markers."""

    def findStops(self, address):
        """Yield one object per map marker found for address.

        Each marker's HTML is tried against the station, stop and home
        patterns in that order; the first pattern that matches decides which
        parser handles it.  A marker matching none of them is yielded as an
        UnknownStopType.

        Raises InvalidAddressException (from _checkForErrors) when the page
        contains the unknown-address error marker.  Because this is a
        generator, nothing is fetched until iteration starts.
        """
        html = self._grabHTML(address)
        self._checkForErrors(html, address)
        parsers = (
            (_station_rx, self._parseStation),
            (_stop_rx, self._parseStop),
            (_home_rx, self._parseHome))
        for marker in self._findMarkers(html):
            for (rx, func) in parsers:
                match = rx.search(marker.html)
                if match is not None:
                    ret = func(address, marker, match)
                    if ret is not None:
                        yield ret
                    break
            else:
                # No pattern matched (the inner loop never hit break).
                yield UnknownStopType(marker.location, marker.html)

    def _grabHTML(self, address):
        """POST the search for address to URL and return the response body."""
        params = {'address': address}
        params.update(FIXED_PARAMS)
        f = urllib2.urlopen(URL, urllib.urlencode(params))
        try:
            # Read the body in one call instead of concatenating line by line.
            return f.read()
        finally:
            # Close the response even if reading fails.
            f.close()

    def _checkForErrors(self, html, address):
        """Raise InvalidAddressException if html contains the error marker."""
        if _error_rx.search(html) is not None:
            raise InvalidAddressException(address)

    def _findMarkers(self, html):
        """Yield a Marker for every createMarker(...) call found in html.

        The marker HTML is decoded using REAL_ENCODING; latitude and
        longitude are taken from the named groups of the marker pattern.
        """
        for m in _marker_rx.finditer(html):
            location = StopLocation(m.group("latitude"), m.group("longitude"))
            yield Marker(location, m.group("html").decode(REAL_ENCODING))

    def _fillBaseStop(self, address, marker, match, stop):
        """Copy the fields common to stops and stations onto stop.

        match must provide the named groups stopnum, code and name; the name
        is stripped of surrounding whitespace.
        """
        stop.number = match.group("stopnum")
        stop.code = match.group("code")
        stop.name = match.group("name").strip()
        stop.location = marker.location
        stop.requestedAddress = address

    def _parseStation(self, address, marker, match):
        """Build a Station from a marker that matched the station pattern."""
        stop = Station()
        self._fillBaseStop(address, marker, match, stop)
        return stop

    def _parseStop(self, address, marker, match):
        """Build a Stop, including every route link found in the marker HTML."""
        stop = Stop()
        self._fillBaseStop(address, marker, match, stop)

        stop.routes.extend(
            StopRoute(m.group("routenum"), m.group("direction"))
            for m in _stop_route_rx.finditer(marker.html))
        return stop

    def _parseHome(self, address, marker, match):
        """Build the HomeLocation from a marker that matched the home pattern."""
        return HomeLocation(marker.location, address, match.group("homeaddr"))
164 URL = "http://www.octranspo.com/maps/busstops/imap.asp"
165 FIXED_PARAMS = { 'page': 'search' }
167 # There's a meta tag that says it's UTF-8, but it turns out it's latin-1.
168 REAL_ENCODING = "latin-1"
170 # Any type of marker
171 # var marker = createMarker(new GPoint(..., ...), "blah blah");
172 # note: gmaps api v1 uses GPoint(long, lat) instead of GLatLng(lat, long)
173 _marker_re = (r'createMarker\(new GPoint\('
174 r'(?P<longitude>[^,]+),\s*(?P<latitude>[^,]+)\),\s*'
175 # ?s: DOTALL: . matches \n
176 # *? is non-greedy
177 r'"(?s)(?P<html>.*?)"\);\r?\n')
178 _marker_rx = re.compile(_marker_re)
181 # Marker for a transitway station
182 # <span><strong><b>613-560-1000 plus <a href='iframe.asp?route=busstop&INFO_PHONE=3034' target='iframe'>3034</a><br><a href='iframe.asp?route=bus_station&INFO_PHONE=3034&station_name=BILLINGS BRIDGE&station_id=BIB' target='iframe'>BILLINGS BRIDGE</b></strong></span>
183 # (?i): Make it case-insensitive
184 # For station name, assume they'll URL-encode it properly someday
185 _station_re = (r'(?i)INFO_PHONE=(?P<stopnum>\w+)'
186 r'.*station_name=(?P<name>[^<>&]+)'
187 r'.*station_id=(?P<code>\w+)')
188 _station_rx = re.compile(_station_re)
190 # Marker for a regular stop
191 # <span><strong><b>613-560-1000 plus <a href='iframe.asp?route=busstop&INFO_PHONE=4897' target='iframe'>4897</a></b></strong><small> (RA040)</small><br>BANK / TRANSITWAY<br><a href='iframe.asp?route=1&dir=10' target='iframe'>1</a> <a href='iframe.asp?route=5&dir=10' target='iframe'>5</a> <a href='iframe.asp?route=111&dir=10' target='iframe'>111</a> <a href='iframe.asp?route=141&dir=11' target='iframe'>141</a> <a href='iframe.asp?route=148&dir=10' target='iframe'>148</a> </span>
192 _stop_re = (r'(?i)INFO_PHONE=(?P<stopnum>\w*)'
193 r'.*?\<small\>\s*\((?P<code>[^)]+)\)'
194 r'.*\<br\>(?P<name>[^<>]+)\<br\>')
195 _stop_rx = re.compile(_stop_re)
197 _stop_route_re = (r'(?i)route=(?P<routenum>\w+)&dir=(?P<direction>\w+)')
198 _stop_route_rx = re.compile(_stop_route_re)
200 # Marker for the current location
201 # <span><strong>HERON RD & DATA CENTRE RD, OTTAWA, ON, CANADA</strong></span>"
202 _home_re = (r'(?i)(?P<homeaddr>[^<>]+), CANADA')
203 _home_rx = re.compile(_home_re)
205 _error_re = (r'G_GEO_UNKNOWN_ADDRESS')
206 _error_rx = re.compile(_error_re)
def main(argv=None):
    """Search for the address given on the command line and print each result.

    argv defaults to sys.argv.  Everything after argv[0] is joined with
    single spaces to form the search address.
    """
    args = sys.argv if argv is None else argv
    address = " ".join(args[1:])
    for found in Client().findStops(address):
        # Parenthesised form prints the same thing as the print statement.
        print(found)


if __name__ == '__main__':
    sys.exit(main())