Planner.py

   1 #
   2 # vi: set softtabstop=4 shiftwidth=4 tabstop=8 expandtab:
   3
   4 """A frontend for the OC Transpo Travel Planner."""
   5
   6 import cookielib
   7 import urllib2
   8 import urllib
   9 import re
  10
  11 import Itinerary
  12 from PlannerExceptions import *
  13
  14 def plan(start, end, time):
  15     """Plans a route between two Locations at a certain PlanTime."""
  16     planner = TravelPlannerClient()
  17     planner.feedStartLocation(start)
  18     planner.feedEndLocation(end)
  19     html = planner.feedTime(time)
  20     return Itinerary.Itinerary(start, end, time, html)
  21
  22 class TravelPlannerClient:
  23     def __init__(self):
  24         # Set up a cookie-aware client.
  25         self.cj = cookielib.CookieJar()
  26         self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
  27
  28         # Initialize the session.
  29         r = self._sendRequest(self.START_PAGE, None)
  30
  31     def feedStartLocation(self, loc):
  32         self._sendRequest(loc.getPage(True), loc.toPlannerParams(True))
  33
  34     def feedEndLocation(self, loc):
  35         self._sendRequest(loc.getPage(False), loc.toPlannerParams(False))
  36
  37     def feedTime(self, time):
  38         # name="tp_time" action="SelectTime.oci"
  39         params = time.toPlannerParams()
  40         r = self._sendRequest("SelectTime.oci", params)
  41
  42         return r
  43
  44     def _sendRequest(self, page, params):
  45         url = self.URL_BASE + page
  46         if params is not None:
  47             url += "?" + urllib.urlencode(params)
  48
  49         response = self.opener.open(url)
  50         self._checkObviousBadness(response)
  51
  52         html = self._grabLimitedResponse(response)
  53         self._scanForError(html)
  54         return html
  55
  56     def _checkObviousBadness(self, response):
  57         if response.code != 200:
  58             raise TravelPlannerException("Got HTTP " + response.code
  59                                          + " error from server")
  60         if "errorPage.oci" in response.geturl():
  61             # try obtaining a specific error string
  62             html = self._grabLimitedResponse(response)
  63             self._scanForError(html)
  64
  65             # otherwise, throw a generic one
  66             raise TravelPlannerException("Redirected to error page")
  67
  68     def _grabLimitedResponse(self, response):
  69         count = 0
  70
  71         accum = ""
  72         for line in response:
  73             count += len(line)
  74             if (count <= self.RESPONSE_SIZE_LIMIT):
  75                 accum += line
  76             else:
  77                 break
  78         return accum
  79
  80
  81
  82     # Regular expressions: probably the worst way to parse HTML
  83     # Probably the best thing to put in your breakfast cereal.
  84
  85     def _scanForError(self, text):
  86         match = _error_rx.search(text)
  87         if match:
  88             raise TravelPlannerException(match.group("msg"))
  89
  90     URL_BASE = "http://www.octranspo.com/tps/jnot/"
  91     START_PAGE = "startEN.oci"
  92     RESPONSE_SIZE_LIMIT = 100000
  93
  94
  95
  96 # Scan for an error.
  97 # <table cellpadding="0" cellspacing="0" summary="Warning message" class="warning" width="85%">
  98 #   <tr>
  99 #       <td><img src="tripPlanning/images/imgWarning.gif"></td>
 100 #       <td>The address you specified was not found.  Please enter another.</td>
 101 #   </tr>
 102 # </table>
 103 _error_re = ('<table[^>]*class="(?:warning|error)[^>]*>\s*'
 104                 '<tr>(?:\s*<td>\s*<img[^>]*>\s*</td>)?'
 105                     # ?s: DOTALL: . matches \n
 106                     # *? is non-greedy
 107                     '\s*<td>(?s)\s*(?P<msg>[\d\D]*?)\s*</td>')
 108 _error_rx = re.compile(_error_re)