Script to grab stop codes from allstops.xml and sort
[ottawa-travel-planner.git] / landmarkLeech.py
blob488793a78ec33861f258691ac7113a3aa00b787c
1 #!/usr/bin/python
2 # vi: set softtabstop=4 shiftwidth=4 tabstop=8 expandtab:
4 """Downloads a list of landmarks from the server."""
6 import re
7 import sys
8 import urllib
10 import Planner
class LandmarkLeech(Planner.TravelPlannerClient):
    """Client that dumps the planner's landmark list, one record per line."""

    def leech(self, fp):
        """Write the landmarks of every category on the setup page to fp.

        Requests "FromLandmarkSetup.oci", then hands each category match
        (its code and description) to grabCategory.
        """
        setup_page = self._sendRequest("FromLandmarkSetup.oci", None)
        for category in _category_rx.finditer(setup_page):
            category_code = category.group("code")
            category_desc = category.group("desc")
            self.grabCategory(fp, category_code, category_desc)

    def grabCategory(self, fp, code, desc):
        """Write one "code|landmark" line to fp per landmark in a category.

        code -- category code, sent as the "landmarkType" parameter
        desc -- category description, sent as "landmarkDesc"
        """
        # The description is purely cosmetic: it is only shown at the top
        # of the FromLandmarkType page.
        query = (("landmarkType", code), ("landmarkDesc", desc))
        listing = self._sendRequest("FromLandmarkType.oci", query)
        for quoted_name in _landmark_rx.findall(listing):
            # Matches are URL-quoted; decode them before writing.
            record = "|".join((code, urllib.unquote(quoted_name)))
            print >> fp, record
# Matches one <option> element; the named groups capture its value
# attribute ("code") and its visible text ("desc").  Raw string so the
# regex escapes are not interpreted as string escapes.
_category_re = r'(?i)<option\s+value="(?P<code>.*?)">(?P<desc>.*?)</option>'
_category_rx = re.compile(_category_re)

# Matches one landmark link and captures the (still URL-quoted)
# landmarkAddress query value.  The capture stops at the first double
# quote, so several links on the same line yield separate matches instead
# of a single greedy match running up to the last '">' on that line.
_landmark_re = (r'(?i)<a href="FromLandmarkMultipleMatch\.oci\?'
                r'landmarkAddress=([^"]*)">')
_landmark_rx = re.compile(_landmark_re)
def main(argv=None):
    """Dump all landmarks to standard output and return exit status 0.

    argv defaults to sys.argv when omitted; it is not otherwise consulted.
    """
    argv = sys.argv if argv is None else argv

    leecher = LandmarkLeech()
    leecher.leech(sys.stdout)

    return 0
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    raise SystemExit(main())