From b9be9dc6b0d90fd50cd092cdf51ba921a8fab1c3 Mon Sep 17 00:00:00 2001 From: Thomas Perl Date: Sat, 20 Aug 2011 12:43:12 +0200 Subject: [PATCH] Support for POST requests MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This feature and the format in urls.txt have been proposed by Sébastien Fricker. --- README | 7 +++++++ examples/urls.txt.example | 7 +++++++ lib/urlwatch/handler.py | 15 +++++++++++---- urlwatch | 2 +- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/README b/README index edb3505..1a01e3e 100644 --- a/README +++ b/README @@ -44,6 +44,13 @@ Q: Is there a way to make the output more human-readable? Q: Is there a way to turn it into a diff of parsed HTML perhaps? A: Of course. See the example hooks.py file -> use html2txt.html2text(data) +Q: Why do I get an error with URLs with spaces in them? +A: Please make sure to URL-encode the URLs properly. Use %20 for spaces. + +Q: The website I want to watch requires a POST request. How do I send one? +A: Add the POST data on the same line, separated by a single space. The format + in urls.txt is: http://example.org/script.cgi value=5&q=search&button=Go + CONTACT ------- diff --git a/examples/urls.txt.example b/examples/urls.txt.example index 209b86e..c9bfe57 100644 --- a/examples/urls.txt.example +++ b/examples/urls.txt.example @@ -19,3 +19,10 @@ http://guckes.net/cal/ # You can use the pipe character to "watch" the output of shell commands |ls -al ~ +# If you want to use spaces in URLs, you have to URL-encode them (e.g. %20) +http://example.org/With%20Spaces/ + +# You can do POST requests by writing the POST data after the URL, +# separated by a single space character. POST data is URL-encoded. 
+http://example.com/search.cgi button=Search&q=something&category=4 + diff --git a/lib/urlwatch/handler.py b/lib/urlwatch/handler.py index 686f53b..7ec89f1 100755 --- a/lib/urlwatch/handler.py +++ b/lib/urlwatch/handler.py @@ -62,7 +62,7 @@ class JobBase(object): else: return sha.new(self.location).hexdigest() - def retrieve(self, timestamp=None, filter=None, headers=None): + def retrieve(self, timestamp=None, filter=None, headers=None, log=None): raise Exception('Not implemented') class ShellError(Exception): @@ -90,7 +90,7 @@ def use_filter(filter, url, input): class ShellJob(JobBase): - def retrieve(self, timestamp=None, filter=None, headers=None): + def retrieve(self, timestamp=None, filter=None, headers=None, log=None): process = subprocess.Popen(self.location, \ stdout=subprocess.PIPE, \ shell=True) @@ -105,12 +105,19 @@ class ShellJob(JobBase): class UrlJob(JobBase): CHARSET_RE = re.compile('text/(html|plain); charset=(.*)') - def retrieve(self, timestamp=None, filter=None, headers=None): + def retrieve(self, timestamp=None, filter=None, headers=None, log=None): headers = dict(headers) if timestamp is not None: timestamp = email.Utils.formatdate(timestamp) headers['If-Modified-Since'] = timestamp - request = urllib2.Request(self.location, None, headers) + + if ' ' in self.location: + self.location, post_data = self.location.split(' ', 1) + log.info('Sending POST request to %s', self.location) + else: + post_data = None + + request = urllib2.Request(self.location, post_data, headers) response = urllib2.urlopen(request) headers = response.info() content = response.read() diff --git a/urlwatch b/urlwatch index 04d0de7..b820591 100755 --- a/urlwatch +++ b/urlwatch @@ -229,7 +229,7 @@ if __name__ == '__main__': timestamp = None # Retrieve the data - data = job.retrieve(timestamp, filter, headers) + data = job.retrieve(timestamp, filter, headers, log) if os.path.exists(filename): log.info('%s exists - creating unified diff' % filename) -- 2.11.4.GIT