From 499b50b0aa60b3581784e7fb79d23b3714982fc9 Mon Sep 17 00:00:00 2001
From: Thomas Perl
Date: Mon, 16 Aug 2010 22:27:58 +0200
Subject: [PATCH] Allow None as return value for filters

In order to make the filtering mechanism more robust against user
error, urlwatch now accepts the None value as return value from the
filter function, and interprets it as "don't filter".
---
 examples/hooks.py.example |    4 ++++
 lib/urlwatch/handler.py   |   21 ++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/examples/hooks.py.example b/examples/hooks.py.example
index 0419c42..1ef516f 100644
--- a/examples/hooks.py.example
+++ b/examples/hooks.py.example
@@ -81,5 +81,9 @@ def filter(url, data):
         # "re" (does not need anything, but only strips tags
         # using a regular expression and does no formatting)
         return html2txt.html2text(data, method='lynx')
+
+    # The next line is optional - if the filter function returns
+    # None (or no value at all), the input data will be taken as
+    # the result -> None as return value means "don't filter".
     return data
 
diff --git a/lib/urlwatch/handler.py b/lib/urlwatch/handler.py
index 1748a12..721c990 100755
--- a/lib/urlwatch/handler.py
+++ b/lib/urlwatch/handler.py
@@ -75,6 +75,20 @@ class ShellError(Exception):
     def __str__(self):
         return '%s: Exit status %d' % (self.__class__.__name__, self.result)
 
+
+def use_filter(filter, url, input):
+    """Apply a filter function to input from an URL"""
+    output = filter(url, input)
+
+    if output is None:
+        # If the filter does not return a value, it is
+        # assumed that the input does not need filtering.
+        # In this case, we simply return the input.
+        return input
+
+    return output
+
+
 class ShellJob(JobBase):
     def retrieve(self, timestamp=None, filter=None, headers=None):
         process = subprocess.Popen(self.location, \
@@ -84,7 +98,9 @@ class ShellJob(JobBase):
         result = process.wait()
         if result != 0:
             raise ShellError(result)
-        return filter(self.location, stdout_data)
+
+        return use_filter(filter, self.location, stdout_data)
+
 
 class UrlJob(JobBase):
     CHARSET_RE = re.compile('text/(html|plain); charset=(.*)')
@@ -111,8 +127,7 @@ class UrlJob(JobBase):
         content_unicode = content.decode(encoding, 'ignore')
         content = content_unicode.encode('utf-8')
 
-        data = filter(self.location, content)
-        return data
+        return use_filter(filter, self.location, content)
 
 
 def parse_urls_txt(urls_txt):
-- 
2.11.4.GIT