From 499b50b0aa60b3581784e7fb79d23b3714982fc9 Mon Sep 17 00:00:00 2001
From: Thomas Perl <thp@thpinfo.com>
Date: Mon, 16 Aug 2010 22:27:58 +0200
Subject: [PATCH] Allow None as return value for filters

In order to make the filtering mechanism more
robust against user error, urlwatch now accepts
None as a return value from the filter function
and interprets it as "don't filter".
---
 examples/hooks.py.example |  4 ++++
 lib/urlwatch/handler.py   | 21 ++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/examples/hooks.py.example b/examples/hooks.py.example
index 0419c42..1ef516f 100644
--- a/examples/hooks.py.example
+++ b/examples/hooks.py.example
@@ -81,5 +81,9 @@ def filter(url, data):
         # "re" (does not need anything, but only strips tags
         # using a regular expression and does no formatting)
         return html2txt.html2text(data, method='lynx')
+
+    # The next line is optional - if the filter function returns
+    # None (or no value at all), the input data will be taken as
+    # the result -> None as return value means "don't filter".
     return data
 
diff --git a/lib/urlwatch/handler.py b/lib/urlwatch/handler.py
index 1748a12..721c990 100755
--- a/lib/urlwatch/handler.py
+++ b/lib/urlwatch/handler.py
@@ -75,6 +75,20 @@ class ShellError(Exception):
     def __str__(self):
         return '%s: Exit status %d' % (self.__class__.__name__, self.result)
 
+
+def use_filter(filter, url, input):
+    """Return filter(url, input), or input itself when nothing was filtered.
+
+    The input is returned unchanged if filter is None (the default of
+    ShellJob.retrieve) or if the filter call returns None.
+    """
+    if filter is None:
+        return input
+
+    output = filter(url, input)
+    return input if output is None else output
+
+
 class ShellJob(JobBase):
     def retrieve(self, timestamp=None, filter=None, headers=None):
         process = subprocess.Popen(self.location, \
@@ -84,7 +98,9 @@ class ShellJob(JobBase):
         result = process.wait()
         if result != 0:
             raise ShellError(result)
-        return filter(self.location, stdout_data)
+
+        return use_filter(filter, self.location, stdout_data)
+
 
 class UrlJob(JobBase):
     CHARSET_RE = re.compile('text/(html|plain); charset=(.*)')
@@ -111,8 +127,7 @@ class UrlJob(JobBase):
             content_unicode = content.decode(encoding, 'ignore')
             content = content_unicode.encode('utf-8')
 
-        data = filter(self.location, content)
-        return data
+        return use_filter(filter, self.location, content)
 
 
 def parse_urls_txt(urls_txt):
-- 
2.11.4.GIT