Unicode-related fixes
author Thomas Perl <m@thp.io>
Tue, 15 Nov 2011 13:12:20 +0000 (15 14:12 +0100)
committer Thomas Perl <m@thp.io>
Tue, 15 Nov 2011 13:12:20 +0000 (15 14:12 +0100)
Again, it's time for Python 3 to come around soon!

lib/urlwatch/html2txt.py
urlwatch

index 6ef19ba..b3010ee 100644 (file)
@@ -43,6 +43,9 @@ def html2text(data, method='lynx'):
     
     Dependencies: apt-get install lynx html2text
     """
+    if isinstance(data, unicode):
+        data = data.encode('utf-8')
+
     if method == 're':
         stripped_tags = re.sub(r'<[^>]*>', '', data)
         d = '\n'.join((l.rstrip() for l in stripped_tags.splitlines() if l.strip() != ''))
index 01b5d22..4220e99 100755 (executable)
--- a/urlwatch
+++ b/urlwatch
@@ -265,7 +265,8 @@ if __name__ == '__main__':
                 log.info('%s exists - creating unified diff' % filename)
                 old_data = open(filename).read()
 
-                if not isinstance(old_data, unicode):
+                if (not isinstance(old_data, unicode) and
+                        isinstance(data, unicode)):
                     # Fix for Python 2's unicode/str woes
                     data = data.encode('utf-8')