From 65b4ea27a5970a0c245246feb6f9173736807593 Mon Sep 17 00:00:00 2001 From: Thomas Perl Date: Tue, 15 Nov 2011 14:12:20 +0100 Subject: [PATCH] Unicode-related fixes Again, it's time for Python 3 to come around soon! --- lib/urlwatch/html2txt.py | 3 +++ urlwatch | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/urlwatch/html2txt.py b/lib/urlwatch/html2txt.py index 6ef19ba..b3010ee 100644 --- a/lib/urlwatch/html2txt.py +++ b/lib/urlwatch/html2txt.py @@ -43,6 +43,9 @@ def html2text(data, method='lynx'): Dependencies: apt-get install lynx html2text """ + if isinstance(data, unicode): + data = data.encode('utf-8') + if method == 're': stripped_tags = re.sub(r'<[^>]*>', '', data) d = '\n'.join((l.rstrip() for l in stripped_tags.splitlines() if l.strip() != '')) diff --git a/urlwatch b/urlwatch index 01b5d22..4220e99 100755 --- a/urlwatch +++ b/urlwatch @@ -265,7 +265,8 @@ if __name__ == '__main__': log.info('%s exists - creating unified diff' % filename) old_data = open(filename).read() - if not isinstance(old_data, unicode): + if (not isinstance(old_data, unicode) and + isinstance(data, unicode)): # Fix for Python 2's unicode/str woes data = data.encode('utf-8') -- 2.11.4.GIT