From: Thomas Perl
Date: Tue, 15 Nov 2011 13:12:20 +0000 (+0100)
Subject: Unicode-related fixes
X-Git-Tag: 1.14~1
X-Git-Url: https://repo.or.cz/w/urlwatch.git/commitdiff_plain/65b4ea27a5970a0c245246feb6f9173736807593

Unicode-related fixes

Again, it's time for Python 3 to come around soon!
---

diff --git a/lib/urlwatch/html2txt.py b/lib/urlwatch/html2txt.py
index 6ef19ba..b3010ee 100644
--- a/lib/urlwatch/html2txt.py
+++ b/lib/urlwatch/html2txt.py
@@ -43,6 +43,9 @@ def html2text(data, method='lynx'):
 
     Dependencies: apt-get install lynx html2text
     """
+    if isinstance(data, unicode):
+        data = data.encode('utf-8')
+
     if method == 're':
         stripped_tags = re.sub(r'<[^>]*>', '', data)
         d = '\n'.join((l.rstrip() for l in stripped_tags.splitlines() if l.strip() != ''))
diff --git a/urlwatch b/urlwatch
index 01b5d22..4220e99 100755
--- a/urlwatch
+++ b/urlwatch
@@ -265,7 +265,8 @@ if __name__ == '__main__':
                 log.info('%s exists - creating unified diff' % filename)
                 old_data = open(filename).read()
 
-                if not isinstance(old_data, unicode):
+                if (not isinstance(old_data, unicode) and
+                        isinstance(data, unicode)):
                     # Fix for Python 2's unicode/str woes
                     data = data.encode('utf-8')
 