From ae1b4fc605190fc3847bcfba7f3661a48866eadc Mon Sep 17 00:00:00 2001 From: Thomas Perl Date: Wed, 7 Jan 2009 19:27:39 +0100 Subject: [PATCH] Even more output sanitizing for Lynx html2text mode --- ChangeLog | 3 +++ lib/urlwatch/html2txt.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d42fa8e..4b309a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -56,3 +56,6 @@ 2009-01-05 Thomas Perl * Fix a problem with relative links in Lynx' "-dump" mode +2009-01-07 Thomas Perl + * Fix another problem with file-relative links in html2text w/ Lynx + diff --git a/lib/urlwatch/html2txt.py b/lib/urlwatch/html2txt.py index 68ce8da..ce35d07 100644 --- a/lib/urlwatch/html2txt.py +++ b/lib/urlwatch/html2txt.py @@ -68,6 +68,8 @@ def html2text(data, method='lynx'): # expose itself as change on the website (it's a Lynx-related thing # Thanks to Evert Meulie for pointing that out stdout = re.sub(r'file://localhost/tmp/[^/]*/', '', stdout) + # Also remove file names like L9816-5928TMP.html + stdout = re.sub(r'L\d+-\d+TMP.html', '', stdout) return stdout -- 2.11.4.GIT