From f138845a5a4a135ba83123398d1d65dd9491fb23 Mon Sep 17 00:00:00 2001 From: Pawel Solyga Date: Mon, 18 May 2009 19:37:38 +0200 Subject: [PATCH] Replace usage of feedparser for sanitizing HTML with new HtmlSanitizer module. HtmlSanitizer protects against XSS attacks (Fixes issue 606). --- app/soc/logic/cleaning.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/app/soc/logic/cleaning.py b/app/soc/logic/cleaning.py index 9eac748a..78b0877f 100644 --- a/app/soc/logic/cleaning.py +++ b/app/soc/logic/cleaning.py @@ -24,7 +24,7 @@ __authors__ = [ ] -import feedparser +from htmlsanitizer import HtmlSanitizer from google.appengine.api import users @@ -379,16 +379,21 @@ def clean_html_content(field_name): def wrapped(self): """Decorator wrapper method. """ + from HTMLParser import HTMLParseError content = self.cleaned_data.get(field_name) if user_logic.isDeveloper(): return content - - sanitizer = feedparser._HTMLSanitizer('utf-8') - sanitizer.feed(content) - content = sanitizer.output() - content = content.decode('utf-8') + + try: + cleaner = HtmlSanitizer.Cleaner() + cleaner.string = content + cleaner.clean() + except HTMLParseError, msg: + raise forms.ValidationError(msg) + + content = cleaner.string content = content.strip().replace('\r\n', '\n') return content -- 2.11.4.GIT