From 158acb2979b91ffc534764a194eb2da1318a3ad7 Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Fri, 3 Apr 2015 17:50:26 +1300 Subject: [PATCH] Ignore RSS post titles unless there are blacklisted titles or authors --- planetfilter | 79 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/planetfilter b/planetfilter index 47090ae..6433fdd 100755 --- a/planetfilter +++ b/planetfilter @@ -102,22 +102,25 @@ def filter_rss2(xmldocument, blacklist): for item in items: deleted = False titles = item.getElementsByTagName('title') - for title in titles: - textnode = title.firstChild - if textnode and Node.TEXT_NODE == textnode.nodeType: - titlestring = textnode.nodeValue - if blacklist['authors']: - for author in blacklist['authors']: - if 0 == titlestring.find(author): - delete_node(item) - deleted = True - break - if not deleted and blacklist['titles']: - for title in blacklist['titles']: - if titlestring.find(title) > -1: - delete_node(item) - deleted = True - break + if blacklist['authors'] or blacklist['titles']: + for title in titles: + textnode = title.firstChild + if textnode and Node.TEXT_NODE == textnode.nodeType: + titlestring = textnode.nodeValue + if blacklist['authors']: + for author in blacklist['authors']: + if 0 == titlestring.find(author): + delete_node(item) + deleted = True + break + if not deleted and blacklist['titles']: + for title in blacklist['titles']: + if titlestring.find(title) > -1: + delete_node(item) + deleted = True + break + if deleted: + break if not deleted and blacklist['urls']: links = item.getElementsByTagName('link') @@ -153,8 +156,9 @@ def filter_atom(xmldocument, blacklist): delete_node(entry) deleted = True break - if deleted: - break + if deleted: + break + if not deleted and blacklist['titles']: titles = entry.getElementsByTagName('title') for title in titles: @@ -166,8 +170,8 @@ def filter_atom(xmldocument, blacklist): delete_node(entry) deleted = True break - if deleted: - break + if deleted: + break if not deleted and blacklist['urls']: links = entry.getElementsByTagName('link') @@ -193,22 +197,25 @@ def filter_rss1(xmldocument, blacklist): for item in items: deleted = False titles = item.getElementsByTagName('title') - for title in titles: - textnode = title.firstChild - if textnode and Node.TEXT_NODE == textnode.nodeType: - titlestring = textnode.nodeValue - if blacklist['authors']: - for author in blacklist['authors']: - if 0 == titlestring.find(author): - delete_rss1_item(item) - deleted = True - break - if not deleted and blacklist['titles']: - for title in blacklist['titles']: - if titlestring.find(title) > -1: - delete_rss1_item(item) - deleted = True - break + if blacklist['authors'] or blacklist['titles']: + for title in titles: + textnode = title.firstChild + if textnode and Node.TEXT_NODE == textnode.nodeType: + titlestring = textnode.nodeValue + if blacklist['authors']: + for author in blacklist['authors']: + if 0 == titlestring.find(author): + delete_rss1_item(item) + deleted = True + break + if not deleted and blacklist['titles']: + for title in blacklist['titles']: + if titlestring.find(title) > -1: + delete_rss1_item(item) + deleted = True + break + if deleted: + break if not deleted and blacklist['urls']: links = item.getElementsByTagName('link') -- 2.11.4.GIT