From 248223ec040c75489180a9a28d94acbd79b7de10 Mon Sep 17 00:00:00 2001 From: mazze Date: Fri, 2 Feb 2018 19:12:28 +0000 Subject: [PATCH] - Use HTMLParser().unescape() instead of str.replace(). This catches more cases. - "Creator" -> "Author" - Show 14 entries - Code cleanup git-svn-id: https://svn.aros.org/svn/aros/trunk/AROS@55049 fb15a70f-31f2-0310-bbcc-cdcc74a49acc --- scripts/updatecommits | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/scripts/updatecommits b/scripts/updatecommits index 57849dedc0..d2f7fd4e68 100755 --- a/scripts/updatecommits +++ b/scripts/updatecommits @@ -7,6 +7,13 @@ import urllib2 #import ssl # not needed on Sourceforge with Python 2.6 import re +import HTMLParser + +# We must set the encoding or HTMLParser().unescape() fails with error +# "UnicodeDecodeError: 'ascii' codec can't decode byte..." +import sys +reload(sys) +sys.setdefaultencoding('utf8') # regex for parsing a RSS item rssre = re.compile(r'''.*? @@ -18,7 +25,7 @@ rssre = re.compile(r'''.*? ''', re.DOTALL | re.VERBOSE) -count = 10 # number of entries +count = 14 # number of entries rsspath='https://trac.aros.org/trac/timeline?changeset=on&wiki=on&max=%d&authors=&daysback=90&format=rss' % (count) #ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) #rssfile = urllib2.urlopen(rsspath, context=ctx) @@ -31,26 +38,29 @@ targetfile = open(targetpath, 'w') # output of the content in html format -targetfile.write('\n') -targetfile.write('\n') -targetfile.write('\n') -targetfile.write('AROS commits\n') -targetfile.write('\n') -targetfile.write('\n') -targetfile.write('\n') -targetfile.write('\n') -targetfile.write('\n') +targetfile.write(''' + + + +AROS commits + + + + +\n +''') for entry in rssre.finditer(content): - desc = entry.group('description') - desc = desc.replace('<', '<') - desc = desc.replace('>', '>') - - targetfile.write('

%s

\n' % (entry.group('link'), entry.group('title'))) - targetfile.write('Creator: %s; Date: %s
\n' % (entry.group('creator'), entry.group('pubDate'))) - targetfile.write('%s\n' % (desc)) + targetfile.write(''' +

%s

+Author: %s; Date: %s +%s\n''' % ( + entry.group('link'), + HTMLParser.HTMLParser().unescape(entry.group('title')), + HTMLParser.HTMLParser().unescape(entry.group('creator')), + HTMLParser.HTMLParser().unescape(entry.group('pubDate')), + HTMLParser.HTMLParser().unescape(entry.group('description')))) -targetfile.write('\n') -targetfile.write('\n') +targetfile.write('\n\n') targetfile.close() -- 2.11.4.GIT