From 7c1f1cfa9cddf7f84fb0079d511e49bb2f8b3ab2 Mon Sep 17 00:00:00 2001 From: Thomas Perl Date: Fri, 14 Nov 2008 12:44:51 +0100 Subject: [PATCH] urlwatch 1.5 (ical2txt, utidylib, documentation) --- ChangeLog | 7 +++++++ README | 9 +++++++-- hooks.py | 13 ++++++++++++- ical2txt.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ makefile | 2 +- urls.txt | 1 + 6 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 ical2txt.py diff --git a/ChangeLog b/ChangeLog index 6edd3c4..0793fd4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -24,3 +24,10 @@ 2008-05-16 Thomas Perl * Release version 1.3 +2008-11-14 Thomas Perl + + Add example for using HTML Tidy (needs python-utidylib) + + Add example for using the ical2txt module (needs python-vobject) + + Add ical2txt.py module for converting ics to plaintext + * More comments in hooks.py for better user documentation + * Release version 1.4 + diff --git a/README b/README index 1d574f2..294326f 100644 --- a/README +++ b/README @@ -30,10 +30,15 @@ A: Simply add watch.py to your crontab. Make sure the stdout of your cronjobs is mailed to you, so you get the notifications. +Q: Is there an easy way to show changes of .ics files? +A: Indeed there is. See hooks.py and ical2txt.py + +Q: What about badly-formed HTML (long lines, etc..)? +A: Use python-utidylib. See hooks.py for an example ~~ cONTACT ~~ Website: http://thpinfo.com/2008/urlwatch -Contact: thp [thpinfo.com] -Jabber: thp [jabber.org] +Contact: thp [thpinfo.com/about] +Jabber: thp [at jabber.org] diff --git a/hooks.py b/hooks.py index d912f9a..707e17c 100644 --- a/hooks.py +++ b/hooks.py @@ -4,7 +4,6 @@ # the part that you want to filter, so the noise is removed. import re -import tidy def filter(url, data): if url == 'http://www.inso.tuwien.ac.at/lectures/usability/': @@ -16,14 +15,26 @@ def filter(url, data): elif url == 'http://www.mv-eberau.at/terminliste.php': return data.replace('
', '\n') elif 'iuner.lukas-krispel.at' in url: + # Remove always-changing entries from FTP server listing return re.sub('drwx.*usage', '', re.sub('drwx.*logs', '', data)) elif url.startswith('http://ti.tuwien.ac.at/rts/teaching/courses/'): + # example of using the "tidy" module for cleaning up bad HTML + import tidy mlr = re.compile('magicCalendarHeader.*magicCalendarBottom', re.S) data = str(tidy.parseString(data, output_xhtml=1, indent=0, tidy_mark=0)) return re.sub(mlr, '', data) elif url == 'http://www.poleros.at/calender.htm': + # remove style changes, because we only want to see content changes return re.sub('style="[^"]"', '', data) elif url == 'http://www.ads.tuwien.ac.at/teaching/LVA/186170.html': return re.sub('Saved in parser cache with key .* and timestamp .* --', '', re.sub('Served by aragon in .* secs\.', '', re.sub('This page has been accessed .* times\.', '', data))) + elif url.endswith('.ics') or url == 'http://www.kukuk.at/ical/events': + # example of generating a summary for icalendar files + # the ical2txt.py module is included with urlwatch + import ical2txt + # append "data" to the converted ical data, so you get + # all minor changes to the ICS that are not included + # in the ical2text summary (remove this if you want) + return ical2txt.ical2text(data).encode('utf-8') + '\n\n' + data return data diff --git a/ical2txt.py b/ical2txt.py new file mode 100644 index 0000000..4bc1e89 --- /dev/null +++ b/ical2txt.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +# Convert iCalendar data to plaintext (very basic, don't rely on it :) +# Requirements: python-vobject (http://vobject.skyhouseconsulting.com/) +# Thomas Perl ; Fri, 14 Nov 2008 12:26:42 +0100 +# Website: http://thpinfo.com/2008/urlwatch/ + +def ical2text(ical_string): + import vobject + result = [] + if isinstance(ical_string, unicode): + parsedCal = vobject.readOne(ical_string) + else: + try: + parsedCal = vobject.readOne(ical_string) + except: + parsedCal = vobject.readOne(ical_string.decode('utf-8', 'ignore')) + + for event in parsedCal.getChildren(): + if event.name == 'VEVENT': + if hasattr(event, 'dtstart'): + start = event.dtstart.value.strftime('%F %H:%M') + else: + start = 'unknown start date' + + if hasattr(event, 'dtend'): + end = event.dtend.value.strftime('%F %H:%M') + else: + end = start + + if start == end: + date_str = start + else: + date_str = '%s -- %s' % (start, end) + + result.append('%s: %s' % (date_str, event.summary.value)) + + return '\n'.join(result) + +if __name__ == '__main__': + import sys + + if len(sys.argv) == 2: + print ical2text(open(sys.argv[1]).read()) + else: + print 'Usage: %s icalendarfile.ics' % (sys.argv[0]) + sys.exit(1) + diff --git a/makefile b/makefile index 99bc6ba..1c2c122 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,7 @@ # makefile for urlwatch PACKAGE=urlwatch -VERSION=1.3 +VERSION=1.4 FILES=*.txt README *.py makefile ChangeLog all: diff --git a/urls.txt b/urls.txt index e018815..e82f16d 100644 --- a/urls.txt +++ b/urls.txt @@ -22,4 +22,5 @@ http://www.bar-akira.com/events.php http://www.ads.tuwien.ac.at/teaching/LVA/186170.html http://www.complang.tuwien.ac.at/anton/lvas/effiziente-programme.html http://www.complang.tuwien.ac.at/anton/lvas/effizienz-aufgabe08/ +http://www.kukuk.at/ical/events -- 2.11.4.GIT