only change the version in one location (watch.py)
[urlwatch.git] / watch.py
blob 454606dedc0dd90925ca8a64f5185625c1688272
#!/usr/bin/python
# Minimalistic Python URL watcher
# 2008-03-04 Thomas Perl <thpinfo.com>
# http://thpinfo.com/2008/urlwatch
#
# 1. Create an "urls.txt" file and add one URL per
#    line that you want to watch.
# 2. Add watch.py as a cronjob or run it manually.
# 3. If something changed, you'll get a diff output
#    to stdout. If nothing changed, no output.
# 4. If you want to filter the web pages, because
#    there is some dynamic content that _always_
#    changes, create a "hooks.py" file that has a
#    filter(url, data) -> filtered_data function

__version__ = 1.4

# Configuration section
display_errors = False
user_agent = 'urlwatch/%s (+http://thpinfo.com/2008/urlwatch/info.html)' % __version__

# Code section

import sys
import os.path
import urllib2
import difflib

try:
    # hashlib replaces the "sha" module, which is deprecated since Python 2.5.
    from hashlib import sha1
except ImportError:
    # Older interpreters: sha.new() yields the same SHA-1 digests.
    from sha import new as sha1


def _read_file(path):
    """Return the whole contents of the file at `path`, closing it afterwards."""
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


def _write_file(path, data):
    """Replace the contents of the file at `path` with `data`, closing it afterwards."""
    handle = open(path, 'w')
    try:
        handle.write(data)
    finally:
        handle.close()


def _fetch(request):
    """Open the urllib2 `request` and return the response body.

    urllib2.URLError (and its subclass HTTPError) raised by urlopen()
    propagates to the caller; the response is always closed.
    """
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


if __name__ == '__main__':
    # Run from the script's own directory so that urls.txt, hooks.py and the
    # cached page copies are found no matter where the script is started from
    # (e.g. from a cronjob).
    os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

    headers = {
        'User-agent': user_agent,
    }

    # Optional user hook: filter(url, data) -> filtered_data.
    # Without a hooks.py the downloaded data is used unmodified.
    if os.path.exists('hooks.py'):
        from hooks import filter
    else:
        filter = lambda x, y: y

    for url in _read_file('urls.txt').splitlines():
        # Skip comment lines (starting with '#') and blank lines.
        if url.startswith('#') or url.strip() == '':
            continue

        # The last seen copy of each page is cached in a file named after
        # the SHA-1 hex digest of its URL.
        filename = sha1(url).hexdigest()
        try:
            request = urllib2.Request(url, None, headers)
            data = filter(url, _fetch(request))
            if os.path.exists(filename):
                old_data = _read_file(filename)
                diff = ''.join(difflib.unified_diff(old_data.splitlines(1), data.splitlines(1)))
                if diff:
                    print('%s\nCHANGED: %s\n%s\n%s\n%s\n\n' % ('*'*60, url, '*'*60, diff, '*'*60))
            else:
                print('%s\nNEW: %s\n%s\n\n' % ('*'*60, url, '*'*60))
            # Remember the current version for the next run.
            _write_file(filename, data)
        except urllib2.URLError:
            # HTTPError is a subclass of URLError, so this one handler covers
            # both. sys.exc_info() is used instead of binding the exception in
            # the except clause to stay valid on old Python 2 releases.
            error = sys.exc_info()[1]
            if display_errors:
                print('%s\nERROR: %s\n%s\n%s\n%s\n\n' % ('*'*60, url, '*'*60, error, '*'*60))