updated changelog
[urlwatch.git] / urlwatch
blobc5081b466a6a3829a5bce32655b64555fccca3d0
1 #!/usr/bin/python
3 # urlwatch is a minimalistic URL watcher written in Python
5 # Copyright (c) 2008 Thomas Perl <thp|thpinfo.com>
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions
10 # are met:
11 # 1. Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # 2. Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # 3. The name of the author may not be used to endorse or promote products
17 # derived from this software without specific prior written permission.
19 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 """Watch web pages and arbitrary URLs for changes"""
# Modules needed for locating the script and building file paths
import sys
import os.path

# Package identity and metadata
pkgname = 'urlwatch'

__author__ = 'Thomas Perl <thpinfo.com>'
__copyright__ = 'Copyright 2008 Thomas Perl'
__license__ = 'BSD'
__homepage__ = 'http://thpinfo.com/2008/urlwatch/'
__version__ = 1.5

# Identification string built from the package name and version
user_agent = '%s/%s (+http://thpinfo.com/2008/urlwatch/info.html)' % (pkgname, __version__)

# Configuration section
display_errors = False
line_length = 75


# Per-user files and folders, all located below ~/.<pkgname>
urlwatch_dir = os.path.expanduser(os.path.join('~', '.' + pkgname))
urls_txt = os.path.join(urlwatch_dir, 'urls.txt')
cache_dir = os.path.join(urlwatch_dir, 'cache')
scripts_dir = os.path.join(urlwatch_dir, 'lib')
hooks_py = os.path.join(scripts_dir, 'hooks.py')

# Split the directory containing the running script into its parent
# ("prefix") and its own name ("bindir") to find out whether we are
# installed in the system already
_script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
prefix, bindir = os.path.split(_script_dir)

if bindir != 'bin':
    # Assume we are not yet installed: examples and the bundled "lib"
    # folder live next to the script itself
    examples_dir = os.path.join(prefix, bindir, 'examples')
    sys.path.append(os.path.join(prefix, bindir, 'lib'))
else:
    # Assume we are installed in system
    examples_dir = os.path.join(prefix, 'share', pkgname, 'examples')

urls_txt_example = os.path.join(examples_dir, 'urls.txt.example')
hooks_py_example = os.path.join(examples_dir, 'hooks.py.example')
72 # Code section
73 import sha
74 import shutil
75 import os
76 import urllib2
77 import difflib
78 import datetime
def foutput(type, url, content=None, summary=None, c='*', n=line_length):
    """Format output messages

    Returns a snippet of a specific message type (i.e. 'changed') for
    a specific URL and an optional (possibly multi-line) content.

    The parameter "content" is normally a string. Any other object
    (for example an exception object handed in for an 'error' message)
    is converted with str() first, so that its size can be reported and
    the result can be joined with other output lines.

    The parameter "summary" (if specified) should be a list variable
    that gets one item appended for the summary of the changes.

    The parameters "c" and "n" give the character used for separator
    lines and the width of those lines.

    The return value is a list of strings; a multi-line content is kept
    as one single item of that list.
    """
    summary_txt = ': '.join((type.upper(), url))

    # Non-string content has no usable len() and cannot be joined into
    # the final report, so normalise it to a string up front
    if content is not None and not isinstance(content, basestring):
        content = str(content)

    if summary is not None:
        if content is None:
            summary.append(summary_txt)
        else:
            summary.append('%s (%d bytes)' % (summary_txt, len(content)))

    separator = c*n
    result = [separator, summary_txt]
    if content is not None:
        result += [separator, content]
    result += [separator, '', '']

    return result
107 if __name__ == '__main__':
108 start = datetime.datetime.now()
110 # Created all needed folders
111 for needed_dir in (urlwatch_dir, cache_dir, scripts_dir):
112 if not os.path.isdir(needed_dir):
113 os.makedirs(needed_dir)
115 # Check for required files
116 if not os.path.isfile(urls_txt):
117 urls_txt_fn = os.path.join(os.path.dirname(urls_txt), os.path.basename(urls_txt_example))
118 hooks_py_fn = os.path.join(os.path.dirname(hooks_py), os.path.basename(hooks_py_example))
119 print 'Error: You need to create a urls.txt file first.'
120 print ''
121 print 'Place it in %s' % (urls_txt)
122 print 'An example is available in %s' % (urls_txt_fn)
123 print ''
124 print 'You can also create %s' % (hooks_py)
125 print 'An example is available in %s' % (hooks_py_fn)
126 print ''
127 if os.path.exists(urls_txt_example) and not os.path.exists(urls_txt_fn):
128 shutil.copy(urls_txt_example, urls_txt_fn)
129 if os.path.exists(hooks_py_example) and not os.path.exists(hooks_py_fn):
130 shutil.copy(hooks_py_example, hooks_py_fn)
131 sys.exit(1)
133 headers = {
134 'User-agent': user_agent,
137 summary = []
138 details = []
139 count = 0
141 if os.path.exists(hooks_py):
142 hooks = imp.load_source('hooks', hooks_py)
143 if hasattr(hooks, 'filter'):
144 filter = hooks.filter
145 else:
146 print 'WARNING: %s has no filter function - ignoring' % hooks_py
147 filter = lambda x, y: y
148 else:
149 filter = lambda x, y: y
151 for url in (x for x in open(urls_txt).read().splitlines() if not (x.startswith('#') or x.strip()=='')):
152 filename = os.path.join(cache_dir, sha.new(url).hexdigest())
153 try:
154 request = urllib2.Request(url, None, headers)
155 data = filter(url, urllib2.urlopen(request).read())
156 if os.path.exists(filename):
157 old_data = open(filename).read()
158 diff = ''.join(difflib.unified_diff(old_data.splitlines(1), data.splitlines(1)))
159 if len(diff) > 0:
160 details += foutput('changed', url, diff, summary)
161 else:
162 details += foutput('new', url, None, summary)
163 open(filename, 'w').write(data)
164 except urllib2.HTTPError, error:
165 if display_errors:
166 details += foutput('error', url, error, summary)
167 except urllib2.URLError, error:
168 if display_errors:
169 details += foutput('error', url, error, summary)
170 count += 1
172 end = datetime.datetime.now()
174 # Output everything
175 if len(summary) > 1:
176 print '-'*line_length
177 print 'summary: %d changes' % (len(summary),)
178 print ''
179 for id, line in enumerate(summary):
180 print '%02d. %s' % (id+1, line)
181 print '-'*line_length
182 print '\n\n\n'
183 if len(details) > 1:
184 print '\n'.join(details)
185 print '-- '
186 print '%s %s, %s' % (pkgname, __version__, __copyright__)
187 print 'Website: %s' % (__homepage__,)
188 print 'watched %d URLs in %d seconds\n' % (count, (end-start).seconds)