Don't try to check S3 URLs

#!/usr/bin/env python

from optparse import OptionParser
import sys, shutil, tempfile, urlparse
import socket
import urllib2, os, httplib
import ftplib
import logging, time, traceback
from logging import info

from zeroinstall import SafeException
from zeroinstall.support import basedir, tasks
from zeroinstall.injector import model, gpg, namespaces, qdom
from zeroinstall.injector.config import load_config

from display import checking, result, error, highlight, error_new_line

config = load_config()

now = time.time()

version = '0.9'

WEEK = 60 * 60 * 24 * 7

class Skip:
	def __init__(self, msg):
		self.msg = msg

	def __str__(self):
		return "Skip (%s)" % self.msg

def host(address):
	if hasattr(address, 'hostname'):
		return address.hostname
	else:
		return address[1].split(':', 1)[0]

def port(address):
	if hasattr(address, 'port'):
		return address.port
	else:
		port = address[1].split(':', 1)[1:]
		if port:
			return int(port[0])
		else:
			return None

assert port(('http', 'foo:81')) == 81
assert port(urlparse.urlparse('http://foo:81')) == 81
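
# Matching sanity checks for host(), added here for symmetry with the port()
# asserts above; they exercise both the tuple and the urlparse forms.
assert host(('http', 'foo:81')) == 'foo'
assert host(urlparse.urlparse('http://foo:81')) == 'foo'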

parser = OptionParser(usage="usage: %prog [options] feed.xml")
parser.add_option("-d", "--dependencies", help="also check feeds for dependencies", action='store_true')
parser.add_option("-o", "--offline", help="only perform offline checks", action='store_true')
parser.add_option("-s", "--skip-archives", help="don't check the archives are OK", action='store_true')
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

if options.version:
	print "FeedLint (zero-install) " + version
	print "Copyright (C) 2007 Thomas Leonard"
	print "This program comes with ABSOLUTELY NO WARRANTY,"
	print "to the extent permitted by law."
	print "You may redistribute copies of this program"
	print "under the terms of the GNU General Public License."
	print "For more information about these matters, see the file named COPYING."
	sys.exit(0)

if options.offline:
	config.network_use = model.network_offline
	# Catch bugs
	os.environ['http_proxy'] = 'http://localhost:9999/offline-mode'

if options.verbose:
	logger = logging.getLogger()
	if options.verbose == 1:
		logger.setLevel(logging.INFO)
	else:
		logger.setLevel(logging.DEBUG)

if len(args) < 1:
	parser.print_help()
	sys.exit(1)

checked = set()

def arg_to_uri(arg):
	app = config.app_mgr.lookup_app(arg, missing_ok = True)
	if app is not None:
		return app.get_requirements().interface_uri
	else:
		return model.canonical_iface_uri(arg)

try:
	to_check = [arg_to_uri(a) for a in args]
except SafeException, ex:
	if options.verbose: raise
	print >>sys.stderr, ex
	sys.exit(1)

def check_key(feed_url, keyid):
	for line in os.popen('gpg --with-colons --list-keys %s' % keyid):
		if line.startswith('pub:'):
			key_id = line.split(':')[4]
			break
	else:
		raise SafeException('Failed to find key "%s" on your keyring' % keyid)

	if options.offline: return

	key_url = urlparse.urljoin(feed_url, '%s.gpg' % key_id)

	if key_url in checked:
		info("(already checked key URL %s)", key_url)
	else:
		checking("Checking key %s" % key_url)
		urllib2.urlopen(key_url).read()
		result('OK')
		checked.add(key_url)
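
# For reference, a 'pub' record in --with-colons output looks something like
# this (illustrative values, not a real key):
#   pub:u:4096:1:AE07828059A53CC1:2010-01-01:::...
# Field 5 (index 4) is the long key ID, which check_key() extracts and uses
# to build the <keyid>.gpg URL relative to the feed.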

def get_http_size(url, ttl = 3):
	assert not options.offline
	address = urlparse.urlparse(url)

	if url.lower().startswith('http://'):
		http = httplib.HTTPConnection(host(address), port(address) or 80)
	elif url.lower().startswith('https://'):
		http = httplib.HTTPSConnection(host(address), port(address) or 443)
	else:
		assert False, url

	parts = url.split('/', 3)
	if len(parts) == 4:
		path = parts[3]
	else:
		path = ''

	if host(address).endswith('.s3.amazonaws.com'):
		return Skip("S3")

	http.request('HEAD', '/' + path, headers = {'Host': host(address)})
	response = http.getresponse()
	try:
		if response.status == 200:
			return response.getheader('Content-Length')
		elif response.status in (301, 302, 303):
			new_url_rel = response.getheader('Location') or response.getheader('URI')
			new_url = urlparse.urljoin(url, new_url_rel)
		else:
			raise SafeException("HTTP error: got status code %s" % response.status)
	finally:
		response.close()

	if ttl:
		result("Moved", 'YELLOW')
		checking("Checking new URL %s" % new_url)
		assert new_url
		return get_http_size(new_url, ttl - 1)
	else:
		raise SafeException('Too many redirections.')
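
# Rough behaviour sketch (hypothetical values):
#   get_http_size('http://example.com/foo-1.0.tgz')         -> '123456' (Content-Length, a string)
#   get_http_size('http://bucket.s3.amazonaws.com/foo.tgz') -> Skip("S3"); per the commit above,
#                                                              we don't try to check S3 URLs
#   301/302/303 responses are followed, up to 3 hops, then 'Too many redirections.'
# Callers are expected to int() the returned header value themselves.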

def get_ftp_size(url):
	address = urlparse.urlparse(url)
	ftp = ftplib.FTP(host(address))
	try:
		ftp.login()
		ftp.voidcmd('TYPE I')
		return ftp.size(url.split('/', 3)[3])
	finally:
		ftp.close()
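
# 'TYPE I' switches the session to binary (image) mode before asking for the
# size: some servers won't answer SIZE in ASCII mode, and in ASCII mode the
# reported size could differ from the on-disk byte count we want to compare.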

def get_size(url, base_url = None):
	if '://' not in url:
		# Local path
		if base_url and base_url.startswith('/'):
			# Local feed; OK
			local_path = os.path.join(os.path.dirname(base_url), url)
			if not os.path.exists(local_path):
				raise SafeException("Local file '%s' does not exist (should be a URL?)" % url)
			return os.path.getsize(local_path)
		if base_url is not None:
			raise SafeException("Local file reference '%s' in non-local feed '%s'" % (url, base_url))
		# Fall-through to Unknown scheme error

	scheme = urlparse.urlparse(url)[0].lower()
	if scheme in ('http', 'https'):
		return get_http_size(url)
	elif scheme == 'ftp':
		return get_ftp_size(url)
	else:
		raise SafeException("Unknown scheme '%s' in '%s'" % (scheme, url))
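
# Examples (hypothetical paths and URLs):
#   get_size('http://example.com/foo.tgz')             -> remote Content-Length (or Skip)
#   get_size('foo.tgz', '/feeds/local.xml')            -> size of /feeds/foo.tgz on disk
#   get_size('foo.tgz', 'http://example.com/feed.xml') -> SafeException: local ref in remote feed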

def check_source(feed_url, source):
	if hasattr(source, 'url'):
		checking("Checking archive %s" % source.url)
		actual_size = get_size(source.url, feed_url)
		if isinstance(actual_size, Skip):
			result(str(actual_size), 'YELLOW')
		elif actual_size is None:
			result("No Content-Length for archive; can't check", 'YELLOW')
		else:
			actual_size = int(actual_size)
			expected_size = source.size + (source.start_offset or 0)
			if actual_size != expected_size:
				error('Bad length')
				raise SafeException("Expected archive to have a size of %d, but server says it is %d" %
						(expected_size, actual_size))
			result('OK')
	elif hasattr(source, 'steps'):
		for step in source.steps:
			check_source(feed_url, step)
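
# Why size + start_offset: the feed's size attribute counts only the bytes
# after any skipped prefix (start_offset), while the server reports the whole
# file, so e.g. size="1000" start-offset="24" should see Content-Length 1024.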

existing_urls = set()
def check_exists(url):
	if url in existing_urls: return	# Already checked
	if options.offline: return

	checking("Checking URL exists %s" % url)
	size = get_size(url)
	if isinstance(size, Skip):
		result(str(size))
	else:
		result('OK')
	existing_urls.add(url)

def scan_implementations(impls, dom):
	"""Add each implementation in dom to impls. Error if duplicate."""
	for elem in dom.childNodes:
		if elem.uri != namespaces.XMLNS_IFACE: continue
		if elem.name == 'implementation':
			impl_id = elem.attrs['id']
			if impl_id in impls:
				raise SafeException("Duplicate ID {id}!".format(id = impl_id))
			impls[impl_id] = elem
		elif elem.name == 'group':
			scan_implementations(impls, elem)
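
# For example, this (made-up) fragment would trigger the duplicate-ID error,
# because nested groups share one impls dict:
#   <group>
#     <implementation id="sha1=abc123" version="1.0"/>
#     <group>
#       <implementation id="sha1=abc123" version="1.1"/>
#     </group>
#   </group>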

n_errors = 0

def check_gpg_sig(feed_url, stream):
	start = stream.read(5)
	if start == '<?xml':
		result('Fetched')
	elif start == '-----':
		result('Old sig', colour = 'RED')
		error_new_line(' Feed has an old-style plain GPG signature. Use 0publish --xmlsign.',
				colour = 'YELLOW')
	else:
		result('Fetched')
		error_new_line(' Unknown format. File starts "%s"' % start)

	data, sigs = gpg.check_stream(stream)

	for s in sigs:
		if isinstance(s, gpg.ValidSig):
			check_key(feed_url, s.fingerprint)
		elif isinstance(s, gpg.ErrSig) and s.need_key():
			# Download missing key
			key = s.need_key()
			key_url = urlparse.urljoin(feed_url, '%s.gpg' % key)
			dl = config.fetcher.download_url(key_url)
			stream = dl.tempfile
			tasks.wait_for_blocker(dl.downloaded)

			stream.seek(0)
			gpg.import_key(stream)
			stream.close()

			check_key(feed_url, key)
		else:
			raise SafeException("Can't check sig: %s" % s)

	return data
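
# The two formats sniffed above: a feed signed with 0publish --xmlsign starts
# with the normal '<?xml' declaration (the signature sits in a comment block at
# the end of the document), while an old-style feed is ASCII-armoured GPG text
# starting '-----BEGIN ...', hence the five-byte prefix check.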

while to_check:
	feed = to_check.pop()
	if feed in checked:
		info("Already checked feed %s", feed)
		continue

	checked.add(feed)

	checking("Checking " + feed, indent = 0)
	is_remote = feed.startswith('http://') or feed.startswith('https://')

	try:
		if not is_remote:
			with open(feed) as stream:
				dom = qdom.parse(stream)

				if "uri" in dom.attrs:
					stream.seek(0)
					try:
						check_gpg_sig(dom.attrs['uri'], stream)
					except SafeException, ex:
						n_errors += 1
						error_new_line(' %s' % ex)

			feed_obj = model.ZeroInstallFeed(dom, local_path = feed if "uri" not in dom.attrs else None)
			result('Local')
		elif options.offline:
			cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed))
			if not cached:
				raise SafeException('Not cached (offline-mode)')
			with open(cached, 'rb') as stream:
				dom = qdom.parse(stream)
			feed_obj = model.ZeroInstallFeed(dom)
			result('Cached')
		else:
			tmp = tempfile.TemporaryFile(prefix = 'feedlint-')
			try:
				try:
					stream = urllib2.urlopen(feed)
					shutil.copyfileobj(stream, tmp)
				except Exception as ex:
					raise SafeException('Failed to fetch feed: {ex}'.format(ex = ex))
				tmp.seek(0)

				data = check_gpg_sig(feed, tmp)
				tmp.seek(0)

				dom = qdom.parse(data)
				feed_obj = model.ZeroInstallFeed(dom)

				if feed_obj.url != feed:
					raise SafeException('Incorrect URL "%s"' % feed_obj.url)
			finally:
				tmp.close()

		# Check for duplicate IDs
		scan_implementations({}, dom)

		for f in feed_obj.feeds:
			info("Will check feed %s", f.uri)
			to_check.append(f.uri)

		highest_version = None
		for impl in sorted(feed_obj.implementations.values()):
			if hasattr(impl, 'dependencies'):
				for r in impl.dependencies.values():
					if r.interface not in checked:
						info("Will check dependency %s", r)
						if options.dependencies:
							to_check.append(r.interface)
						else:
							check_exists(r.interface)
			if hasattr(impl, 'download_sources') and not options.skip_archives:
				if not options.offline:
					for source in impl.download_sources:
						check_source(feed_obj.url, source)
				if impl.local_path is None:
					if not impl.digests:
						raise SafeException("Version {version} has no digests".format(version = impl.get_version()))
			stability = impl.upstream_stability or model.testing
			if highest_version is None or impl.version > highest_version.version:
				highest_version = impl
			if stability == model.testing:
				testing_error = None
				if not impl.released:
					if not impl.local_path:
						testing_error = "No release date on testing version"
				else:
					try:
						released = time.strptime(impl.released, '%Y-%m-%d')
					except ValueError, ex:
						testing_error = "Can't parse date"
					else:
						ago = now - time.mktime(released)
						if ago < 0:
							testing_error = 'Release date is in the future!'
				if testing_error:
					raise SafeException("Version %s: %s (released %s)" % (impl.get_version(), testing_error, impl.released))

			# Old Windows versions use 32-bit integers to store versions. Newer versions
			# use 64-bit ones, but in general keeping the numbers small is helpful.
			for i in range(0, len(impl.version), 2):
				for x in impl.version[i]:
					if x >= (1 << 31):
						raise SafeException("Version %s: component %s won't fit in a 32-bit signed integer" % (impl.get_version(), x))
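
			# impl.version alternates lists of ints with modifier markers:
			# roughly, "1.2-pre3" parses to [[1, 2], -2, [3]] (illustrative),
			# so stepping by 2 visits just the numeric component lists.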

		if highest_version and (highest_version.upstream_stability or model.testing) is model.testing:
			print highlight(' Highest version (%s) is still "testing"' % highest_version.get_version(), 'YELLOW')

		for homepage in feed_obj.get_metadata(namespaces.XMLNS_IFACE, 'homepage'):
			check_exists(homepage.content)

		for icon in feed_obj.get_metadata(namespaces.XMLNS_IFACE, 'icon'):
			check_exists(icon.getAttribute('href'))
	except (urllib2.HTTPError, httplib.BadStatusLine, socket.error, ftplib.error_perm), ex:
		err_msg = str(ex).strip() or str(type(ex))
		error_new_line(' ' + err_msg)
		n_errors += 1
		if options.verbose: traceback.print_exc()
	except SafeException, ex:
		if options.verbose: raise
		error_new_line(' ' + str(ex))
		n_errors += 1

if n_errors == 0:
	print "OK"
else:
	print "\nERRORS FOUND:", n_errors
	sys.exit(1)