#!/usr/bin/env python
# Copyright (C) 2007, Thomas Leonard
# See the COPYING file for details, or visit http://0install.net.

from optparse import OptionParser
import os, sys, time, shutil, subprocess, signal
from ConfigParser import RawConfigParser
from logging import info, debug, warn
from xml.dom import minidom

signal.alarm(10 * 60)  # Abort after 10 minutes

from zeroinstall import SafeException
from zeroinstall.injector.iface_cache import iface_cache
from zeroinstall.injector import model, namespaces, policy, handler, gpg, qdom, distro
from zeroinstall.support import basedir, tasks

from atom import AtomFeed, set_element
from stats import Stats
from support import format_date, get_feed_dir, ensure_dirs

# Site configuration!

site_config_file = os.path.abspath('0mirror.ini')
FEED_TIMEOUT = 60  # Seconds to wait before giving up on a feed download

version = '0.1'

parser = OptionParser(usage="usage: %prog [options] PUBLIC-DIR")
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

if options.version:
    print "0mirror (zero-install) " + version
    print "Copyright (C) 2010 Thomas Leonard"
    print "This program comes with ABSOLUTELY NO WARRANTY,"
    print "to the extent permitted by law."
    print "You may redistribute copies of this program"
    print "under the terms of the GNU General Public License."
    print "For more information about these matters, see the file named COPYING."
    sys.exit(0)

if options.verbose:
    import logging
    logger = logging.getLogger()
    if options.verbose == 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.DEBUG)

if len(args) != 1:
    parser.print_help()
    sys.exit(1)

if not os.path.exists(site_config_file):
    print >>sys.stderr, "Configuration file '%s' not found!" % site_config_file
    sys.exit(1)
print "Reading configuration from", site_config_file

site_config = RawConfigParser()
site_config.read(site_config_file)

site_address = site_config.get('site', 'address')  # e.g. "http://localhost/0mirror"
if not site_address.endswith('/'):
    site_address += '/'

# Where we try if the primary site fails
my_mirror = site_config.get('fetching', 'upstream_mirror') or None

n_feeds_to_update = int(site_config.get('fetching', 'n_feeds_to_update'))

public_dir = args[0]

feed_file = os.path.join(public_dir, 'feed-list')
ignore_file = os.path.join(public_dir, 'ignore-list')
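
# XHTML summary templates for entries in the generated Atom news feed; the
# empty elements are filled in with minidom before each entry is added.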
summary_xml = """
<summary type='xhtml'>
  <div xmlns="http://www.w3.org/1999/xhtml">
    <a href=""/> - <span/>
  </div>
</summary>
"""

unconfirmed_keys_xml = """
<summary type='xhtml'>
  <div xmlns="http://www.w3.org/1999/xhtml">
    New keys awaiting confirmation:
    <dl>
    </dl>
  </div>
</summary>
"""

warnings_xml = """
<summary type='xhtml'>
  <div xmlns="http://www.w3.org/1999/xhtml">
    <ul>
    </ul>
  </div>
</summary>
"""
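
# A handler that never prompts: feeds signed with unknown keys are queued in
# unconfirmed_keys for manual review instead of being trusted automatically.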
unconfirmed_keys = []  # List of PendingFeeds
class NonInteractiveHandler(handler.Handler):
    def confirm_import_feed(self, pending, valid_sigs):
        for x in valid_sigs:
            warn("Need to check key %s for %s", x.fingerprint, pending.url)
        unconfirmed_keys.append(pending)
        return None
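
# Wait for a download blocker, but give up after FEED_TIMEOUT seconds so a
# single stuck feed cannot stall the whole run.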
@tasks.async
def wait_with_timeout(delay, blocker):
    timeout = tasks.TimeoutBlocker(FEED_TIMEOUT, 'Mirror timeout')
    yield timeout, blocker
    tasks.check(timeout, blocker)
    if not blocker.happened:
        raise Exception("Timeout (waited %d seconds)" % delay)

warnings = []
def add_warning(msg):
    warn(msg)
    warnings.append(msg)
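
# Export each signing key (once per run) into public_dir/keys/, named after
# the last 16 hex digits of its fingerprint.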
key_dir = os.path.join(public_dir, 'keys')
ensure_dirs(key_dir)
keys = set()
def ensure_key(fingerprint):
    if fingerprint in keys:
        return
    key_path = os.path.join(key_dir, fingerprint[-16:] + '.gpg')
    child = subprocess.Popen(['gpg', '-a', '--export', fingerprint], stdout = subprocess.PIPE)
    keydata, unused = child.communicate()
    stream = file(key_path, 'w')
    stream.write(keydata)
    stream.close()
    print "Exported key", fingerprint
    keys.add(fingerprint)

feeds = []

now = format_date(time.time())
news_feed = AtomFeed(title = "Zero Install News Feed",
                     link = site_address + "/news-feed.xml",
                     updated = now,
                     author = "0mirror")
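
# Placeholder for feeds that are not yet in the local cache.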
class DummyFeed:
    last_checked = 0
    last_modified = 0
    def __init__(self, url):
        self.url = url
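
# Load a feed from the injector's cache: last_modified comes from the cached
# file's mtime and last_checked from the user_overrides file, if present.
# Returns a DummyFeed if the feed has not been cached yet.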
def load_feed(url):
    cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(url))
    if cached:
        with open(cached) as stream:
            root = qdom.parse(stream)
        feed = model.ZeroInstallFeed(root, None)
        feed.last_modified = int(os.stat(cached).st_mtime)

        # We don't currently have a clean way to get the last checked time using
        # the library...
        user = basedir.load_first_config(namespaces.config_site, 'injector',
                                         'user_overrides', model.escape(url))
        if user:
            root = qdom.parse(file(user))
            last_checked = root.getAttribute('last-checked')
            if last_checked:
                feed.last_checked = int(last_checked)

        return feed
    return DummyFeed(url)

def load_feeds(feed_uris):
    feeds = {}

    for feed_url in feed_uris:
        feeds[feed_url] = load_feed(feed_url)
    return feeds

feed = None
try:
    stats = Stats()
    if not os.path.isdir(public_dir):
        raise SafeException("Public directory '%s' does not exist. "
                            "To setup a new site, create it as an empty directory now." % public_dir)
    if not os.path.isfile(feed_file):
        raise SafeException("File '%s' does not exist. It should contain a list of feed URLs, one per line" % feed_file)
    print "Reading", feed_file
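
    # feed-list format: one URL per line; lines starting with '-' mark inactive
    # feeds, which are not fetched but still count as "known" below.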
    lines = filter(None, file(feed_file).read().split('\n'))
    feed_uris = [line for line in lines if not line.startswith('-')]
    feed_set = set(feed_uris)
    ignore_set = set(filter(None, file(ignore_file).read().split('\n')))
    inactive_set = set(line[1:] for line in lines if line.startswith('-'))

    known_set = feed_set | inactive_set

    stale_feeds = []  # [(last-checked, feed)]

    handler = NonInteractiveHandler()
    p = policy.Policy("http://localhost/dummy", handler)
    p.fetcher.feed_mirror = my_mirror

    feeds = load_feeds(feed_uris)
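
    # A feed's effective last-checked time: the later of the feed's own
    # last_checked value and the cache's last recorded download attempt.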
    def last_checked(feed):
        last_check_attempt = iface_cache.get_last_check_attempt(feed.url)
        if not last_check_attempt:
            return feed.last_checked
        return max(feed.last_checked or 0, last_check_attempt)

    # List all the feeds, starting with the most stale
    stale_feeds = [(last_checked(feed), feed) for feed in feeds.values()]
    stale_feeds.sort()

    # If we've got some completely new feeds, update all of them now
    while n_feeds_to_update < len(stale_feeds) and stale_feeds[n_feeds_to_update - 1][0] in (0, None):
        n_feeds_to_update += 1

    # Update the first few feeds in the list
    stale_feeds = stale_feeds[:n_feeds_to_update]
    for last_check, feed in stale_feeds:
        if last_check > 0:
            ctime_str = time.strftime('%Y-%m-%d_%H:%M', time.gmtime(last_check))
            print "Feed %s last checked %s; updating..." % (feed.url, ctime_str)
        else:
            print "Feed %s is new; fetching..." % feed.url

        iface_cache.mark_as_checking(feed.url)
        blocker = p.fetcher.download_and_import_feed(feed.url, iface_cache)
        try:
            handler.wait_for_blocker(wait_with_timeout(FEED_TIMEOUT, blocker))
        except Exception, ex:
            add_warning("Error fetching '%s': %s" % (feed.url, ex))
            continue
        # Reload
        feeds[feed.url] = load_feed(feed.url)
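
    # Mirror every cached feed: copy the signed XML into the site tree and
    # point the per-feed 'latest.xml' symlink at the newest version.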
    for feed_url in feed_uris:
        info("Processing feed '%s'", feed_url)
        feed = feeds[feed_url]
        if isinstance(feed, DummyFeed):
            # Error during download?
            add_warning("Attempted to fetch '%s', but still not cached" % feed_url)
            continue

        feed_dir = os.path.join(public_dir, get_feed_dir(feed_url))
        ensure_dirs(feed_dir)

        cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed.url))
        assert cached is not None

        for subfeed in feed.feeds:
            if subfeed.uri not in known_set:
                if subfeed.uri.startswith('/'):
                    continue
                if subfeed.uri not in ignore_set:
                    add_warning("WARNING: Subfeed %s of %s not in feeds list" % (subfeed.uri, feed.get_name()))

        # Check dependencies
        for impl in feed.implementations.values():
            for dep in impl.requires:
                if dep.interface not in known_set:
                    add_warning("Version %s of %s depends on %s, but that isn't being mirrored!" % (impl.get_version(), feed.url, dep.interface))
                    break
            else:
                continue
            break  # Once we've warned about one version, don't check any other versions

        style = os.path.join(feed_dir, 'interface.xsl')
        if not os.path.islink(style):
            os.symlink('../../../../feed_style.xsl', style)

        latest = os.path.join(feed_dir, 'latest.xml')

        last_modified = int(os.stat(cached).st_mtime)
        version_name = time.strftime('%Y-%m-%d_%H:%M.xml', time.gmtime(last_modified))
        version_path = os.path.join(feed_dir, version_name)

        if os.path.islink(latest) and os.readlink(latest) == version_name:
            if os.path.exists(version_path):
                continue
            warn("Broken symlink '%s'!", latest)

        # Get the keys
        stream = file(cached)
        unused, sigs = gpg.check_stream(stream)
        stream.close()

        for x in sigs:
            if isinstance(x, gpg.ValidSig):
                ensure_key(x.fingerprint)
            else:
                add_warning("Signature problem: %s" % x)
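
        # Publish the new version, then switch 'latest.xml' to it by creating a
        # temporary symlink and renaming it over the old one.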
        shutil.copyfile(cached, version_path)
        latest_new = latest + '.new'
        if os.path.exists(latest_new):
            os.unlink(latest_new)
        os.symlink(version_name, latest_new)
        os.rename(latest_new, latest)
        print "Updated %s to %s" % (feed, version_name)

    feed = None
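
    # Gather statistics over every known feed (active and inactive) and write
    # the summary output to the public directory.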
    for feed_url in known_set:
        feed = load_feed(feed_url)
        if feed and feed.last_modified:
            stats.add_feed(feed, feed_url in feed_set)

    stats.write_summary(public_dir)
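
    # Build the Atom news feed: one entry for keys awaiting confirmation, one
    # for warnings, and one per recently updated feed.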
    if unconfirmed_keys:
        summary = minidom.parseString(unconfirmed_keys_xml)
        dl = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "dl")[0]
        for pending_feed in unconfirmed_keys:
            dt = summary.createElementNS("http://www.w3.org/1999/xhtml", "dt")
            dl.appendChild(dt)
            dt.appendChild(summary.createTextNode(pending_feed.url))

            dd = summary.createElementNS("http://www.w3.org/1999/xhtml", "dd")
            dl.appendChild(dd)
            dd.appendChild(summary.createTextNode(str(pending_feed.sigs[0].fingerprint)))

        news_feed.add_entry(title = "Keys awaiting confirmation",
                            link = site_address + "/news-feed.xml",
                            entry_id = "unconfirmed-keys",
                            updated = format_date(time.time()),
                            summary = summary.documentElement)

    if warnings:
        summary = minidom.parseString(warnings_xml)
        ul = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "ul")[0]
        for warning in warnings:
            li = summary.createElementNS("http://www.w3.org/1999/xhtml", "li")
            ul.appendChild(li)
            li.appendChild(summary.createTextNode(warning))

        news_feed.add_entry(title = "Warnings",
                            link = site_address + "/news-feed.xml",
                            entry_id = "warnings",
                            updated = format_date(time.time()),
                            summary = summary.documentElement)
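
    # The 16 most recently modified feeds, newest first.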
    latest_feeds = [(feed.last_modified, feed) for feed in feeds.values()]
    latest_feeds.sort()
    latest_feeds = reversed(latest_feeds[-16:])
    for date, feed in latest_feeds:
        summary = minidom.parseString(summary_xml)
        set_element(summary, "summary/div/a", feed.get_name())
        local_html_page = site_address + "/" + get_feed_dir(feed.url).replace('#', '%23') + "/feed.html"
        set_element(summary, "summary/div/a/@href", local_html_page)
        set_element(summary, "summary/div/span", feed.summary)
        news_feed.add_entry(title = "%s feed updated" % feed.get_name(),
                            link = local_html_page,
                            entry_id = feed.url,
                            updated = format_date(date),
                            summary = summary.documentElement)

    news_stream = file(os.path.join(public_dir, 'news-feed.xml'), 'w')
    news_feed.save(news_stream)
    news_stream.close()

    if False:
        # Warn about possible missing feeds...
        child = subprocess.Popen(['0launch', '--list'], stdout = subprocess.PIPE)
        all_feeds, unused = child.communicate()
        all_feeds = set([x for x in all_feeds.split('\n') if x and not x.startswith('/')])
        unknown = all_feeds - known_set

        if unknown:
            print "\nUnknown feeds (add to known or ignore lists):"
            for feed in sorted(unknown):
                if '/tests/' in feed: continue
                print feed

except KeyboardInterrupt, ex:
    print >>sys.stderr, "Aborted at user's request"
    sys.exit(1)
except SafeException, ex:
    if options.verbose: raise
    print >>sys.stderr, ex
    if feed:
        print "(while processing %s)" % feed
    sys.exit(1)