#!/usr/bin/env python
# Copyright (C) 2007, Thomas Leonard
# See the COPYING file for details, or visit http://0install.net.

from optparse import OptionParser
import os, sys, time, shutil, subprocess, signal
from ConfigParser import RawConfigParser
from logging import info, debug, warn
from xml.dom import minidom
import codecs

signal.alarm(10 * 60)  # Abort after 10 minutes

from zeroinstall import SafeException
from zeroinstall.injector.iface_cache import iface_cache
from zeroinstall.injector import model, namespaces, policy, handler, gpg, qdom, distro
from zeroinstall.support import basedir, tasks

from atom import AtomFeed, set_element
from stats import Stats
from support import format_date, get_feed_dir, ensure_dirs
# Site configuration!

site_config_file = os.path.abspath('0mirror.ini')
FEED_TIMEOUT = 60  # Seconds to wait before giving up on a feed download
version = '0.1'

parser = OptionParser(usage="usage: %prog [options] PUBLIC-DIR")
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

if options.version:
    print "0mirror (zero-install) " + version
    print "Copyright (C) 2010 Thomas Leonard"
    print "This program comes with ABSOLUTELY NO WARRANTY,"
    print "to the extent permitted by law."
    print "You may redistribute copies of this program"
    print "under the terms of the GNU General Public License."
    print "For more information about these matters, see the file named COPYING."
    sys.exit(0)

if options.verbose:
    import logging
    logger = logging.getLogger()
    if options.verbose == 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.DEBUG)

if len(args) != 1:
    parser.print_help()
    sys.exit(1)
if not os.path.exists(site_config_file):
    print >>sys.stderr, "Configuration file '%s' not found!" % site_config_file
    sys.exit(1)
print "Reading configuration from", site_config_file

site_config = RawConfigParser()
site_config.read(site_config_file)
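# A minimal 0mirror.ini covering every key this script reads
# (the values shown are examples, not defaults):
#
#   [site]
#   address = http://localhost/0mirror
#
#   [fetching]
#   upstream_mirror =
#   n_feeds_to_update = 50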
site_address = site_config.get('site', 'address')  # e.g. "http://localhost/0mirror"
if site_address.endswith('/'):
    site_address = site_address[:-1]

# Where we try if the primary site fails
my_mirror = site_config.get('fetching', 'upstream_mirror') or None

n_feeds_to_update = int(site_config.get('fetching', 'n_feeds_to_update'))

public_dir = args[0]

feed_file = os.path.join(public_dir, 'feed-list')
ignore_file = os.path.join(public_dir, 'ignore-list')
summary_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
  <a href=""/> - <span/>
 </div>
</summary>
"""

unconfirmed_keys_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
  New keys awaiting confirmation:
  <dl>
  </dl>
 </div>
</summary>
"""
warnings_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
  <ul>
  </ul>
 </div>
</summary>
"""
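# New GPG keys are never trusted automatically: the handler refuses the
# import and records the pending feed so it can be reported in the news feed.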
unconfirmed_keys = []  # List of PendingFeeds
class NonInteractiveHandler(handler.Handler):
    def confirm_import_feed(self, pending, valid_sigs):
        for x in valid_sigs:
            warn("Need to check key %s for %s", x.fingerprint, pending.url)
        unconfirmed_keys.append(pending)
        return None
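# Race each feed download against a timeout, so one stuck server can't
# hang the entire mirror run.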
@tasks.async
def wait_with_timeout(delay, blocker):
    timeout = tasks.TimeoutBlocker(delay, 'Mirror timeout')
    yield timeout, blocker
    tasks.check(timeout, blocker)
    if not blocker.happened:
        raise Exception("Timeout (waited %d seconds)" % delay)
warnings = []
def add_warning(msg):
    warn(msg)
    warnings.append(msg)

key_dir = os.path.join(public_dir, 'keys')
ensure_dirs(key_dir)
keys = set()
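# Export each feed's signing key from the local GPG keyring into the public
# 'keys' directory (at most once per run), so clients can verify the
# mirrored feeds.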
def ensure_key(fingerprint):
    if fingerprint in keys:
        return
    key_path = os.path.join(key_dir, fingerprint[-16:] + '.gpg')
    child = subprocess.Popen(['gpg', '-a', '--export', fingerprint], stdout = subprocess.PIPE)
    keydata, unused = child.communicate()
    stream = file(key_path, 'w')
    stream.write(keydata)
    stream.close()
    print "Exported key", fingerprint
    keys.add(fingerprint)
feeds = []

now = format_date(time.time())
news_feed = AtomFeed(title = "Zero Install News Feed",
                     link = site_address + "/news-feed.xml",
                     updated = now,
                     author = "0mirror")

class DummyFeed:
    last_checked = 0
    last_modified = 0
    def __init__(self, url):
        self.url = url
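# Feeds are read back from the injector's local cache; a feed that has
# never been fetched successfully loads as None.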
def load_feed(url):
    return iface_cache.get_feed(url)

def load_feeds(feed_uris):
    feeds = {}

    for feed_url in feed_uris:
        feeds[feed_url] = load_feed(feed_url)
    return feeds
feed = None
try:
    stats = Stats()
    if not os.path.isdir(public_dir):
        raise SafeException("Public directory '%s' does not exist. "
                            "To set up a new site, create it as an empty directory now." % public_dir)
    if not os.path.isfile(feed_file):
        raise SafeException("File '%s' does not exist. It should contain a list of feed URLs, one per line" % feed_file)
    print "Reading", feed_file

    lines = filter(None, file(feed_file).read().split('\n'))
    feed_uris = [line for line in lines if not line.startswith('-')]
    feed_set = set(feed_uris)
    ignore_set = set(filter(None, file(ignore_file).read().split('\n')))
    inactive_set = set(line[1:] for line in lines if line.startswith('-'))

    known_set = feed_set | inactive_set

    stale_feeds = []  # [(last-checked, url, feed)]

    handler = NonInteractiveHandler()
    p = policy.Policy("http://localhost/dummy", handler)
    p.fetcher.feed_mirror = my_mirror

    feeds = load_feeds(feed_uris)
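    # Staleness is judged by the most recent check, successful or not, so
    # a persistently failing feed doesn't monopolise every run.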
    def last_checked(feed):
        if feed is None:
            # If we've never downloaded this feed, just keep trying (ignore last_check_attempt)
            return 0
        # Use the latest of the last successful check or the last failed check
        last_check_attempt = iface_cache.get_last_check_attempt(feed.url)
        if not last_check_attempt:
            return feed.last_checked
        return max(feed.last_checked or 0, last_check_attempt)

    # List all the feeds, starting with the most stale
    stale_feeds = [(last_checked(feed), url, feed) for url, feed in feeds.items()]
    stale_feeds.sort()

    # If we've got some completely new feeds, update all of them now
    while n_feeds_to_update < len(stale_feeds) and stale_feeds[n_feeds_to_update - 1][0] in (0, None):
        n_feeds_to_update += 1

    # Update the first few feeds in the list
    stale_feeds = stale_feeds[:n_feeds_to_update]
    for last_check, feed_url, feed in stale_feeds:
        if last_check > 0:
            ctime_str = time.strftime('%Y-%m-%d_%H:%M', time.gmtime(last_check))
            print "Feed %s last checked %s; updating..." % (feed_url, ctime_str)
        else:
            print "Feed %s is new; fetching..." % feed_url

        iface_cache.mark_as_checking(feed_url)
        blocker = p.fetcher.download_and_import_feed(feed_url, iface_cache)
        try:
            handler.wait_for_blocker(wait_with_timeout(FEED_TIMEOUT, blocker))
        except Exception, ex:
            add_warning("Error fetching '%s': %s" % (feed_url, ex))
            continue

        # Reload
        feed = feeds[feed_url] = load_feed(feed_url)
        #assert feed.last_checked, feed
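    # Mirror each cached feed: export its signing keys, copy the signed XML
    # into the public tree and repoint its latest.xml symlink.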
    for feed_url in feed_uris:
        info("Processing feed '%s'", feed_url)
        feed = feeds[feed_url]
        if feed is None:
            # Error during download?
            add_warning("Attempted to fetch '%s', but still not cached" % feed_url)
            continue

        feed_dir = os.path.join(public_dir, get_feed_dir(feed_url))
        ensure_dirs(feed_dir)

        cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed.url))
        assert cached is not None
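        # Warn about subfeeds we aren't mirroring (local paths are skipped,
        # and anything on the ignore list stays quiet)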
        for subfeed in feed.feeds:
            if subfeed.uri not in known_set:
                if subfeed.uri.startswith('/'):
                    continue
                if subfeed.uri not in ignore_set:
                    add_warning("WARNING: Subfeed %s of %s not in feeds list" % (subfeed.uri, feed.get_name()))

        # Check dependencies
        for impl in feed.implementations.values():
            for dep in impl.requires:
                if dep.interface not in known_set:
                    add_warning("Version %s of %s depends on %s, but that isn't being mirrored!" % (impl.get_version(), feed.url, dep.interface))
                    break
            else:
                continue
            break  # Once we've warned about one version, don't check any other versions

        style = os.path.join(feed_dir, 'interface.xsl')
        if not os.path.islink(style):
            os.symlink('../../../../feed_style.xsl', style)
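        # Each cached copy is published under a timestamped name; if
        # latest.xml already points at this version, nothing has changed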
        latest = os.path.join(feed_dir, 'latest.xml')

        last_modified = int(os.stat(cached).st_mtime)
        version_name = time.strftime('%Y-%m-%d_%H:%M.xml', time.gmtime(last_modified))
        version_path = os.path.join(feed_dir, version_name)

        if os.path.islink(latest) and os.readlink(latest) == version_name:
            if os.path.exists(version_path):
                continue
            warn("Broken symlink '%s'!", latest)

        # Get the keys
        stream = file(cached)
        unused, sigs = gpg.check_stream(stream)
        stream.close()

        for x in sigs:
            if isinstance(x, gpg.ValidSig):
                ensure_key(x.fingerprint)
            else:
                add_warning("Signature problem: %s" % x)
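        # Publish the new version, then repoint latest.xml atomically by
        # renaming a fresh symlink over the old one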
        shutil.copyfile(cached, version_path)
        latest_new = latest + '.new'
        if os.path.exists(latest_new):
            os.unlink(latest_new)
        os.symlink(version_name, latest_new)
        os.rename(latest_new, latest)
        print "Updated %s to %s" % (feed, version_name)

    feed = None
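    # Gather stats over every feed we know about, active or inactive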
    for feed_url in known_set:
        feed = load_feed(feed_url)
        if feed and feed.last_modified:
            stats.add_feed(feed, feed_url in feed_set)

    stats.write_summary(public_dir)
    if unconfirmed_keys:
        summary = minidom.parseString(unconfirmed_keys_xml)
        dl = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "dl")[0]
        for pending_feed in unconfirmed_keys:
            dt = summary.createElementNS("http://www.w3.org/1999/xhtml", "dt")
            dl.appendChild(dt)
            dt.appendChild(summary.createTextNode(pending_feed.url))

            dd = summary.createElementNS("http://www.w3.org/1999/xhtml", "dd")
            dl.appendChild(dd)
            dd.appendChild(summary.createTextNode(str(pending_feed.sigs[0].fingerprint)))

        news_feed.add_entry(title = "Keys awaiting confirmation",
                            link = site_address + "/news-feed.xml",
                            entry_id = "unconfirmed-keys",
                            updated = format_date(time.time()),
                            summary = summary.documentElement)
    if warnings:
        summary = minidom.parseString(warnings_xml)
        ul = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "ul")[0]
        for warning in warnings:
            li = summary.createElementNS("http://www.w3.org/1999/xhtml", "li")
            ul.appendChild(li)
            li.appendChild(summary.createTextNode(warning))

        news_feed.add_entry(title = "Warnings",
                            link = site_address + "/news-feed.xml",
                            entry_id = "warnings",
                            updated = format_date(time.time()),
                            summary = summary.documentElement)
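    # Announce the 16 most recently modified feeds, newest first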
    latest_feeds = [(feed.last_modified, feed) for feed in feeds.values() if feed]
    latest_feeds.sort()
    latest_feeds = reversed(latest_feeds[-16:])
    for date, feed in latest_feeds:
        summary = minidom.parseString(summary_xml)
        set_element(summary, "summary/div/a", feed.get_name())
        local_html_page = site_address + "/" + get_feed_dir(feed.url).replace('#', '%23') + "/feed.html"
        set_element(summary, "summary/div/a/@href", local_html_page)
        set_element(summary, "summary/div/span", feed.summary)
        news_feed.add_entry(title = "%s feed updated" % feed.get_name(),
                            link = local_html_page,
                            entry_id = feed.url,
                            updated = format_date(date),
                            summary = summary.documentElement)

    news_stream = codecs.open(os.path.join(public_dir, 'news-feed.xml'), 'w', encoding = 'utf-8')
    news_feed.save(news_stream)
    news_stream.close()
    if False:
        # Warn about possible missing feeds...
        child = subprocess.Popen(['0launch', '--list'], stdout = subprocess.PIPE)
        all_feeds, unused = child.communicate()
        all_feeds = set([x for x in all_feeds.split('\n') if x and not x.startswith('/')])
        unknown = all_feeds - known_set

        if unknown:
            print "\nUnknown feeds (add to known or ignore lists):"
            for feed in sorted(unknown):
                if '/tests/' in feed: continue
                print feed
except KeyboardInterrupt, ex:
    print >>sys.stderr, "Aborted at user's request"
    sys.exit(1)
except SafeException, ex:
    if options.verbose: raise
    print >>sys.stderr, ex
    if feed:
        print "(while processing %s)" % feed
    sys.exit(1)