#!/usr/bin/env python
# Copyright (C) 2013, Thomas Leonard
# See the COPYING file for details, or visit http://0install.net.

from optparse import OptionParser
import os, sys, time, shutil, subprocess, signal, logging
from ConfigParser import RawConfigParser
from logging import info, debug, warn
from xml.dom import minidom
import codecs

signal.alarm(10 * 60)	# Abort after 10 minutes

from zeroinstall import SafeException
from zeroinstall.injector.iface_cache import iface_cache
from zeroinstall.injector import model, namespaces, config, gpg, handler
from zeroinstall.support import basedir, tasks

from atom import AtomFeed, set_element
from stats import Stats, sig_cache
from support import format_date, get_feed_dir, ensure_dirs
import index

missing_set = set()

# Site configuration!

site_config_file = os.path.abspath('0mirror.ini')
FEED_TIMEOUT = 60	# Seconds to wait before giving up on a feed download

version = '0.1'

parser = OptionParser(usage="usage: %prog [options] PUBLIC-DIR")
parser.add_option("", "--reindex", help="index all feeds, not just new ones", action='store_true')
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

if options.version:
	print "0mirror (zero-install) " + version
	print "Copyright (C) 2013 Thomas Leonard"
	print "This program comes with ABSOLUTELY NO WARRANTY,"
	print "to the extent permitted by law."
	print "You may redistribute copies of this program"
	print "under the terms of the GNU General Public License."
	print "For more information about these matters, see the file named COPYING."
	sys.exit(0)
50 if options.verbose:
51 logger = logging.getLogger()
52 if options.verbose == 1:
53 logger.setLevel(logging.INFO)
54 else:
55 logger.setLevel(logging.DEBUG)
57 if len(args) != 1:
58 parser.print_help()
59 sys.exit(1)
61 if not os.path.exists(site_config_file):
62 print >>sys.stderr, "Configuration file '%s' not found!" % site_config_file
63 sys.exit(1)
64 print "Reading configuration from", site_config_file
66 site_config = RawConfigParser()
67 site_config.read(site_config_file)
69 site_address = site_config.get('site', 'address') # e.g. "http://localhost/0mirror"
70 if not site_address.endswith('/'):
71 site_address += '/'
73 # Where we try if the primary site fails
74 my_mirror = site_config.get('fetching', 'upstream_mirror') or None
76 if options.reindex:
77 n_feeds_to_update = 0
78 else:
79 n_feeds_to_update = int(site_config.get('fetching', 'n_feeds_to_update'))
81 public_dir = args[0]
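
# Files read from PUBLIC-DIR: 'feed-list' holds one feed URL per line (a
# leading '-' marks a feed as inactive), and 'ignore-list' holds subfeed URLs
# that we deliberately don't mirror. 'warnings.xml' is (re)generated below.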
feed_file = os.path.join(public_dir, 'feed-list')
ignore_file = os.path.join(public_dir, 'ignore-list')
warnings_file = os.path.join(public_dir, 'warnings.xml')
search_index_dir = os.path.join(public_dir, 'search-index')	# (doesn't need to be public, actually)
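
# XHTML fragments used as templates for the Atom <summary> elements; they are
# filled in with minidom / set_element when entries are added below.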
summary_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
  <a href=""/> - <span/>
 </div>
</summary>
"""

warnings_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
 </div>
</summary>
"""
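
# A handler that never confirms new GPG keys interactively; feeds signed with
# unknown keys are collected in unconfirmed_keys and reported as warnings later.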
unconfirmed_keys = []	# List of PendingFeeds
class NonInteractiveHandler(handler.Handler):
	def confirm_import_feed(self, pending, valid_sigs):
		for x in valid_sigs:
			warn("Need to check key %s for %s", x.fingerprint, pending.url)
		unconfirmed_keys.append(pending)
		return None
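
# Wrap a download blocker so that a stalled feed download gives up after
# FEED_TIMEOUT seconds instead of blocking the whole run.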
@tasks.async
def wait_with_timeout(delay, blocker):
	timeout = tasks.TimeoutBlocker(FEED_TIMEOUT, 'Mirror timeout')
	yield timeout, blocker
	tasks.check([timeout, blocker])
	if not blocker.happened:
		raise Exception("Timeout (waited %d seconds)" % delay)

warnings = []
def add_warning(title, msg):
	warn("%s: %s", title, msg)
	warnings.append((title, str(msg)))
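
# Export the GPG keys used to sign mirrored feeds into PUBLIC-DIR/keys/
# (presumably so clients can fetch them from the mirror).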
key_dir = os.path.join(public_dir, 'keys')
ensure_dirs(key_dir)
keys = set()
def ensure_key(fingerprint):
	if fingerprint in keys:
		return
	key_path = os.path.join(key_dir, fingerprint[-16:] + '.gpg')
	child = subprocess.Popen(['gpg', '-a', '--export', fingerprint], stdout = subprocess.PIPE)
	keydata, unused = child.communicate()
	stream = file(key_path, 'w')
	stream.write(keydata)
	stream.close()
	print "Exported key", fingerprint
	keys.add(fingerprint)

feeds = []

now = format_date(time.time())
news_feed = AtomFeed(title = "Zero Install News Feed",
		     link = site_address + "/news-feed.xml",
		     updated = now,
		     author = "0mirror")
warnings_feed = AtomFeed(title = "0mirror Warnings Feed",
			 link = site_address + "/warnings.xml",
			 updated = now,
			 author = "0mirror",
			 source = warnings_file)
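
# load_feed returns the cached feed object, or None if the feed has never been
# downloaded; load_feeds silences 0install's own logging while reading the
# whole list from the cache.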
def load_feed(url):
	return iface_cache.get_feed(url)

def load_feeds(feed_uris):
	logging.getLogger("0install").setLevel(logging.ERROR)
	try:
		feeds = {}

		for feed_url in feed_uris:
			feeds[feed_url] = load_feed(feed_url)
		return feeds
	finally:
		logging.getLogger("0install").setLevel(logging.WARNING)

feed = None
try:
	stats = Stats()
	if not os.path.isdir(public_dir):
		raise SafeException("Public directory '%s' does not exist. "
				    "To setup a new site, create it as an empty directory now." % public_dir)
	if not os.path.isfile(feed_file):
		raise SafeException("File '%s' does not exist. It should contain a list of feed URLs, one per line" % feed_file)
	print "Reading", feed_file

	lines = filter(None, file(feed_file).read().split('\n'))
	feed_uris = [line for line in lines if not line.startswith('-')]
	feed_set = set(feed_uris)
	ignore_set = set(filter(None, file(ignore_file).read().split('\n')))
	inactive_set = set(line[1:] for line in lines if line.startswith('-'))

	known_set = feed_set | inactive_set

	stale_feeds = []	# [(last-checked, url, feed)]

	c = config.load_config()
	c.mirror = my_mirror
	indexer = index.Indexer(c, search_index_dir)

	feeds = load_feeds(feed_uris)

	def last_checked(feed):
		if feed is None:
			# If we've never downloaded this feed, just keep trying (ignore last_check_attempt)
			return 0
		# Use the latest of the last successful check or the last failed check
		last_check_attempt = iface_cache.get_last_check_attempt(feed.url)
		if not last_check_attempt:
			return feed.last_checked
		return max(feed.last_checked or 0, last_check_attempt)

	# List all the feeds, starting with the most stale
	stale_feeds = [(last_checked(feed), url, feed) for url, feed in feeds.items()]
	stale_feeds.sort()

	# If we've got some completely new feeds, update all of them now
	while n_feeds_to_update < len(stale_feeds) and stale_feeds[n_feeds_to_update - 1][0] in (0, None):
		n_feeds_to_update += 1

	# Update the first few feeds in the list
	stale_feeds = stale_feeds[:n_feeds_to_update]
	for last_check, feed_url, feed in stale_feeds:
		if last_check > 0:
			ctime_str = time.strftime('%Y-%m-%d_%H:%M', time.gmtime(last_check))
			print "Feed %s last checked %s; updating..." % (feed_url, ctime_str)
		else:
			print "Feed %s is new; fetching..." % feed_url

		iface_cache.mark_as_checking(feed_url)
		blocker = c.fetcher.download_and_import_feed(feed_url, iface_cache)
		try:
			tasks.wait_for_blocker(wait_with_timeout(FEED_TIMEOUT, blocker))
		except Exception, ex:
			add_warning("Error fetching feed", "Error fetching '%s': %s" % (feed_url, ex))
			continue
		# Reload
		feed = feeds[feed_url] = load_feed(feed_url)
		#assert feed.last_checked, feed
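
	# Mirror each feed we know about: copy the cached XML into the public tree
	# under a dated filename, export its signing keys, and index it for search.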
	for feed_url in feed_uris:
		info("Processing feed '%s'", feed_url)
		feed = feeds[feed_url]
		if feed is None:
			# Error during download?
			add_warning("Fetch failed", "Attempted to fetch '%s', but still not cached" % feed_url)
			continue

		feed_dir = os.path.join(public_dir, get_feed_dir(feed_url))
		ensure_dirs(feed_dir)

		cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed.url))
		assert cached is not None

		for subfeed in feed.feeds:
			if subfeed.uri not in known_set:
				if subfeed.uri.startswith('/'):
					continue
				if subfeed.uri not in ignore_set:
					add_warning("Missing subfeed", "WARNING: Subfeed %s of %s not in feeds list" % (subfeed.uri, feed.get_name()))

		# Check dependencies
		for impl in feed.implementations.values():
			for dep in impl.requires:
				if dep.interface not in known_set and dep.interface not in missing_set:
					add_warning("Missing dependency", "Version %s of %s depends on %s, but that isn't being mirrored!" % (impl.get_version(), feed.url, dep.interface))
					missing_set.add(dep.interface)
					break	# (stop at the first missing dependency)
			else:
				continue
			break	# Once we've warned about one version, don't check any other versions

		style = os.path.join(feed_dir, 'interface.xsl')
		if not os.path.islink(style):
			os.symlink('../../../../feed_style.xsl', style)

		latest = os.path.join(feed_dir, 'latest.xml')

		last_modified = int(os.stat(cached).st_mtime)
		version_name = time.strftime('%Y-%m-%d_%H:%M.xml', time.gmtime(last_modified))
		version_path = os.path.join(feed_dir, version_name)

		if os.path.islink(latest) and os.readlink(latest) == version_name:
			if os.path.exists(version_path):
				continue
			warn("Broken symlink '%s'!", latest)

		# Get the keys
		stream = file(cached)
		unused, sigs = gpg.check_stream(stream)
		stream.close()

		for x in sigs:
			if isinstance(x, gpg.ValidSig):
				ensure_key(x.fingerprint)
			else:
				add_warning("Signature problem", x)

		shutil.copyfile(cached, version_path)
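		# Point 'latest.xml' at the new snapshot atomically: create the symlink
		# under a temporary name, then rename it over the old one.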
		latest_new = latest + '.new'
		if os.path.exists(latest_new):
			os.unlink(latest_new)
		os.symlink(version_name, latest_new)
		os.rename(latest_new, latest)
		sig_cache.update(feed.url)
		if not options.reindex: indexer.update(feed_url)
		print "Updated %s to %s" % (feed, version_name)

	feed = None

	for feed_url in known_set:
		feed = load_feed(feed_url)
		if feed and feed.last_modified:
			stats.add_feed(feed, feed_url in feed_set)

	if options.reindex:
		for url in feed_set:
			indexer.update(url)

	# Commit search db (whether we indexed everything, or just the updated feeds)
	indexer.commit()

	stats.write_summary(public_dir)

	for pending_feed in unconfirmed_keys:
		add_warning("Key awaiting confirmation",
			    "Feed: {feed}, Fingerprint: {fingerprint}".format(
				feed = pending_feed.url,
				fingerprint = pending_feed.sigs[0].fingerprint))
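
	# Publish each warning as an entry in the warnings Atom feed (warnings.xml),
	# keeping only the 20 most recent entries.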
	if warnings:
		i = 0
		for (title, warning) in warnings:
			summary = minidom.parseString(warnings_xml)
			div = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "div")[0]
			div.appendChild(summary.createTextNode(warning))
			warnings_feed.add_entry(title = title,
						link = site_address + "/warnings.xml",
						entry_id = "warning-" + now + '-%d' % i,
						updated = now,
						summary = summary.documentElement)
			i += 1
		warnings_feed.limit(20)
		with open(warnings_file, 'w') as stream:
			warnings_feed.save(stream)
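
	# Rebuild the news feed from the 16 most recently modified feeds.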
	latest_feeds = [(feed.last_modified, feed) for feed in feeds.values() if feed]
	latest_feeds.sort()
	latest_feeds = reversed(latest_feeds[-16:])
	for date, feed in latest_feeds:
		summary = minidom.parseString(summary_xml)
		set_element(summary, "summary/div/a", feed.get_name())
		local_html_page = site_address + "/" + get_feed_dir(feed.url).replace('#', '%23') + "/feed.html"
		set_element(summary, "summary/div/a/@href", local_html_page)
		set_element(summary, "summary/div/span", feed.summary)
		news_feed.add_entry(title = "%s feed updated" % feed.get_name(),
				    link = local_html_page,
				    entry_id = feed.url,
				    updated = format_date(date),
				    summary = summary.documentElement)

	news_stream = codecs.open(os.path.join(public_dir, 'news-feed.xml'), 'w', encoding = 'utf-8')
	news_feed.save(news_stream)
	news_stream.close()

	if False:
		# Warn about possible missing feeds...
		child = subprocess.Popen(['0launch', '--list'], stdout = subprocess.PIPE)
		all_feeds, unused = child.communicate()
		all_feeds = set([x for x in all_feeds.split('\n') if x and not x.startswith('/')])
		unknown = all_feeds - known_set

		if unknown:
			print "\nUnknown feeds (add to known or ignore lists):"
			for feed in sorted(unknown):
				if '/tests/' in feed: continue
				print feed

	if missing_set:
		print "\nMissing feeds:"
		for x in missing_set:
			print x

except KeyboardInterrupt, ex:
	print >>sys.stderr, "Aborted at user's request"
	sys.exit(1)
except SafeException, ex:
	if options.verbose: raise
	print >>sys.stderr, ex
	if feed:
		print "(while processing %s)" % feed
	sys.exit(1)