Allow search to be used as a library
#!/usr/bin/env python
# Copyright (C) 2013, Thomas Leonard
# See the COPYING file for details, or visit http://0install.net.

from optparse import OptionParser
import os, sys, time, shutil, subprocess, signal, logging
from ConfigParser import RawConfigParser
from logging import info, debug, warn
from xml.dom import minidom
import codecs

signal.alarm(10 * 60)  # Abort after 10 minutes

from zeroinstall import SafeException
from zeroinstall.injector.iface_cache import iface_cache
from zeroinstall.injector import model, namespaces, config, gpg, handler
from zeroinstall.support import basedir, tasks

from atom import AtomFeed, set_element
from stats import Stats, sig_cache
from support import format_date, get_feed_dir, ensure_dirs
import index
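
# Note: atom, stats, support and index appear to be helper modules that live
# alongside this script in the 0mirror source tree (they are imported without
# a package prefix); index provides the search indexer used further down.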

missing_set = set()

# Site configuration!

site_config_file = os.path.abspath('0mirror.ini')
FEED_TIMEOUT = 60  # Seconds to wait before giving up on a feed download

version = '0.1'

parser = OptionParser(usage="usage: %prog [options] PUBLIC-DIR")
parser.add_option("", "--reindex", help="index all feeds, not just new ones", action='store_true')
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

if options.version:
    print "0mirror (zero-install) " + version
    print "Copyright (C) 2013 Thomas Leonard"
    print "This program comes with ABSOLUTELY NO WARRANTY,"
    print "to the extent permitted by law."
    print "You may redistribute copies of this program"
    print "under the terms of the GNU General Public License."
    print "For more information about these matters, see the file named COPYING."
    sys.exit(0)

if options.verbose:
    logger = logging.getLogger()
    if options.verbose == 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.DEBUG)

if len(args) != 1:
    parser.print_help()
    sys.exit(1)

if not os.path.exists(site_config_file):
    print >>sys.stderr, "Configuration file '%s' not found!" % site_config_file
    sys.exit(1)
print "Reading configuration from", site_config_file

site_config = RawConfigParser()
site_config.read(site_config_file)

site_address = site_config.get('site', 'address')  # e.g. "http://localhost/0mirror"
if not site_address.endswith('/'):
    site_address += '/'

# Where we try if the primary site fails
my_mirror = site_config.get('fetching', 'upstream_mirror') or None

n_feeds_to_update = int(site_config.get('fetching', 'n_feeds_to_update'))
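
# An illustrative 0mirror.ini matching the get() calls above (the values here
# are examples, not defaults shipped with 0mirror):
#
#   [site]
#   address = http://localhost/0mirror
#
#   [fetching]
#   upstream_mirror =
#   n_feeds_to_update = 50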

public_dir = args[0]

feed_file = os.path.join(public_dir, 'feed-list')
ignore_file = os.path.join(public_dir, 'ignore-list')
warnings_file = os.path.join(public_dir, 'warnings.xml')
search_index_dir = os.path.join(public_dir, 'search-index')  # (doesn't need to be public, actually)
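
# Layout of PUBLIC-DIR (as used below): 'feed-list' holds one feed URL per
# line, a leading '-' marking a feed as inactive (still known, but not fetched
# or published); 'ignore-list' holds subfeed URLs that should not trigger a
# "Missing subfeed" warning; 'warnings.xml' and 'news-feed.xml' are the Atom
# feeds generated at the end of each run.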

summary_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
  <a href=""/> - <span/>
 </div>
</summary>
"""

warnings_xml = """
<summary type='xhtml'>
 <div xmlns="http://www.w3.org/1999/xhtml">
 </div>
</summary>
"""
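
# 0mirror runs unattended, so instead of prompting for new GPG keys this
# handler just records each PendingFeed; a warning entry is produced for the
# operator later ("Key awaiting confirmation").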
unconfirmed_keys = []  # List of PendingFeeds
class NonInteractiveHandler(handler.Handler):
    def confirm_import_feed(self, pending, valid_sigs):
        for x in valid_sigs:
            warn("Need to check key %s for %s", x.fingerprint, pending.url)
        unconfirmed_keys.append(pending)
        return None
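
# Race the feed download against a TimeoutBlocker so that one unresponsive
# server can't stall the whole run (the signal.alarm above is the last-resort
# limit for the entire process).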
@tasks.async
def wait_with_timeout(delay, blocker):
    timeout = tasks.TimeoutBlocker(FEED_TIMEOUT, 'Mirror timeout')
    yield timeout, blocker
    tasks.check([timeout, blocker])
    if not blocker.happened:
        raise Exception("Timeout (waited %d seconds)" % delay)

warnings = []
def add_warning(title, msg):
    warn("%s: %s", title, msg)
    warnings.append((title, msg))

key_dir = os.path.join(public_dir, 'keys')
ensure_dirs(key_dir)
keys = set()
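
# Export each signing key (once per run) as ASCII-armoured GPG data into
# <public_dir>/keys/, named after the last 16 hex digits of its fingerprint.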
def ensure_key(fingerprint):
    if fingerprint in keys:
        return
    key_path = os.path.join(key_dir, fingerprint[-16:] + '.gpg')
    child = subprocess.Popen(['gpg', '-a', '--export', fingerprint], stdout = subprocess.PIPE)
    keydata, unused = child.communicate()
    stream = file(key_path, 'w')
    stream.write(keydata)
    stream.close()
    print "Exported key", fingerprint
    keys.add(fingerprint)

feeds = []

now = format_date(time.time())
news_feed = AtomFeed(title = "Zero Install News Feed",
                     link = site_address + "/news-feed.xml",
                     updated = now,
                     author = "0mirror")
warnings_feed = AtomFeed(title = "0mirror Warnings Feed",
                         link = site_address + "/warnings.xml",
                         updated = now,
                         author = "0mirror",
                         source = warnings_file)
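
# Feeds are only ever read from the local 0install cache here; load_feeds()
# raises the "0install" log level to ERROR for the duration so that warnings
# about individual cached feeds don't flood the output.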
def load_feed(url):
    return iface_cache.get_feed(url)

def load_feeds(feed_uris):
    logging.getLogger("0install").setLevel(logging.ERROR)
    try:
        feeds = {}

        for feed_url in feed_uris:
            feeds[feed_url] = load_feed(feed_url)
        return feeds
    finally:
        logging.getLogger("0install").setLevel(logging.WARNING)

feed = None
try:
    stats = Stats()
    if not os.path.isdir(public_dir):
        raise SafeException("Public directory '%s' does not exist. "
                            "To set up a new site, create it as an empty directory now." % public_dir)
    if not os.path.isfile(feed_file):
        raise SafeException("File '%s' does not exist. It should contain a list of feed URLs, one per line" % feed_file)
    print "Reading", feed_file

    lines = filter(None, file(feed_file).read().split('\n'))
    feed_uris = [line for line in lines if not line.startswith('-')]
    feed_set = set(feed_uris)
    ignore_set = set(filter(None, file(ignore_file).read().split('\n')))
    inactive_set = set(line[1:] for line in lines if line.startswith('-'))

    known_set = feed_set | inactive_set

    stale_feeds = []  # [(last-checked, feed)]

    c = config.load_config()
    c.mirror = my_mirror
    indexer = index.Indexer(c, search_index_dir)

    feeds = load_feeds(feed_uris)
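
    # Only n_feeds_to_update feeds are refreshed per run: sort all feeds by
    # how long ago they were last checked and take the most stale ones, but
    # always include feeds that have never been fetched at all.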
    def last_checked(feed):
        if feed is None:
            # If we've never downloaded this feed, just keep trying (ignore last_check_attempt)
            return 0
        # Use the latest of the last successful check or the last failed check
        last_check_attempt = iface_cache.get_last_check_attempt(feed.url)
        if not last_check_attempt:
            return feed.last_checked
        return max(feed.last_checked or 0, last_check_attempt)

    # List all the feeds, starting with the most stale
    stale_feeds = [(last_checked(feed), url, feed) for url, feed in feeds.items()]
    stale_feeds.sort()

    # If we've got some completely new feeds, update all of them now
    while n_feeds_to_update < len(stale_feeds) and stale_feeds[n_feeds_to_update - 1][0] in (0, None):
        n_feeds_to_update += 1

    # Update the first few feeds in the list
    stale_feeds = stale_feeds[:n_feeds_to_update]
    for last_check, feed_url, feed in stale_feeds:
        if last_check > 0:
            ctime_str = time.strftime('%Y-%m-%d_%H:%M', time.gmtime(last_check))
            print "Feed %s last checked %s; updating..." % (feed_url, ctime_str)
        else:
            print "Feed %s is new; fetching..." % feed_url

        iface_cache.mark_as_checking(feed_url)
        blocker = c.fetcher.download_and_import_feed(feed_url, iface_cache)
        try:
            tasks.wait_for_blocker(wait_with_timeout(FEED_TIMEOUT, blocker))
        except Exception, ex:
            add_warning("Error fetching feed", "Error fetching '%s': %s" % (feed_url, ex))
            continue
        # Reload
        feed = feeds[feed_url] = load_feed(feed_url)
        #assert feed.last_checked, feed
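
    # Second pass: publish every active feed from the local cache into the
    # public tree (a dated XML snapshot per feed, plus a 'latest.xml' link).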
    for feed_url in feed_uris:
        info("Processing feed '%s'", feed_url)
        feed = feeds[feed_url]
        if feed is None:
            # Error during download?
            add_warning("Fetch failed", "Attempted to fetch '%s', but still not cached" % feed_url)
            continue

        feed_dir = os.path.join(public_dir, get_feed_dir(feed_url))
        ensure_dirs(feed_dir)

        cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed.url))
        assert cached is not None

        for subfeed in feed.feeds:
            if subfeed.uri not in known_set:
                if subfeed.uri.startswith('/'):
                    continue
                if subfeed.uri not in ignore_set:
                    add_warning("Missing subfeed", "WARNING: Subfeed %s of %s not in feeds list" % (subfeed.uri, feed.get_name()))

        # Check dependencies
        for impl in feed.implementations.values():
            for dep in impl.requires:
                if dep.interface not in known_set and dep.interface not in missing_set:
                    add_warning("Missing dependency", "Version %s of %s depends on %s, but that isn't being mirrored!" % (impl.get_version(), feed.url, dep.interface))
                    missing_set.add(dep.interface)
                    break  # Stop checking this version's remaining dependencies
            else:
                continue
            break  # Once we've warned about one version, don't check any other versions

        style = os.path.join(feed_dir, 'interface.xsl')
        if not os.path.islink(style):
            os.symlink('../../../../feed_style.xsl', style)

        latest = os.path.join(feed_dir, 'latest.xml')

        last_modified = int(os.stat(cached).st_mtime)
        version_name = time.strftime('%Y-%m-%d_%H:%M.xml', time.gmtime(last_modified))
        version_path = os.path.join(feed_dir, version_name)

        if os.path.islink(latest) and os.readlink(latest) == version_name:
            if os.path.exists(version_path):
                continue
            warn("Broken symlink '%s'!", latest)

        # Get the keys
        stream = file(cached)
        unused, sigs = gpg.check_stream(stream)
        stream.close()

        for x in sigs:
            if isinstance(x, gpg.ValidSig):
                ensure_key(x.fingerprint)
            else:
                add_warning("Signature problem", x)

        shutil.copyfile(cached, version_path)
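        # Repoint the 'latest.xml' symlink atomically: create latest.xml.new
        # first, then rename it over the old link.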
        latest_new = latest + '.new'
        if os.path.exists(latest_new):
            os.unlink(latest_new)
        os.symlink(version_name, latest_new)
        os.rename(latest_new, latest)
        sig_cache.update(feed.url)
        if not options.reindex: indexer.update(feed_url)
        print "Updated %s to %s" % (feed, version_name)

    feed = None

    for feed_url in known_set:
        feed = load_feed(feed_url)
        if feed and feed.last_modified:
            stats.add_feed(feed, feed_url in feed_set)

    if options.reindex:
        for url in feed_set:
            indexer.update(url)

    # Commit search db (whether we indexed everything, or just the updated feeds)
    indexer.commit()

    stats.write_summary(public_dir)

    for pending_feed in unconfirmed_keys:
        add_warning("Key awaiting confirmation",
                    "Feed: {feed}, Fingerprint: {fingerprint}".format(
                        feed = pending_feed.url,
                        fingerprint = pending_feed.sigs[0].fingerprint))

    if warnings:
        i = 0
        for (title, warning) in warnings:
            summary = minidom.parseString(warnings_xml)
            div = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "div")[0]
            div.appendChild(summary.createTextNode(warning))
            warnings_feed.add_entry(title = title,
                                    link = site_address + "/warnings.xml",
                                    entry_id = "warning-" + now + '-%d' % i,
                                    updated = now,
                                    summary = summary.documentElement)
            i += 1
        warnings_feed.limit(20)
        with open(warnings_file, 'w') as stream:
            warnings_feed.save(stream)

    latest_feeds = [(feed.last_modified, feed) for feed in feeds.values() if feed]
    latest_feeds.sort()
    latest_feeds = reversed(latest_feeds[-16:])
    for date, feed in latest_feeds:
        summary = minidom.parseString(summary_xml)
        set_element(summary, "summary/div/a", feed.get_name())
        local_html_page = site_address + "/" + get_feed_dir(feed.url).replace('#', '%23') + "/feed.html"
        set_element(summary, "summary/div/a/@href", local_html_page)
        set_element(summary, "summary/div/span", feed.summary)
        news_feed.add_entry(title = "%s feed updated" % feed.get_name(),
                            link = local_html_page,
                            entry_id = feed.url,
                            updated = format_date(date),
                            summary = summary.documentElement)

    news_stream = codecs.open(os.path.join(public_dir, 'news-feed.xml'), 'w', encoding = 'utf-8')
    news_feed.save(news_stream)
    news_stream.close()

    if False:
        # Warn about possible missing feeds...
        child = subprocess.Popen(['0launch', '--list'], stdout = subprocess.PIPE)
        all_feeds, unused = child.communicate()
        all_feeds = set([x for x in all_feeds.split('\n') if x and not x.startswith('/')])
        unknown = all_feeds - known_set

        if unknown:
            print "\nUnknown feeds (add to known or ignore lists):"
            for feed in sorted(unknown):
                if '/tests/' in feed: continue
                print feed

    if missing_set:
        print "\nMissing feeds:"
        for x in missing_set:
            print x

except KeyboardInterrupt, ex:
    print >>sys.stderr, "Aborted at user's request"
    sys.exit(1)
except SafeException, ex:
    if options.verbose: raise
    print >>sys.stderr, ex
    if feed:
        print "(while processing %s)" % feed
    sys.exit(1)