#!/usr/bin/env python
# Copyright (C) 2013, Thomas Leonard
# See the COPYING file for details, or visit http://0install.net.

from optparse import OptionParser
import os, sys, time, shutil, subprocess, signal, logging, codecs
from ConfigParser import RawConfigParser
from logging import info, debug, warn
from xml.dom import minidom

signal.alarm(10 * 60)	# Abort after 10 minutes

from zeroinstall import SafeException
from zeroinstall.injector.iface_cache import iface_cache
from zeroinstall.injector import model, namespaces, config, gpg, handler
from zeroinstall.support import basedir, tasks

from atom import AtomFeed, set_element
from stats import Stats, sig_cache
from support import format_date, get_feed_dir, ensure_dirs
import index

missing_set = set()	# Interfaces that are depended on, but not mirrored

version = '0.1'	# (assumed placeholder; the original version string is not shown)

site_config_file = os.path.abspath('0mirror.ini')
FEED_TIMEOUT = 60	# Seconds to wait before giving up on a feed download

parser = OptionParser(usage = "usage: %prog [options] PUBLIC-DIR")
parser.add_option("", "--reindex", help="index all feeds, not just new ones", action='store_true')
parser.add_option("-v", "--verbose", help="more verbose output", action='count')
parser.add_option("-V", "--version", help="display version information", action='store_true')

(options, args) = parser.parse_args()

41 print "0mirror (zero-install) " + version
42 print "Copyright (C) 2013 Thomas Leonard"
43 print "This program comes with ABSOLUTELY NO WARRANTY,"
44 print "to the extent permitted by law."
45 print "You may redistribute copies of this program"
46 print "under the terms of the GNU General Public License."
47 print "For more information about these matters, see the file named COPYING."
logger = logging.getLogger()
if options.verbose == 1:
	logger.setLevel(logging.INFO)
elif options.verbose:
	logger.setLevel(logging.DEBUG)

if len(args) != 1:
	parser.print_help()
	sys.exit(1)

if not os.path.exists(site_config_file):
	print >>sys.stderr, "Configuration file '%s' not found!" % site_config_file
	sys.exit(1)

print "Reading configuration from", site_config_file

site_config = RawConfigParser()
site_config.read(site_config_file)

site_address = site_config.get('site', 'address')	# e.g. "http://localhost/0mirror"
if not site_address.endswith('/'):
	site_address += '/'

# Where we try if the primary site fails
my_mirror = site_config.get('fetching', 'upstream_mirror') or None

n_feeds_to_update = int(site_config.get('fetching', 'n_feeds_to_update'))

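# For reference, a minimal 0mirror.ini covering the options read above might
# look like this (the section and option names come from the code; the values
# are only examples):
#
#	[site]
#	address = http://localhost/0mirror
#
#	[fetching]
#	upstream_mirror =
#	n_feeds_to_update = 10
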
public_dir = args[0]

feed_file = os.path.join(public_dir, 'feed-list')
ignore_file = os.path.join(public_dir, 'ignore-list')
warnings_file = os.path.join(public_dir, 'warnings.xml')
search_index_dir = os.path.join(public_dir, 'search-index')	# (doesn't need to be public, actually)

86 <summary type='xhtml'>
87 <div xmlns="http://www.w3.org/1999/xhtml">
88 <a href=""/> - <span/>
94 <summary type='xhtml'>
95 <div xmlns="http://www.w3.org/1999/xhtml">
unconfirmed_keys = []	# List of PendingFeeds
class NonInteractiveHandler(handler.Handler):
	def confirm_import_feed(self, pending, valid_sigs):
		# Never prompt; just record the feed so we can warn about it later
		for x in valid_sigs:
			warn("Need to check key %s for %s", x.fingerprint, pending.url)
		unconfirmed_keys.append(pending)

@tasks.async
def wait_with_timeout(delay, blocker):
	timeout = tasks.TimeoutBlocker(delay, 'Mirror timeout')
	yield timeout, blocker
	tasks.check([timeout, blocker])
	if not blocker.happened:
		raise Exception("Timeout (waited %d seconds)" % delay)
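# This races the real download against a TimeoutBlocker: the yield resumes when
# either blocker triggers, tasks.check() re-raises any exception they captured,
# and we fail loudly if the download still hasn't finished. (The @tasks.async
# decorator above is an assumption: it is the zeroinstall idiom that turns a
# generator like this into a blocker that tasks.wait_for_blocker() can wait on.)
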
warnings = []
def add_warning(title, msg):
	warn("%s: %s", title, msg)
	warnings.append((title, msg))

key_dir = os.path.join(public_dir, 'keys')
ensure_dirs(key_dir)
keys = set()	# Fingerprints already exported this run

def ensure_key(fingerprint):
	if fingerprint in keys:
		return
	key_path = os.path.join(key_dir, fingerprint[-16:] + '.gpg')
	child = subprocess.Popen(['gpg', '-a', '--export', fingerprint], stdout = subprocess.PIPE)
	keydata, unused = child.communicate()
	stream = file(key_path, 'w')
	stream.write(keydata)
	stream.close()
	print "Exported key", fingerprint
	keys.add(fingerprint)

now = format_date(time.time())
news_feed = AtomFeed(title = "Zero Install News Feed",
		link = site_address + "/news-feed.xml",
		updated = now,		# (arguments elided in the original; assumed to mirror the warnings feed below)
		author = "0mirror")
warnings_feed = AtomFeed(title = "0mirror Warnings Feed",
		link = site_address + "/warnings.xml",
		updated = now,		# (arguments elided in the original; assumed)
		author = "0mirror",
		source = warnings_file)

def load_feed(url):
	return iface_cache.get_feed(url)

def load_feeds(feed_uris):
	# Quieten the "0install" logger while bulk-loading cached feeds
	logging.getLogger("0install").setLevel(logging.ERROR)
	feeds = {}

	for feed_url in feed_uris:
		feeds[feed_url] = load_feed(feed_url)

	logging.getLogger("0install").setLevel(logging.WARNING)
	return feeds

feed = None
try:
	if not os.path.isdir(public_dir):
		raise SafeException("Public directory '%s' does not exist. "
				"To setup a new site, create it as an empty directory now." % public_dir)
	if not os.path.isfile(feed_file):
		raise SafeException("File '%s' does not exist. It should contain a list of feed URLs, one per line" % feed_file)
	print "Reading", feed_file

	lines = filter(None, file(feed_file).read().split('\n'))
	feed_uris = [line for line in lines if not line.startswith('-')]
	feed_set = set(feed_uris)
	ignore_set = set(filter(None, file(ignore_file).read().split('\n')))
	inactive_set = set(line[1:] for line in lines if line.startswith('-'))
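	# In other words: feed-list has one URL per line, and a leading '-' marks a
	# feed as inactive (still known, but no longer mirrored); ignore-list is a
	# plain list of URLs whose absence we don't warn about. For example
	# (illustrative URLs, not from the real lists):
	#
	#	http://example.com/interfaces/prog.xml
	#	-http://example.com/interfaces/old-prog.xml
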
	known_set = feed_set | inactive_set

	stale_feeds = []	# [(last-checked, feed)]

	c = config.load_config(handler = NonInteractiveHandler())	# (assumed wiring: use a handler that never prompts)
	c.mirror = my_mirror	# (assumed: fall back to the upstream mirror on failure)

	indexer = index.Indexer(c, search_index_dir)

	feeds = load_feeds(feed_uris)

	def last_checked(feed):
		if feed is None:
			# If we've never downloaded this feed, just keep trying (ignore last_check_attempt)
			return 0
		# Use the latest of the last successful check or the last failed check
		last_check_attempt = iface_cache.get_last_check_attempt(feed.url)
		if not last_check_attempt:
			return feed.last_checked
		return max(feed.last_checked or 0, last_check_attempt)

	# List all the feeds, starting with the most stale
	stale_feeds = [(last_checked(feed), url, feed) for url, feed in feeds.items()]
	stale_feeds.sort()

	# If we've got some completely new feeds, update all of them now
	while n_feeds_to_update < len(stale_feeds) and stale_feeds[n_feeds_to_update - 1][0] in (0, None):
		n_feeds_to_update += 1
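	# Worked example for the loop above: with n_feeds_to_update = 2 and sorted
	# staleness values [None, 0, 0, 1361000000], the last feed inside the window
	# is brand new (0 or None) on each pass, so the window grows from 2 to 4 and
	# this run updates all four feeds.
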
	# Update the first few feeds in the list
	stale_feeds = stale_feeds[:n_feeds_to_update]
	for last_check, feed_url, feed in stale_feeds:
		if last_check:
			ctime_str = time.strftime('%Y-%m-%d_%H:%M', time.gmtime(last_check))
			print "Feed %s last checked %s; updating..." % (feed_url, ctime_str)
		else:
			print "Feed %s is new; fetching..." % feed_url

		iface_cache.mark_as_checking(feed_url)
		blocker = c.fetcher.download_and_import_feed(feed_url, iface_cache)
		try:
			tasks.wait_for_blocker(wait_with_timeout(FEED_TIMEOUT, blocker))
		except Exception, ex:
			add_warning("Error fetching feed", "Error fetching '%s': %s" % (feed_url, ex))
			continue

		# Reload the feed now that it's cached
		feed = feeds[feed_url] = load_feed(feed_url)
		#assert feed.last_checked, feed

	for feed_url in feed_uris:
		info("Processing feed '%s'", feed_url)
		feed = feeds[feed_url]
		if feed is None:
			# Error during download?
			add_warning("Fetch failed", "Attempted to fetch '%s', but still not cached" % feed_url)
			continue

		feed_dir = os.path.join(public_dir, get_feed_dir(feed_url))
		ensure_dirs(feed_dir)

		cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(feed.url))
		assert cached is not None

		for subfeed in feed.feeds:
			if subfeed.uri not in known_set:
				if subfeed.uri.startswith('/'):
					continue	# Skip local feeds
				if subfeed.uri not in ignore_set:
					add_warning("Missing subfeed", "WARNING: Subfeed %s of %s not in feeds list" % (subfeed.uri, feed.get_name()))

		# Check for dependencies that we're not mirroring
		for impl in feed.implementations.values():
			for dep in impl.requires:
				if dep.interface not in known_set and dep.interface not in missing_set:
					add_warning("Missing dependency", "Version %s of %s depends on %s, but that isn't being mirrored!" % (impl.get_version(), feed.url, dep.interface))
					missing_set.add(dep.interface)
					break
			else:
				continue
			break	# Once we've warned about one version, don't check any other versions

		style = os.path.join(feed_dir, 'interface.xsl')
		if not os.path.islink(style):
			os.symlink('../../../../feed_style.xsl', style)

		latest = os.path.join(feed_dir, 'latest.xml')

		last_modified = int(os.stat(cached).st_mtime)
		version_name = time.strftime('%Y-%m-%d_%H:%M.xml', time.gmtime(last_modified))
		version_path = os.path.join(feed_dir, version_name)

		if os.path.islink(latest) and os.readlink(latest) == version_name:
			if os.path.exists(version_path):
				continue	# Already up-to-date
			warn("Broken symlink '%s'!", latest)

		# Check the signatures and export the keys that signed this feed
		stream = file(cached)
		unused, sigs = gpg.check_stream(stream)
		stream.close()

		for x in sigs:
			if isinstance(x, gpg.ValidSig):
				ensure_key(x.fingerprint)
			else:
				add_warning("Signature problem", x)

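		# Publish this version, then repoint 'latest.xml' atomically: a symlink
		# can't be overwritten in place, so create 'latest.xml.new' first and
		# rename() it over the old link (rename is atomic on POSIX).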
		shutil.copyfile(cached, version_path)
		latest_new = latest + '.new'
		if os.path.exists(latest_new):
			os.unlink(latest_new)
		os.symlink(version_name, latest_new)
		os.rename(latest_new, latest)
		sig_cache.update(feed.url)
		if not options.reindex: indexer.update(feed_url)
		print "Updated %s to %s" % (feed, version_name)

	stats = Stats()
	for feed_url in known_set:
		feed = load_feed(feed_url)
		if feed and feed.last_modified:
			stats.add_feed(feed, feed_url in feed_set)

	if options.reindex:
		# --reindex: index all feeds, not just the ones updated this run
		for feed_url in feed_set:
			indexer.update(feed_url)

	# Commit search db (whether we indexed everything, or just the updated feeds)
	indexer.commit()	# (assumed Indexer API, implied by the comment above)

	stats.write_summary(public_dir)

	for pending_feed in unconfirmed_keys:
		add_warning("Key awaiting confirmation",
			"Feed: {feed}, Fingerprint: {fingerprint}".format(
				feed = pending_feed.url,
				fingerprint = pending_feed.sigs[0].fingerprint))

	for i, (title, warning) in enumerate(warnings):
		summary = minidom.parseString(warnings_xml)
		div = summary.getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "div")[0]
		div.appendChild(summary.createTextNode(warning))
		warnings_feed.add_entry(title = title,
			link = site_address + "/warnings.xml",
			entry_id = "warning-" + now + '-%d' % i,
			updated = now,	# (argument elided in the original; assumed)
			summary = summary.documentElement)

	warnings_feed.limit(20)
	with open(warnings_file, 'w') as stream:
		warnings_feed.save(stream)

	latest_feeds = [(feed.last_modified, feed) for feed in feeds.values() if feed]
	latest_feeds.sort()
	latest_feeds = reversed(latest_feeds[-16:])
	for date, feed in latest_feeds:
		summary = minidom.parseString(summary_xml)
		set_element(summary, "summary/div/a", feed.get_name())
		local_html_page = site_address + "/" + get_feed_dir(feed.url).replace('#', '%23') + "/feed.html"
		set_element(summary, "summary/div/a/@href", local_html_page)
		set_element(summary, "summary/div/span", feed.summary)
		news_feed.add_entry(title = "%s feed updated" % feed.get_name(),
			link = local_html_page,
			entry_id = local_html_page,	# (argument elided in the original; assumed unique per feed page)
			updated = format_date(date),
			summary = summary.documentElement)

	news_stream = codecs.open(os.path.join(public_dir, 'news-feed.xml'), 'w', encoding = 'utf-8')
	news_feed.save(news_stream)
	news_stream.close()

	# Warn about possible missing feeds...
	child = subprocess.Popen(['0launch', '--list'], stdout = subprocess.PIPE)
	all_feeds, unused = child.communicate()
	all_feeds = set([x for x in all_feeds.split('\n') if x and not x.startswith('/')])
	unknown = all_feeds - known_set
	unknown -= ignore_set	# ignored feeds are already accounted for

	if unknown:
		print "\nUnknown feeds (add to known or ignore lists):"
		for feed in sorted(unknown):
			if '/tests/' in feed: continue
			print feed

	if missing_set:
		print "\nMissing feeds:"
		for x in missing_set:
			print x

except KeyboardInterrupt, ex:
	print >>sys.stderr, "Aborted at user's request"
	sys.exit(1)
except SafeException, ex:
	if options.verbose: raise
	print >>sys.stderr, ex
	if feed:
		print "(while processing %s)" % feed
	sys.exit(1)