Convert all warnings to strings
[0mirror.git] / stats.py
blob334b0943f785bcd4e587d5f74ad0cd0f07b4e331
1 # Copyright (C) 2010, Thomas Leonard
2 # See the COPYING file for details, or visit http://0install.net.
4 import os, time, codecs, json
5 import xml.etree.ElementTree as ET
7 from zeroinstall.injector.iface_cache import iface_cache
8 from zeroinstall.injector import gpg, trust, namespaces, model, qdom
9 from zeroinstall.support import basedir
11 from support import ensure_dirs, get_feed_dir
def format_date(date):
    """Return date (seconds since the epoch) as a "YYYY-MM-DD" string, in UTC."""
    utc_fields = time.gmtime(date)
    return time.strftime("%Y-%m-%d", utc_fields)
# When people change keys, add a mapping so that their new feeds appear under the same user
# TODO: this should be site configuration
aliases = {
    # New key                                   Original key
    '617794D7C3DFE0FFF572065C0529FDB71FB13910': '92429807C9853C0744A68B9AAE07828059A53CC1',
    '6AD4A9C482F1D3F537C0354FC8CC44742B11FF89': 'FD3208AD535F2B63BCEDB2BFFB013BAB74FFF135',
    '1DFE86921CBA7BCB691DA2434F5A1693E18E1E91': '0C5C7BC77B70E7BA813478B6FF29FF60ACB8DFE8',
    '2E2B4E59CAC8D874CD2759D34B1095AF2E992B19': 'C82D382AAB381A54529019D6A0F9B035686C6996',
    'DA9825AECAD089757CDABD8E07133F96CA74D8BA': '92429807C9853C0744A68B9AAE07828059A53CC1',
    '7722DC5085B903FF176CCAA9695BA303C9839ABC': '03DC5771716A5A329CA97EA64AB8A8E7613A266F',
    '39AD3DDE2B988623D7F868591C319390658A683A': 'D30B76E435BD65448F2A57C7B8E1967CBF45481E',
    '4CFBD0B5B7102BF66E9F12AEFBCAE33FC2DE322B': '92429807C9853C0744A68B9AAE07828059A53CC1',
    'FA2577C515715EEE1261D3B0EFD438E5019F0846': '7EADC3F1EFE150C371EDE0A15B5CB97421BAA5DC',
}

# Inverse of aliases: user ID (original key) -> list of their other keys
reverse_aliases = {}
for new, original in aliases.items():
    reverse_aliases.setdefault(original, []).append(new)
# Feeds with these keys must not be mirrored
test_keys = set((
    '5E22F6A13A76F396AC68B5F29B1F5D7F9721DA90',
    '2E32123D8BE241A3B6D91E0301685F11607BB2C5',
))
def make_feed_element(parent, feed, active):
    """Append a <feed> element describing feed to parent.

    The new element gets the attributes active, local-dir, url, name,
    implementations, last-modified and summary. Nothing is returned.
    """
    elem = ET.SubElement(parent, 'feed')
    elem.set('active', str(active))
    # '#' is percent-encoded in the directory name (presumably so that the
    # value can be used as part of a URL -- confirm against the consumers).
    local_dir = get_feed_dir(feed.url)
    elem.set('local-dir', local_dir.replace('#', '%23'))
    elem.set('url', feed.url)
    elem.set('name', feed.get_name())
    elem.set('implementations', str(count_impls(feed.url)))
    elem.set('last-modified', format_date(feed.last_modified))
    elem.set('summary', feed.summary)
def contents(path):
    """Return the whole content of the file at path, or None if path does not exist."""
    if os.path.exists(path):
        with open(path) as stream:
            return stream.read()
    return None
def write_if_changed(xml, path):
    """Write the tree xml to path, but only replace path if the output differs.

    xml must provide write(filename, encoding=...), as ElementTree does.
    The document is first written to path + '.new'. If that matches the
    existing file, the temporary file is deleted and path is left untouched;
    otherwise the temporary file is renamed over path and a message is printed.
    """
    new = path + '.new'
    xml.write(new, encoding='utf-8')
    if contents(path) == contents(new):
        # Nothing changed: drop the scratch copy and leave path alone.
        os.unlink(new)
    else:
        os.rename(new, path)
        # Single-argument call form: same output as the old print statement
        # on Python 2, and also valid syntax on Python 3.
        print("Updated %s" % (path,))
# url -> number of implementations found in the cached feed (memo for count_impls)
cached_counts = {}


def count_impls(url):
    """Return the number of implementations in the cached copy of the feed at url.

    Both <implementation> and <package-implementation> elements in the
    XMLNS_IFACE namespace are counted. Elements in any other namespace are
    skipped together with their children. If no cached copy is found the
    count is 0. Results are remembered in cached_counts.
    """
    if url in cached_counts:
        return cached_counts[url]

    def tally(node):
        # Foreign-namespace elements (and everything below them) don't count.
        if node.uri != namespaces.XMLNS_IFACE:
            return 0
        if node.name in ('implementation', 'package-implementation'):
            return 1
        return sum(tally(child) for child in node.childNodes)

    cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(url))
    if not cached:
        total = 0
    else:
        with open(cached) as stream:
            cached_doc = qdom.parse(stream)
        total = tally(cached_doc)

    cached_counts[url] = total
    return total
class User:
    """Per-signer statistics: the feeds signed by one key and derived totals."""
    def __init__(self):
        self.feeds = {}             # Feed -> bool (the 'active' flag given to add_feed)
        self.last_active = None     # Latest signature date seen so far, or None
        self.n_feeds = 0            # Number of active feeds
        self.n_implementations = 0  # Implementations summed over active feeds
        self.n_inactive = 0         # Number of inactive feeds
        self.key = None             # Must be set by the caller before as_xml() is used

    def add_feed(self, feed, sig, active):
        """Record that this user signed feed.

        sig is a mapping with a "date" entry; active says whether the feed is
        counted as active. A feed may only be added once per user.
        """
        assert feed not in self.feeds, feed
        self.feeds[feed] = active
        mtime = sig["date"]
        if self.last_active is None or self.last_active < mtime:
            self.last_active = mtime
        if active:
            self.n_feeds += 1
            self.n_implementations += count_impls(feed.url)
        else:
            self.n_inactive += 1

    def as_xml(self, user_keys):
        """Return an ElementTree with a <user> root describing this user.

        user_keys is the list of key objects to list under <keys>; each must
        provide get_short_name() and a fingerprint attribute. self.key must
        already be set.
        """
        root = ET.Element('user')

        name = ET.SubElement(root, 'name')
        name.text = self.key.get_short_name()

        feeds = ET.SubElement(root, 'feeds')

        # Sort on (name, url) rather than on (name, feed) tuples: with equal
        # names the old form fell back to comparing feed objects, which gives
        # an arbitrary order on Python 2 and a TypeError on Python 3.
        sorted_feeds = sorted(self.feeds, key=lambda feed: (feed.get_name().lower(), feed.url))
        for feed in sorted_feeds:
            make_feed_element(feeds, feed, self.feeds[feed])

        stats = ET.SubElement(root, 'stats')
        stats.attrib['feeds'] = str(self.n_feeds)
        stats.attrib['implementations'] = str(self.n_implementations)
        if self.n_inactive:
            stats.attrib['inactive_feeds'] = str(self.n_inactive)
        stats.attrib['karma'] = str(self.get_karma())

        keys = ET.SubElement(root, 'keys')
        for key in user_keys:
            key_elem = ET.SubElement(keys, 'key')
            key_elem.attrib['name'] = key.get_short_name()
            key_elem.attrib['fingerprint'] = key.fingerprint
            # The key ID is the last 16 characters of the fingerprint.
            key_elem.attrib['keyid'] = key.fingerprint[-16:]

        return ET.ElementTree(root)

    def get_karma(self):
        """Return the user's score: 10 per active feed, 1 per implementation, 1 per inactive feed."""
        return 10 * self.n_feeds + self.n_implementations + self.n_inactive
def export_users(pairs):
    """Return an ElementTree with a <users> root holding one <user> per (karma, user) pair.

    Elements appear in the order the pairs are supplied. Each user must have
    its key attribute set.
    """
    users_root = ET.Element('users')
    for karma, user in pairs:
        entry = ET.SubElement(users_root, "user")
        entry.set("name", user.key.get_short_name())
        entry.set("karma", str(karma))
        entry.set("uid", user.key.fingerprint)
    return ET.ElementTree(users_root)
def export_sites(tuples):
    """Return an ElementTree with a <sites> root holding one <site> per tuple.

    Each tuple is (n_feeds, domain, feeds); the feeds member is not used here.
    Elements appear in the order the tuples are supplied.
    """
    sites_root = ET.Element('sites')
    for n_feeds, domain, feeds in tuples:
        entry = ET.SubElement(sites_root, "site")
        entry.set("name", domain)
        entry.set("feeds", str(n_feeds))
        entry.set("site-path", 'sites/site-%s.html' % domain)
    return ET.ElementTree(sites_root)
sig_file = 'sig.cache'


class SigCache:
    """Remembers the signing key of each feed, so we don't have to keep running GPG all the time."""
    def __init__(self):
        """Load the saved cache from sig_file if it exists, else start empty."""
        if not os.path.exists(sig_file):
            self.url_to_sig_data = {}
        else:
            with open(sig_file, 'rt') as stream:
                self.url_to_sig_data = json.load(stream)

    def get(self, url):
        """Return the list of signature records for url, computing it on first use.

        A valid signature is stored as {"fingerprint": ..., "date": ...};
        anything else is stored as {"error": <text of the signature object>}.
        """
        if url in self.url_to_sig_data:
            return self.url_to_sig_data[url]

        records = []
        for sig in iface_cache.get_cached_signatures(url) or []:
            if isinstance(sig, gpg.ValidSig):
                record = {
                    "fingerprint": sig.fingerprint,
                    "date": sig.get_timestamp()
                }
            else:
                record = {
                    "error": unicode(sig)
                }
            records.append(record)

        self.url_to_sig_data[url] = records
        return records

    def update(self, url):
        """Discard any remembered records for url and compute them afresh."""
        self.url_to_sig_data.pop(url, None)
        self.get(url)

    def save(self):
        """Write the cache to sig_file, via a '.new' file that is renamed into place."""
        scratch = sig_file + '.new'
        with open(scratch, 'wt') as stream:
            json.dump(self.url_to_sig_data, stream)
        os.rename(scratch, sig_file)


sig_cache = SigCache()
class Stats:
    """Keep track of some statistics."""
    def __init__(self):
        self.users = {}   # Fingerprint -> User
        self.sites = {}   # Domain -> [Feed]
        self.feeds = []   # [(Feed, <metadata> element)]
        self.active = {}  # Feed -> bool

    def add_feed(self, feed, active):
        """Record feed, crediting it to each signer and to its site.

        A <metadata> element is built with one <signer> child per signature
        record returned by sig_cache.get(feed.url): either an "error"
        attribute, or "user" (fingerprint) and "date" attributes.
        """
        self.active[feed] = active

        metadata = ET.Element('metadata')
        metadata.attrib["active"] = str(active)

        sig_data = sig_cache.get(feed.url)

        for sig in sig_data:
            fingerprint = sig.get("fingerprint")
            if fingerprint is None:
                # Not a valid signature: just pass the error text on.
                signer = ET.SubElement(metadata, "signer")
                signer.attrib["error"] = sig["error"]
            else:
                # Map a replacement key back to the user's original key.
                fingerprint = aliases.get(fingerprint, fingerprint)
                # Aliases must not chain (an original key can't itself be an alias).
                assert fingerprint not in aliases, fingerprint
                if active:
                    assert fingerprint not in test_keys, (fingerprint, feed)
                if fingerprint not in self.users:
                    self.users[fingerprint] = User()
                self.users[fingerprint].add_feed(feed, sig, active)

                signer = ET.SubElement(metadata, "signer")
                signer.attrib["user"] = fingerprint
                signer.attrib["date"] = format_date(sig["date"])

        domain = trust.domain_from_url(feed.url)
        if domain not in self.sites:
            self.sites[domain] = []
        self.sites[domain].append(feed)

        self.feeds.append((feed, metadata))

    def write_summary(self, topdir):
        """Save the signature cache and write all summary XML files under topdir.

        Files written (each only if its content changed):
        users/<fingerprint>/user.xml, top-users.xml, sites/site-<domain>.xml,
        top-sites.xml and <feed dir>/metadata.xml for every recorded feed.
        """
        sig_cache.save()

        names = []
        # Load the users' keys and all alias keys in one call.
        keys = gpg.load_keys(list(self.users) + list(aliases))
        top_users = []
        for fingerprint, user in self.users.items():
            user.key = keys[fingerprint]
            try:
                # 0launch <= 0.45 doesn't return names in unicode
                unicode(user.key.name)
            except UnicodeError:
                # Only a failed conversion means "byte string needing decoding";
                # any other exception is a real error and must propagate.
                user.key.name = codecs.decode(user.key.name, 'utf-8')
            names.append((user.key.name, fingerprint))

        for name, fingerprint in sorted(names):
            user = self.users[fingerprint]
            user_dir = ensure_dirs(os.path.join(topdir, 'users', fingerprint))

            user_keys = [fingerprint] + reverse_aliases.get(fingerprint, [])
            user_xml = user.as_xml([keys[k] for k in user_keys])
            write_if_changed(user_xml, os.path.join(user_dir, 'user.xml'))
            top_users.append((user.get_karma(), user))

        # Highest karma first. Sort on the karma alone: sorting the tuples
        # would compare User objects on a tie (arbitrary on Python 2,
        # TypeError on Python 3). The sort is stable, so users with equal
        # karma stay in the name order established above.
        top_users.sort(key=lambda pair: pair[0], reverse=True)
        users_xml = export_users(top_users)
        write_if_changed(users_xml, os.path.join(topdir, 'top-users.xml'))

        for domain, feeds in self.sites.items():
            site = ET.Element('site')
            site.attrib["name"] = domain
            feeds_elem = ET.SubElement(site, "feeds")
            # Order by name, then URL, so equal names never compare feed objects.
            sorted_feeds = sorted(feeds, key=lambda feed: (feed.get_name().lower(), feed.url))
            for feed in sorted_feeds:
                make_feed_element(feeds_elem, feed, self.active[feed])
            site_xml = ET.ElementTree(site)
            write_if_changed(site_xml, os.path.join(topdir, 'sites', 'site-%s.xml' % domain))

        # Domains are unique, so the (count, domain) prefix always decides the order.
        top_sites = [(len(feeds), domain, feeds) for domain, feeds in self.sites.items()]
        sites_xml = export_sites(reversed(sorted(top_sites)))
        write_if_changed(sites_xml, os.path.join(topdir, 'top-sites.xml'))

        for feed, metadata in self.feeds:
            # Now that keys are loaded, add each signer's display name.
            for signer in metadata.findall("signer"):
                if "user" in signer.attrib:
                    fingerprint = signer.attrib["user"]
                    user = self.users[fingerprint]
                    signer.attrib["name"] = user.key.get_short_name()

            metadata_xml = ET.ElementTree(metadata)
            feed_dir = get_feed_dir(feed.url)
            write_if_changed(metadata_xml, os.path.join(topdir, feed_dir, 'metadata.xml'))