Fixed encoding problem in error handling
[0mirror.git] / stats.py
blob79a8aa4a98be2a7236667b36928f19fc41ef629f
1 # Copyright (C) 2010, Thomas Leonard
2 # See the COPYING file for details, or visit http://0install.net.
4 import os, time, codecs
5 import xml.etree.ElementTree as ET
7 from zeroinstall.injector.iface_cache import iface_cache
8 from zeroinstall.injector import gpg, trust, namespaces, model, qdom
9 from zeroinstall.support import basedir
11 from support import ensure_dirs, get_feed_dir
def format_date(date):
    """Return `date` (seconds since the epoch) as a "YYYY-MM-DD" string in UTC."""
    utc_fields = time.gmtime(date)
    return time.strftime("%Y-%m-%d", utc_fields)
# When people change keys, add a mapping so that their new feeds appear under the same user
# TODO: this should be site configuration
aliases = {
    # New key                                    Original key
    '617794D7C3DFE0FFF572065C0529FDB71FB13910' : '92429807C9853C0744A68B9AAE07828059A53CC1',
    '6AD4A9C482F1D3F537C0354FC8CC44742B11FF89' : 'FD3208AD535F2B63BCEDB2BFFB013BAB74FFF135',
    '1DFE86921CBA7BCB691DA2434F5A1693E18E1E91' : '0C5C7BC77B70E7BA813478B6FF29FF60ACB8DFE8',
    '2E2B4E59CAC8D874CD2759D34B1095AF2E992B19' : 'C82D382AAB381A54529019D6A0F9B035686C6996',
    'DA9825AECAD089757CDABD8E07133F96CA74D8BA' : '92429807C9853C0744A68B9AAE07828059A53CC1',
    '7722DC5085B903FF176CCAA9695BA303C9839ABC' : '03DC5771716A5A329CA97EA64AB8A8E7613A266F',
    '39AD3DDE2B988623D7F868591C319390658A683A' : 'D30B76E435BD65448F2A57C7B8E1967CBF45481E',
}
# NOTE(review): the closing brace was missing from the reviewed text and has
# been restored here; confirm that no alias entry was lost along with it.

# user ID (original key) -> list of their other (newer) keys
reverse_aliases = {}
for new, original in aliases.items():
    # setdefault creates the list the first time an original key is seen
    reverse_aliases.setdefault(original, []).append(new)
# Feeds with these keys must not be mirrored
test_keys = set([
    '5E22F6A13A76F396AC68B5F29B1F5D7F9721DA90',
    '2E32123D8BE241A3B6D91E0301685F11607BB2C5',
])
def make_feed_element(parent, feed, active):
    """Append a <feed> child describing `feed` to the element `parent`.

    The new element carries these attributes: 'active' (str(active)),
    'local-dir', 'url', 'name', 'implementations' (from count_impls),
    'last-modified' (formatted with format_date) and 'summary'.
    Nothing is returned; the element is reachable through `parent`.
    """
    node = ET.SubElement(parent, 'feed')
    node.set('active', str(active))
    # Any '#' in the feed directory is written in its percent-encoded form
    node.set('local-dir', get_feed_dir(feed.url).replace('#', '%23'))
    node.set('url', feed.url)
    node.set('name', feed.get_name())
    node.set('implementations', str(count_impls(feed.url)))
    node.set('last-modified', format_date(feed.last_modified))
    node.set('summary', feed.summary)
def contents(path):
    """Return everything read from the file at `path`, or None if `path` does not exist."""
    if os.path.exists(path):
        with open(path) as stream:
            return stream.read()
    return None
56 def write_if_changed(xml, path):
57 new = path + '.new'
58 xml.write(new, encoding='utf-8')
59 if contents(path) == contents(new):
60 os.unlink(new)
61 else:
62 os.rename(new, path)
63 print "Updated", path
# Implementation counts computed so far, keyed by feed URL
cached_counts = {}

def count_impls(url):
    """Return the number of implementations in the cached copy of the feed `url`.

    The cached file is located with basedir.load_first_cache(); when none is
    found the count is 0. Results are remembered in `cached_counts`, so each
    URL is parsed at most once.
    """
    if url in cached_counts:
        return cached_counts[url]

    def tally(elem):
        # Elements outside the feed namespace contribute nothing
        if elem.uri != namespaces.XMLNS_IFACE:
            return 0
        # An implementation element counts once and is not descended into
        if elem.name in ('implementation', 'package-implementation'):
            return 1
        return sum(tally(child) for child in elem.childNodes)

    cached = basedir.load_first_cache(namespaces.config_site, 'interfaces', model.escape(url))
    if not cached:
        total = 0
    else:
        with open(cached) as stream:
            cached_doc = qdom.parse(stream)
        total = tally(cached_doc)

    cached_counts[url] = total
    return total
class User:
    """Running totals for one signer, plus the XML summary built from them."""

    def __init__(self):
        self.feeds = {}             # feed -> active flag, as passed to add_feed()
        self.last_active = None     # largest signature timestamp seen so far
        self.n_feeds = 0            # number of active feeds
        self.n_implementations = 0  # implementations summed over active feeds
        self.n_inactive = 0         # number of inactive feeds
        self.key = None             # must be assigned by the caller before as_xml()

    def add_feed(self, feed, sig, active):
        """Record that this user signed `feed` with signature `sig`.

        Updates `last_active` from sig.get_timestamp() and bumps either the
        active counters (feeds and implementations) or the inactive counter.
        Raises AssertionError if `feed` was already added to this user.
        """
        assert feed not in self.feeds, feed
        self.feeds[feed] = active
        mtime = sig.get_timestamp()
        if self.last_active is None or self.last_active < mtime:
            self.last_active = mtime
        if active:
            self.n_feeds += 1
            self.n_implementations += count_impls(feed.url)
        else:
            self.n_inactive += 1

    def as_xml(self, user_keys):
        """Return an ElementTree with a <user> root describing this user.

        `user_keys` is an iterable of key objects (each providing
        get_short_name() and a `fingerprint`); one <key> element is emitted
        per entry. `self.key` must have been set beforehand.
        """
        root = ET.Element('user')

        name = ET.SubElement(root, 'name')
        name.text = self.key.get_short_name()

        feeds = ET.SubElement(root, 'feeds')

        # Sort on the lower-cased name only, so two feeds with the same name
        # are never compared with each other as objects.
        for feed in sorted(self.feeds, key=lambda f: f.get_name().lower()):
            make_feed_element(feeds, feed, self.feeds[feed])

        stats = ET.SubElement(root, 'stats')
        stats.attrib['feeds'] = str(self.n_feeds)
        stats.attrib['implementations'] = str(self.n_implementations)
        if self.n_inactive:
            stats.attrib['inactive_feeds'] = str(self.n_inactive)
        stats.attrib['karma'] = str(self.get_karma())

        keys = ET.SubElement(root, 'keys')
        for key in user_keys:
            key_elem = ET.SubElement(keys, 'key')
            key_elem.attrib['name'] = key.get_short_name()
            key_elem.attrib['fingerprint'] = key.fingerprint
            # The key ID is the last 16 characters of the fingerprint
            key_elem.attrib['keyid'] = key.fingerprint[-16:]

        return ET.ElementTree(root)

    def get_karma(self):
        """Return the user's score: 10 per active feed, 1 per implementation, 1 per inactive feed."""
        return 10 * self.n_feeds + self.n_implementations + self.n_inactive
def export_users(pairs):
    """Return an ElementTree with a <users> root holding one <user> per pair.

    `pairs` is an iterable of (karma, user) tuples; each <user> element gets
    'name', 'karma' and 'uid' attributes taken from the karma value and from
    user.key. Elements appear in the order the pairs are supplied.
    """
    users_root = ET.Element('users')
    for karma, user in pairs:
        entry = ET.SubElement(users_root, "user")
        key = user.key
        entry.set("name", key.get_short_name())
        entry.set("karma", str(karma))
        entry.set("uid", key.fingerprint)
    return ET.ElementTree(users_root)
def export_sites(tuples):
    """Return an ElementTree with a <sites> root holding one <site> per tuple.

    `tuples` is an iterable of (n_feeds, domain, feeds); the third item is
    not used here. Each <site> element gets 'name', 'feeds' and 'site-path'
    attributes, in the order the tuples are supplied.
    """
    sites_root = ET.Element('sites')
    for n_feeds, domain, _feeds in tuples:
        entry = ET.SubElement(sites_root, "site")
        entry.set("name", domain)
        entry.set("feeds", str(n_feeds))
        entry.set("site-path", 'sites/site-%s.html' % domain)
    return ET.ElementTree(sites_root)
class Stats:
    """Keep track of some statistics."""

    def __init__(self):
        self.users = {}   # Fingerprint -> User
        self.sites = {}   # Domain -> [Feed]
        self.feeds = []   # [(Feed, <metadata> element)]
        self.active = {}  # Feed -> bool

    def add_feed(self, feed, active):
        """Record `feed` and its cached signatures.

        Builds a <metadata> element with one <signer> child per cached
        signature: valid signatures are credited to the signing user (after
        resolving key aliases) and get 'user' and 'date' attributes; any other
        signature gets an 'error' attribute holding its text. The feed is also
        filed under the domain of its URL.

        Raises AssertionError if an alias resolves to a key that is itself
        aliased, or if an active feed is signed by one of the test keys.
        """
        self.active[feed] = active

        metadata = ET.Element('metadata')
        metadata.attrib["active"] = str(active)

        sigs = iface_cache.get_cached_signatures(feed.url)

        for sig in sigs or []:
            if isinstance(sig, gpg.ValidSig):
                # Credit feeds signed with a newer key to the user's original key
                fingerprint = aliases.get(sig.fingerprint, sig.fingerprint)
                # Aliases are resolved in a single step, so chains are not allowed
                assert fingerprint not in aliases, fingerprint
                if active:
                    assert fingerprint not in test_keys, (fingerprint, feed)
                if fingerprint not in self.users:
                    self.users[fingerprint] = User()
                self.users[fingerprint].add_feed(feed, sig, active)

                signer = ET.SubElement(metadata, "signer")
                signer.attrib["user"] = fingerprint
                signer.attrib["date"] = format_date(sig.get_timestamp())
            else:
                signer = ET.SubElement(metadata, "signer")
                signer.attrib["error"] = unicode(sig)

        domain = trust.domain_from_url(feed.url)
        if domain not in self.sites:
            self.sites[domain] = []
        self.sites[domain].append(feed)

        self.feeds.append((feed, metadata))

    def write_summary(self, topdir):
        """Write all summary XML files below `topdir`.

        Files written (each only when its contents changed):
          users/<fingerprint>/user.xml  - one per user
          top-users.xml                 - users by descending karma
          sites/site-<domain>.xml       - one per domain
          top-sites.xml                 - domains by descending feed count
          <feed dir>/metadata.xml       - one per feed
        """
        names = []
        keys = gpg.load_keys(list(self.users) + list(aliases))
        top_users = []
        for fingerprint, user in self.users.items():
            user.key = keys[fingerprint]
            try:
                # 0launch <= 0.45 doesn't return names in unicode
                unicode(user.key.name)
            except UnicodeError:
                user.key.name = codecs.decode(user.key.name, 'utf-8')
            names.append((user.key.name, fingerprint))

        for name, fingerprint in sorted(names):
            user = self.users[fingerprint]
            user_dir = ensure_dirs(os.path.join(topdir, 'users', fingerprint))

            user_keys = [fingerprint] + reverse_aliases.get(fingerprint, [])
            user_xml = user.as_xml([keys[k] for k in user_keys])
            write_if_changed(user_xml, os.path.join(user_dir, 'user.xml'))
            top_users.append((user.get_karma(), user))

        # Sort on karma alone. The sort is stable, so users with equal karma
        # keep the name order in which they were appended above, instead of
        # being ordered by comparing User objects with each other.
        ranked_users = sorted(top_users, key=lambda pair: pair[0], reverse=True)
        users_xml = export_users(ranked_users)
        write_if_changed(users_xml, os.path.join(topdir, 'top-users.xml'))

        for domain, feeds in self.sites.items():
            site = ET.Element('site')
            site.attrib["name"] = domain
            feeds_elem = ET.SubElement(site, "feeds")
            # Sort on the lower-cased name only; equally named feeds keep the
            # order in which they were added rather than being compared.
            for feed in sorted(feeds, key=lambda f: f.get_name().lower()):
                make_feed_element(feeds_elem, feed, self.active[feed])
            site_xml = ET.ElementTree(site)
            write_if_changed(site_xml, os.path.join(topdir, 'sites', 'site-%s.xml' % domain))

        # Domains are unique, so the tuple comparison never reaches the feed lists
        top_sites = [(len(feeds), domain, feeds) for domain, feeds in self.sites.items()]
        sites_xml = export_sites(reversed(sorted(top_sites)))
        write_if_changed(sites_xml, os.path.join(topdir, 'top-sites.xml'))

        for feed, metadata in self.feeds:
            # Signer names are only known now that the keys have been loaded
            for signer in metadata.findall("signer"):
                if "user" in signer.attrib:
                    fingerprint = signer.attrib["user"]
                    user = self.users[fingerprint]
                    signer.attrib["name"] = user.key.get_short_name()

            metadata_xml = ET.ElementTree(metadata)
            feed_dir = get_feed_dir(feed.url)
            write_if_changed(metadata_xml, os.path.join(topdir, feed_dir, 'metadata.xml'))