try ripping out expensive computations
[trackgit.git] / notes.py
blobf5607be4bb2a433ea437de62e45eca5396528570
1 #!/usr/bin/python
3 import os
4 import sys
5 import time
6 import re
7 from collections import defaultdict
8 from sqlalchemy.sql import and_
9 from sqlalchemy.orm import join
11 import db
12 from git import git
# Runs of spaces/tabs that sit directly before a line end.  MULTILINE lets
# `$` match before every newline, so each line of a note is covered.
_eol_space_re = re.compile('[ \t]+$', flags=re.MULTILINE)
class Notes(object):
    """Collect note lines per commit in memory and commit them in one go.

    Lines are gathered in a dict keyed by commit sha1.  flush() turns every
    entry into a blob, stages the blobs in a private index (one entry per
    commit, using the commit sha1 as the path), writes that index as a tree,
    commits the tree on top of the current tip of the target ref (if any)
    and finally moves the ref to the new commit.
    """

    def __init__(self, refname, indexname):
        """Set up an empty collection.

        refname   -- ref that flush() reads as parent and then updates.
        indexname -- file name of the scratch index; it is placed under
                     '.git/' and handed to git through GIT_INDEX_FILE.
        """
        self._indexname = indexname
        # Single source of truth for the scratch index location, so the file
        # removed in flush() is the same file git is told to use.
        self._index_path = '.git/' + indexname
        self._env = {'GIT_INDEX_FILE': self._index_path}
        self._ref = refname
        self._reset()

    def _reset(self):
        """Drop all collected lines."""
        self._cache = defaultdict(list)
        self._index = {}

    def extend(self, sha1, seq):
        """Append every line of the iterable `seq` to the notes of `sha1`."""
        # Goes through append_line() so that an empty `seq` does not create
        # an (empty) entry for `sha1`.
        for line in seq:
            self.append_line(sha1, line)

    def append_line(self, sha1, line):
        """Append a single line to the notes of commit `sha1`."""
        self._cache[sha1].append(line)

    def __contains__(self, sha1):
        """Return True if at least one line was recorded for `sha1`."""
        return sha1 in self._cache

    def flush(self):
        """Write all collected notes as a new commit on the ref, then reset.

        Prints a countdown of the commits still to be hashed on stdout.
        """
        # Start from an empty scratch index so the resulting tree holds only
        # the notes collected in this run.  This must be the path passed in
        # GIT_INDEX_FILE, not the bare index name.  A missing file is fine.
        try:
            os.unlink(self._index_path)
        except OSError:
            pass
        remaining = len(self._cache)
        index_info = []
        for cmt_sha1, lines in self._cache.items():
            remaining -= 1
            sys.stdout.write('%6d\r' % remaining)
            sys.stdout.flush()
            # Normalize: no leading/trailing blank lines, no trailing
            # whitespace on any line, exactly one final newline.
            notes = ''.join(lines).strip('\n')
            notes = _eol_space_re.sub('', notes) + '\n'
            blob_sha1 = git('hash-object', '-w', '--stdin',
                            input=notes)[0].strip()
            # One `update-index --index-info` record: mode, blob, path.
            index_info.append("100644 %s\t%s\n" % (blob_sha1, cmt_sha1))
        sys.stdout.write('\n')
        git('update-index', '--index-info', input=''.join(index_info),
            env=self._env)
        previous, ret = git('rev-parse', self._ref)
        if ret != 0:
            # rev-parse failed: treat the ref as not existing yet, i.e. no
            # parent commit and no expected old value for update-ref.
            parent_args = []
            old_value = []
        else:
            parent_args = ['-p', previous.strip()]
            old_value = [previous.strip()]
        tree_sha1 = git('write-tree', env=self._env)[0].strip()
        # The keyword is passed via ** so it can follow *parent_args on old
        # Python versions as well.
        head_sha1 = git('commit-tree', tree_sha1, *parent_args,
                        **{'input': 'Mass annotation by notes.py'})[0].strip()
        git('update-ref', '-m', 'Mass annotation by notes.py',
            self._ref, head_sha1, *old_value)
        self._reset()
def split_and_tab(buf):
    """Return the lines of `buf` as a list, each tab-indented.

    `buf` is converted with str() and split with splitlines(); every
    resulting line is prefixed with a tab and terminated with a newline.
    """
    return ['\t%s\n' % text for text in str(buf).splitlines()]
def compute_notes(commit, mail, guess_by=None):
    """Build the note lines describing `mail`.

    Returns a pair (full, terse) of lists of newline-terminated strings.
    `full` carries From/Subject/Message-Id/Date/In-Reply-To/Archived-At and
    any extra notes of the first patch; `terse` carries only Message-Id and
    Archived-At.  Both lists end with an empty line.

    `commit` and `guess_by` are accepted but not used here.
    """
    full_lines = []
    terse_lines = []

    if mail.author:
        full_lines.append('From: %s\n' % mail.author)
    if mail.subject:
        full_lines.append('Subject: %s\n' % mail.subject)

    # Message-Id is unconditional and shared by both variants.
    msgid_header = 'Message-Id: <%s>\n' % mail.message_id
    full_lines.append(msgid_header)
    terse_lines.append(msgid_header)

    # "%c" renders the date in the process's locale and local time zone.
    posted = time.localtime(mail.post_date)
    full_lines.append('Date: %s\n' % time.strftime("%c", posted))

    if mail.in_reply_to:
        full_lines.append('In-Reply-To: <%s>\n' % mail.in_reply_to)

    if mail.gmane_id:
        archive_header = 'Archived-At: <http://permalink.gmane.org/gmane.comp.version-control.git/%d>\n' % mail.gmane_id
        full_lines.append(archive_header)
        terse_lines.append(archive_header)

    # Only the first patch attached to the mail contributes extra notes.
    patches = mail.patch
    if len(patches) > 0 and patches[0].extra_notes:
        full_lines.append('Extra-Notes:\n')
        full_lines.extend(split_and_tab(patches[0].extra_notes))

    full_lines.append('\n')
    terse_lines.append('\n')
    return full_lines, terse_lines
# A `%H %s` log line whose subject is "Merge branch '<name>'": group 1 is the
# 40-hex commit id, group 2 the merged branch name.
_merge_re = re.compile("^([a-f0-9]{40}) Merge branch '([^']+)'")


def _redo_pu(full, terse):
    """Attach the cooking notes of topics to commits on origin/pu.

    The tip of origin/pu receives the notes of the topic named 'pu' under a
    'Pu-Overview:' header.  Every first-parent merge commit in
    origin/master..origin/pu whose subject reads "Merge branch '<name>'"
    receives the notes of topic <name> under a 'Pu-Topic:' header.  Topics
    that are unknown or have no cooking notes are skipped.

    Only `full` is written to; `terse` is accepted but not used.
    """
    pu_out, ret = git('rev-parse', 'origin/pu')
    if ret != 0:
        # Without a resolvable origin/pu there is no commit to annotate;
        # the command's output must not be used as a commit id.
        return
    pu_ref = pu_out.strip()

    pu_topic = db.session.query(db.Topic).filter(db.Topic.name == 'pu').first()
    # Same guard as for the per-branch topics below: no header without notes.
    if pu_topic and pu_topic.cooking_notes:
        full.append_line(pu_ref, 'Pu-Overview:\n')
        full.extend(pu_ref, split_and_tab(pu_topic.cooking_notes))
        full.append_line(pu_ref, '\n')

    for line in git('log', '--first-parent', '--pretty=tformat:%H %s',
                    'origin/master..origin/pu', ret_pipe=True):
        m = _merge_re.match(line)
        if not m:
            continue
        sha1 = m.group(1)
        branch = m.group(2)
        topic = (db.session.query(db.Topic)
                 .filter(db.Topic.name == branch)
                 .first())
        if topic and topic.cooking_notes:
            full.append_line(sha1, 'Pu-Topic:\n')
            full.extend(sha1, split_and_tab(topic.cooking_notes))
            full.append_line(sha1, '\n')
def _redo_for_query(full, terse, query, skip_if_exists=False, guess_by=None):
    """Compute notes for every (commit, mail) pair in `query` and store them.

    full, terse    -- note collections receiving the respective variant.
    query          -- iterable of (commit, mail) pairs.
    skip_if_exists -- when true, pairs whose commit sha1 is already in
                      `full` are skipped.
    guess_by       -- forwarded unchanged to compute_notes().

    A running counter is printed on stdout as progress indication.
    """
    processed = 0
    for cmt, mail in query:
        if skip_if_exists and cmt.sha1 in full:
            continue
        full_lines, terse_lines = compute_notes(cmt, mail, guess_by=guess_by)
        if not full_lines and not terse_lines:
            continue
        if full_lines:
            full.extend(cmt.sha1, full_lines)
        if terse_lines:
            terse.extend(cmt.sha1, terse_lines)
        # The counter is shown before it is bumped, i.e. it is zero-based.
        sys.stdout.write('\r%6d' % processed)
        sys.stdout.flush()
        processed += 1
    sys.stdout.write('\n')
def _redo_patches(full, terse):
    """Annotate upstream commits with the mails that carried their patch.

    Two passes over (commit, mail) pairs, both limited to upstream commits
    and mails that contain a patch:

    1. pairs with equal patch-id, in order of the mail's post date;
    2. pairs with equal author and mail post date == commit author date,
       skipping commits that already have notes in `full`.
    """
    # Pass 1: matching by patch-id.
    same_patch_id = db.Mail.patch_id == db.Commit.patch_id
    by_patch_id = (
        db.session.query(db.Commit, db.Mail)
        .select_from(join(db.Mail, db.Commit, same_patch_id))
        .filter(db.Commit.upstream == True)
        .filter(db.Mail.has_patch == True)
        .order_by(db.Mail.post_date)
    )
    _redo_for_query(full, terse, by_patch_id)

    # Pass 2: matching by author/date.
    same_author_and_date = and_(db.Mail.author == db.Commit.author,
                                db.Mail.post_date == db.Commit.adate)
    by_author_date = (
        db.session.query(db.Commit, db.Mail)
        .select_from(join(db.Mail, db.Commit, same_author_and_date))
        .filter(db.Commit.upstream == True)
        .filter(db.Mail.has_patch == True)
    )
    _redo_for_query(full, terse, by_author_date, skip_if_exists=True,
                    guess_by='author,date')
def _redo_all():
    """Recompute all notes and commit them to the two notes refs.

    Collects the patch-based notes first, then the origin/pu topic notes,
    and finally flushes the full variant to refs/heads/notes/full and the
    terse variant to refs/heads/notes/terse.
    """
    full = Notes('refs/heads/notes/full', 'git-full-notes-index')
    terse = Notes('refs/heads/notes/terse', 'git-terse-notes-index')
    _redo_patches(full, terse)
    _redo_pu(full, terse)
    full.flush()
    terse.flush()
if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Debug mode: show the notes computed for the single commit whose
        # sha1 is given on the command line.  .one() raises if the commit
        # is not in the database (or is not unique).
        cmt = db.session.query(db.Commit).filter(db.Commit.sha1==sys.argv[1]).one()
        # compute_notes() needs the mail as well, so look up the mails that
        # carry this commit's patch, using the same patch-id criterion as
        # the first pass of _redo_patches().
        # NOTE(review): assumes Commit instances expose `patch_id` like the
        # column used in that join -- confirm against the db module.
        if cmt.patch_id is not None:
            mails = (db.session.query(db.Mail)
                     .filter(db.Mail.patch_id == cmt.patch_id)
                     .filter(db.Mail.has_patch == True)
                     .order_by(db.Mail.post_date))
            for mail in mails:
                # Parenthesized single argument: prints the (full, terse)
                # tuple under both the print statement and print().
                print(compute_notes(cmt, mail))
    else:
        _redo_all()