Use 'am -3' to apply patches
[trackgit.git] / mail.py
blob194eb37626384ea064078e242441889e73c79d1d
1 #!/usr/bin/python
3 import sys
4 import re
5 import time
6 import random
7 import mailbox
8 import email.Iterators
9 import email.Parser
10 import email.utils
11 import sqlalchemy
12 from sqlalchemy.orm import join
13 import cStringIO as StringIO
15 import db
16 import patch
17 from git import git
18 from blobtracker import BlobTracker
19 import dbcache
# A message id is whatever sits between one pair of angle brackets.
_msg_id_regex = re.compile(r'<([^<>]+)>')


def _parse_msg_id(str):
    """Return the first ``<...>``-enclosed id in *str*, without brackets.

    Returns None when *str* contains no bracketed id.
    """
    found = _msg_id_regex.search(str)
    return found.group(1) if found else None
# Shared parser instance; used to parse stored mail data and as the
# message factory when reading mbox files.
parser = email.Parser.Parser()
def _detect_reply_id(msg):
    """Return the message id that *msg* replies to, or None.

    The In-Reply-To header is preferred.  If it is missing or holds no
    ``<...>`` id, the last id listed in the References header(s) is used
    instead.  None is returned when neither header yields an id.
    """
    in_reply_to = msg['In-Reply-To']
    if in_reply_to:
        reply_id = _parse_msg_id(in_reply_to)
        if reply_id:
            return reply_id
    references = msg.get_all('References')
    if references:
        ref_ids = _msg_id_regex.findall(' '.join(references))
        # A References header without any <id> must not raise IndexError.
        if ref_ids:
            return ref_ids[-1]
    return None
def _get_text_payloads(msg):
    """Generate the text payloads of *msg*.

    A non-multipart message yields its single payload.  For a multipart
    message every part returned by ``typed_subpart_iterator`` is visited;
    a part that is itself multipart contributes its first sub-payload,
    any other part contributes its own payload.
    """
    if msg.is_multipart():
        for subpart in email.Iterators.typed_subpart_iterator(msg):
            if subpart.is_multipart():
                yield subpart.get_payload(0)
            else:
                yield subpart.get_payload()
    else:
        yield msg.get_payload()
# Text containing a line that is exactly '---' followed, somewhere later,
# by a line starting with 'diff --git'.
_format_patch_regex = re.compile('.*^---$.*^diff --git', re.MULTILINE|re.DOTALL)
# A scissors line (dashes around '8<' or '>8') followed by text that
# contains a 'diff --git' line; group 1 is everything after the scissors.
_snip_patch_regex = re.compile('.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
                               re.MULTILINE|re.DOTALL)


def _guess_patch_contents(msg):
    """Return *msg* serialized with only its patch as payload, or None.

    The first text payload that either matches the '---' / 'diff --git'
    layout or contains a patch below a scissors line wins.  Note that
    *msg* itself is modified: its payload is replaced by the patch text
    before it is serialized.
    """
    for payload in _get_text_payloads(msg):
        if _format_patch_regex.match(payload):
            patch_text = payload
        else:
            snipped = _snip_patch_regex.match(payload)
            if not snipped:
                continue
            patch_text = snipped.group(1)
        msg.set_payload(patch_text)
        return msg.as_string()
    # no patch found
    return None
def try_patch(m, pp, base_sha1):
    """Try to apply patch *pp* on top of *base_sha1* and record the result.

    Checks out *base_sha1*, applies the patch and computes the patch id of
    the resulting commit.  If that commit is not in the database yet, a
    db.Commit is created for it and its tree is scanned by the blob
    tracker.  A db.Patch linking the commit to mail *m* is added to the
    session.

    Returns the new db.Patch, or None if the patch did not apply or
    produced no diff.
    """
    git('checkout', base_sha1)
    try:
        pp.apply()
    except patch.PatchError:
        return None  # failed
    pipe = git('show', ret_pipe=True)
    output = git('patch-id', input_pipe=pipe)[0]
    if not output:
        # this means the patch had no diff; e.g., a mode change
        return None
    patch_id, commit_id = output.split()
    c = db.session.query(db.Commit).filter(db.Commit.sha1 == commit_id).first()
    if not c:
        # Ask git about the commit that was just created (commit_id).
        output = git('log', '-1', '--pretty=format:%cD\t%aD\t%an <%ae>',
                     commit_id)[0]
        # The format lists the committer date (%cD) before the author
        # date (%aD); unpack in that same order.
        cdate_s, adate_s, author = output.split('\t', 2)
        adate = email.utils.mktime_tz(email.utils.parsedate_tz(adate_s))
        cdate = email.utils.mktime_tz(email.utils.parsedate_tz(cdate_s))
        try:
            author = author.decode('utf8')
        except UnicodeDecodeError:
            author = author.decode('latin1')
        c = db.Commit(commit_id, cdate, adate, author, patch_id, False)
        # blobtracker is the module global set up by walk_stale_mail().
        blobtracker.scan_commit_tree(c)
    p = db.Patch(c, m.id, pp.notes)
    db.session.add(p)
    return p
def find_blobs(upstream, blobs):
    """Find a commit that contains every blob whose sha1 prefix is in *blobs*.

    For each prefix, the matching blob whose newest commit has the given
    *upstream* flag and the latest commit date is looked up.  If any
    prefix has no match, a message is printed and None is returned.
    Otherwise the minimum of the collected commits is returned (None when
    *blobs* is empty).
    """
    newest_commits = []
    for prefix in blobs:
        query = db.session.query(db.Blob, db.Commit)
        query = query.filter(db.Blob.newest_commit_sha1 == db.Commit.sha1)
        query = query.filter(db.Blob.sha1.like(prefix+'%'))
        query = query.filter(db.Commit.upstream == upstream)
        row = query.order_by(db.Commit.cdate.desc()).first()
        if not row:
            print('blob %s not found?!' % prefix)
            return None
        newest_commits.append(row[1])
    if not newest_commits:
        return None
    # all blobs found
    return min(newest_commits)
def try_patch_anywhere(msg, m):
    """Try to apply the patch contained in *msg* on several candidate bases.

    *msg* is the parsed message and *m* the matching db.Mail row.  The
    candidates are tried in this order:

    1. a commit already known under the patch's fast patch id,
    2. the canonical commit derived from the patch's pre-image blobs,
       first among upstream commits, then among local ones (if such a
       commit exists but the patch does not apply there, give up),
    3. the commit of the parent mail's patch,
    4. origin/master as of the mail's post date,
    5. origin/next.

    Returns the db.Patch that was recorded, or None if there is no patch
    or it could not be applied anywhere.
    """
    print('* %s' % (m.message_id,))
    pdata = _guess_patch_contents(msg)
    if not pdata:
        return None
    pp = patch.Patch(pdata)
    if pp.missing_files:
        m.has_patch = False
        return None  # probably for another project
    # perhaps we already know about this patch id
    patch_id = pp.fast_patch_id()
    if patch_id:
        cmt = (db.session.query(db.Commit)
               .filter(db.Commit.patch_id == patch_id).first())
        if cmt:
            p = db.Patch(cmt, m.id, pp.notes)
            db.session.add(p)
            return p
    # first try on the commit given by the blobs: upstream, then local
    for upstream, label in ((True, 'upstream'), (False, 'local')):
        cmt = find_blobs(upstream, pp.blobs_pre)
        if not cmt:
            print('no canonical commit found (%s)' % label)
            continue
        print('trying canonical commit %s (%s)' % (cmt.sha1, label))
        applied = try_patch(m, pp, cmt.sha1)
        if applied:
            return applied
        # this is just hopeless: it doesn't apply to the commit it should!
        return None
    # if we have a parent, try on the parent
    cmt = None
    if m.in_reply_to:
        parent = (db.session.query(db.Mail)
                  .filter(db.Mail.message_id == m.in_reply_to).first())
        if parent and parent.has_patch and parent.patch_id:
            cmt = (db.session.query(db.Commit)
                   .filter(db.Commit.patch_id == parent.patch_id)
                   .order_by(db.Commit.cdate.desc()).first())
    # The parent's patch id may not correspond to any stored commit, so
    # only dereference cmt when the lookup actually found one.
    if cmt:
        print('trying to apply on parent %s' % cmt.sha1)
        applied = try_patch(m, pp, cmt.sha1)
        if applied:
            return applied
    else:
        print("no parent commit found")
    # try on origin/master
    sha1 = git('rev-list', '--first-parent', '-1', '--before=%d' % m.post_date,
               'origin/master')[0].strip()
    if not sha1:
        sha1 = 'origin/master'
    print('trying on master at time of patch (%s)' % sha1)
    applied = try_patch(m, pp, sha1)
    if applied:
        return applied
    # same for origin/next
    print('trying on origin/next')
    applied = try_patch(m, pp, 'origin/next')
    if applied:
        return applied
    # all out of ideas!
    return None
_whats_cooking_subject = re.compile(r"^What's cooking in git\.git")
_whats_cooking_category = re.compile(r"^\[(.*)\]$")
_whats_cooking_header = re.compile(r"\* (../[a-zA-Z0-9-]+) \([^)]*\) \d+ commits?")
_whats_cooking_separator = re.compile(r"^(-{5,}|-- )$")


def parse_whats_cooking(msg, mail):
    """Store the per-topic notes of a "What's cooking in git.git" mail.

    Does nothing unless the Subject of *msg* starts with that phrase.
    Otherwise the text is split into sections: a separator line ends the
    current topic, a ``[Category]`` line starts a new category, and a
    ``* xx/name (...) N commits`` line starts a new topic.  Each finished
    section is saved as ``cooking_notes`` on the db.Topic named after its
    branch (created if missing), which is also pointed at *mail*.  Text
    before the first separator is filed under the 'pu' topic.
    """
    subject = msg["Subject"]
    if not (subject and _whats_cooking_subject.match(subject)):
        return

    def _rotate_notes(category, branch, notes):
        # Save the collected notes on the branch's topic (if any) and
        # return a fresh list, pre-seeded with the current category.
        if branch:
            t = db.session.query(db.Topic).filter(db.Topic.name == branch).first()
            if not t:
                t = db.Topic()
                t.name = branch
                db.session.add(t)
            t.mail_id = mail.id
            t.cooking_notes = '\n'.join(notes)
        notes = []
        if category:
            notes.append("[%s]" % category)
        return notes

    category = None
    branch = 'pu'  # initial part goes on 'pu'
    notes = []
    text = ''.join(_get_text_payloads(msg))
    for line in text.splitlines():
        if _whats_cooking_separator.match(line):
            category = None
            notes = _rotate_notes(category, branch, notes)
            branch = None
            continue
        m = _whats_cooking_category.match(line)
        if m:
            category = m.group(1)
            notes = _rotate_notes(category, branch, notes)
            continue
        m = _whats_cooking_header.match(line)
        if m:
            notes = _rotate_notes(category, branch, notes)
            notes.append(line)
            branch = m.group(1)
            continue
        notes.append(line)
    # Flush the section still open at the end of the text; without this
    # the notes of the last topic in the mail would never be stored.
    _rotate_notes(category, branch, notes)
def process_mail(mail):
    """Process one stored mail row and clear its stale flag.

    The stored data is parsed, scanned for "What's cooking" notes and
    then tried as a patch.  If a patch was recorded, its commit's patch
    id is copied onto *mail*.
    """
    raw = StringIO.StringIO(mail.data)
    msg = parser.parse(raw)
    parse_whats_cooking(msg, mail)
    applied = try_patch_anywhere(msg, mail)
    if applied:
        mail.patch_id = applied.commit.patch_id
    mail.stale = False
def _query_stale_mail():
    """Return a query for all stale mails, by post date and then subject."""
    stale_mails = db.session.query(db.Mail).filter(db.Mail.stale == True)
    return stale_mails.order_by(db.Mail.post_date.asc(),
                                db.Mail.subject.asc())
def walk_stale_mail():
    """Process every stale mail, printing a countdown of mails left.

    Also (re)creates the module-global ``blobtracker`` used while
    recording new commits.
    """
    global blobtracker
    blobtracker = BlobTracker()
    remaining = _query_stale_mail().count()
    for mail in _query_stale_mail():
        print("** %6d\n" % remaining)
        process_mail(mail)
        remaining -= 1
def get_mail_by_id(msg_id):
    """Return the cached mail for *msg_id*, or None if the cache has none."""
    # Index the cache directly and treat a KeyError as "not known".
    try:
        cached = dbcache.mail_cache[msg_id]
    except KeyError:
        return None
    return cached
def decode_quoted(m, header):
    """Return parameter *header* of message *m*, collapsed to one string.

    The value is read with ``m.get_param(header)`` and passed through
    ``email.utils.collapse_rfc2231_value``.
    """
    raw_value = m.get_param(header)
    return email.utils.collapse_rfc2231_value(raw_value)
_space_regex = re.compile(r'\s+')


def sanitize_single_line(s):
    """Collapse each whitespace run in *s* to one space; cap at 255 chars."""
    collapsed = _space_regex.sub(' ', s)
    return collapsed[:255]
_gmane_id_regex = re.compile(r'<http://permalink\.gmane\.org/gmane\.comp\.version-control\.git/(\d+)>')


def _decode_header_text(value):
    """Decode an RFC 2047 encoded header *value* into a single string."""
    import email.header
    chunks = []
    for raw, charset in email.header.decode_header(value):
        if isinstance(raw, bytes):
            try:
                raw = raw.decode(charset or 'utf8')
            except (LookupError, UnicodeDecodeError):
                # unknown or wrong charset: latin1 can decode any byte
                raw = raw.decode('latin1')
        chunks.append(raw)
    return u' '.join(chunks)


def insert_mail_into_db(msg):
    """Store the parsed message *msg* as a db.Mail row, unless already known.

    The message id comes from the Message-Id header; if that is missing
    or unparseable, an id is synthesized from the gmane article number
    (taken from Archived-At) or, failing that, from a random number.
    Author, subject, post date, reply-to id and raw data are filled in,
    and the mail is flagged stale when it contains a patch or is a
    "What's cooking" mail.  Unapplied patch mails that reference the same
    thread parent are flagged stale again, and one db.Reference is added
    per id in the References header(s).
    """
    raw_msgid = msg.get('Message-Id', None)
    parsed_msgid = _parse_msg_id(raw_msgid) if raw_msgid else None
    if raw_msgid and get_mail_by_id(parsed_msgid):
        return  # already exists
    gmane_id = None
    if msg['Archived-At']:
        m = _gmane_id_regex.match(msg['Archived-At'])
        if m:
            gmane_id = int(m.group(1))
    if parsed_msgid:
        msgid = parsed_msgid
    elif gmane_id:
        msgid = 'gmane-%d@mailnotes.trast.ch' % gmane_id
    else:
        msgid = 'fallback-%X@mailnotes.trast.ch' % random.randrange(2**32)
    mail = dbcache.mail_cache.get(msgid)
    mail.gmane_id = gmane_id
    mail.message_id = sanitize_single_line(msgid)
    if msg['From']:
        mail.author = sanitize_single_line(_decode_header_text(msg['From']))
    parsed_date = None
    if msg['Date']:
        parsed_date = email.utils.parsedate_tz(msg['Date'])
    if parsed_date:
        # mktime_tz() takes the 10-tuple from parsedate_tz() directly and
        # honours its timezone offset.
        mail.post_date = email.utils.mktime_tz(parsed_date)
    else:
        mail.post_date = time.time()
    if msg['Subject']:
        mail.subject = sanitize_single_line(_decode_header_text(msg['Subject']))
    # A mail that starts a thread has no reply id; keep that as None.
    reply_id = _detect_reply_id(msg)
    mail.in_reply_to = sanitize_single_line(reply_id) if reply_id else None
    # Serialize before _guess_patch_contents(), which replaces the payload.
    mail.data = msg.as_string()
    mail.stale = mail.has_patch = bool(_guess_patch_contents(msg))
    if msg['Subject'] and _whats_cooking_subject.match(msg['Subject']):
        mail.stale = True
    db.session.add(mail)
    starter = mail
    if mail.in_reply_to:
        parent = get_mail_by_id(mail.in_reply_to)
        if parent:
            starter = parent
    # Flag all so-far unapplied patches downwards of this one as
    # 'stale' so they'll be tried again.  XXX should use an sql UPDATE
    # here!
    for child in (db.session.query(db.Mail)
                  .select_from(join(db.Mail, db.Reference,
                                    db.Mail.id == db.Reference.mail_id))
                  .filter(db.Reference.reference_id == starter.message_id)
                  .filter(db.Mail.has_patch == True)
                  .filter(db.Mail.patch_id == None)
                  .filter(db.Mail.message_id != mail.message_id)
                  .filter(db.Mail.stale == False)):
        child.stale = True
    if msg['References']:
        for m in _msg_id_regex.finditer(' '.join(msg.get_all('References'))):
            db.session.add(db.Reference(mail.id,
                                        sanitize_single_line(m.group(1))))
def parse_mbox(fname):
    """Insert every message of the mbox file *fname* into the database.

    A countdown of messages still to go is written to stderr, and the
    mail cache is flushed at the end.
    """
    messages = list(mailbox.mbox(fname, parser.parse))
    total = len(messages)
    for done, msg in enumerate(messages, 1):
        insert_mail_into_db(msg)
        sys.stderr.write("%6d\r" % (total - done))
    sys.stderr.write("\n")
    dbcache.mail_cache.flush()
if __name__ == '__main__':
    # Import every mbox named on the command line, then process whatever
    # is stale and commit the session once at the end.
    mbox_paths = sys.argv[1:]
    for mbox_path in mbox_paths:
        parse_mbox(mbox_path)
    walk_stale_mail()
    db.session.commit()