13 from sqlalchemy
.orm
import join
14 import cStringIO
as StringIO
19 from blobtracker
import BlobTracker
# Matches an RFC 2822 message id in angle brackets; group(1) is the
# bare id without the <>.
_msg_id_regex = re.compile(r'<([^<>]+)>')
def _parse_msg_id(str):
    # Extract the first <...> message id from *str*.
    # NOTE(review): the parameter shadows the builtin `str`.
    m = _msg_id_regex.search(str)
    # NOTE(review): the remainder of this function is elided in this chunk;
    # presumably it returns m.group(1) when a match was found -- confirm.
# Shared RFC 2822 parser instance (used by process_mail and _parse_mail).
parser = email.Parser.Parser()
def _detect_reply_id(msg):
    # Determine which message id this mail replies to.
    # Prefer the explicit In-Reply-To header when present ...
    if msg['In-Reply-To']:
        return _parse_msg_id(msg['In-Reply-To'])
    # ... otherwise fall back to the ids in the References chain.
    # NOTE(review): a guard line between the branches is elided in this
    # chunk (presumably checks that References exists) -- confirm.
    refs = ' '.join(msg.get_all('References'))
    ref_ids = [m.group(1) for m in _msg_id_regex.finditer(refs)]
    # NOTE(review): the trailing return is elided; presumably the last
    # element of ref_ids (or None when empty) -- confirm.
def _get_text_payloads(msg):
    # Generator over the text payload strings of *msg*.
    # A non-multipart message yields its single payload ...
    if not msg.is_multipart():
        yield msg.get_payload()
        # NOTE(review): a `return` after this yield is presumably elided
        # in this chunk -- confirm.
    # ... otherwise walk the typed subparts.
    for part in email.Iterators.typed_subpart_iterator(msg):
        if part.is_multipart():
            yield part.get_payload(0)
        # NOTE(review): an `else:` before the next yield is presumably
        # elided -- confirm against the full file.
        yield part.get_payload()
# A git-format-patch style mail: a "---" separator line somewhere,
# followed later by a "diff --git" hunk header.
_format_patch_regex = re.compile('.*^---$.*^diff --git', re.MULTILINE|re.DOTALL)
# A patch pasted below a "scissors" line (e.g. "----- 8< -----" or ">8");
# group(1) captures everything from the first "diff --git" onwards.
_snip_patch_regex = re.compile('.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
                               re.MULTILINE|re.DOTALL)
def _guess_patch_contents(msg):
    # Try to extract a usable patch from the mail's text payloads,
    # returning the (possibly rewritten) mail as a string.
    for p in _get_text_payloads(msg):
        if _format_patch_regex.match(p):
            # Looks like straight git-format-patch output; use as-is.
            return msg.as_string()
        m = _snip_patch_regex.match(p)
        # NOTE(review): an `if m:` guard is presumably elided here; the
        # payload is replaced by the part below the scissors line.
        msg.set_payload(m.group(1))
        return msg.as_string()
    # NOTE(review): the no-match return path is elided in this chunk;
    # presumably falls through to None -- confirm.
def try_patch(m, pp, base_sha1):
    # Attempt to apply patch *pp* (parsed from mail *m*) on top of commit
    # *base_sha1*; on success record the resulting commit and a db.Patch
    # row linking it to the mail.
    # NOTE(review): several lines are elided in this chunk (the try: around
    # the apply step, guards, returns); fragments kept verbatim -- confirm
    # against the full file.
    git('checkout', '-f', base_sha1)
    except patch.PatchError:
    # Compute git's patch-id of the applied result.
    pipe = git('show', ret_pipe=True)
    output = git('patch-id', input_pipe=pipe)[0]
    # this means the patch had no diff; e.g., a mode change
    patch_id, commit_id = output.split()
    # Reuse an existing db.Commit row for this sha1 when one exists.
    c = db.session.query(db.Commit).filter(db.Commit.sha1 == commit_id).first()
    output = git('log', '-1', '--pretty=format:%aD\t%cD\t%an <%ae>', commit_id)[0]
    adate_s, cdate_s, author = output.split('\t', 2)
    # Convert the RFC 2822 author/committer dates to unix timestamps.
    adate = email.utils.mktime_tz(email.utils.parsedate_tz(adate_s))
    cdate = email.utils.mktime_tz(email.utils.parsedate_tz(cdate_s))
    # Decode the author as UTF-8, falling back to latin1 on failure.
    author = author.decode('utf8')
    except UnicodeDecodeError:
        author = author.decode('latin1')
    c = db.Commit(commit_id, cdate, adate, author, patch_id, False)
    # NOTE(review): blobtracker is read as a module-level name here;
    # walk_stale_mail() appears to set it up -- confirm.
    blobtracker.scan_commit_tree(c)
    p = db.Patch(c, m.id, pp.notes)
def find_blobs(upstream, blobs):
    # For each blob sha1 prefix, find the newest commit (by committer date,
    # restricted to upstream/local per *upstream*) whose tree contains a
    # blob whose sha1 starts with that prefix.
    # NOTE(review): the loop header over *blobs* binding `prefix`, the
    # initialization of `commits`, and the final return are elided in this
    # chunk -- confirm against the full file.
    ret = (db.session.query(db.Blob, db.Commit)
           .filter(db.Blob.newest_commit_sha1 == db.Commit.sha1)
           .filter(db.Blob.sha1.like(prefix+'%'))
           .filter(db.Commit.upstream == upstream)
           .order_by(db.Commit.cdate.desc()).first())
    # NOTE(review): presumably printed only when ret is None -- confirm.
    print 'blob %s not found?!' % prefix
    commits.append(ret[1])
def try_patch_anywhere(msg, m):
    # Try progressively more speculative base commits to apply the patch
    # carried by mail *m*: known patch-id match first, then the canonical
    # commit derived from pre-image blob prefixes (upstream, then local),
    # then the parent mail's commit, then origin/master at posting time,
    # and finally origin/next.
    # NOTE(review): many control-flow lines (try/except around patch
    # parsing, if/else guards, returns) are elided in this chunk; the
    # fragments below are kept verbatim -- confirm against the full file.
    print '*', m.message_id
    pdata = _guess_patch_contents(msg)
    pp = patch.Patch(pdata)
    return # probably for another project
    # perhaps we already know about this patch id
    patch_id = pp.fast_patch_id()
    # NOTE(review): `db.query(...)` here, but `db.session.query(...)` is
    # used everywhere else in this file -- likely a bug; confirm.
    cmt = (db.query(db.Commit).filter(db.Commit.patch_id==patch_id)
           .filter(db.Commit.upstream==True).first())
    print "patch id %s" % patch_id
    print "matches commit %s" % cmt.sha1
    p = db.Patch(cmt, m.id, pp.notes)
    # first try on the commit given by the blobs
    cmt = find_blobs(True, pp.blobs_pre)
    print 'trying canonical commit %s (upstream)' % cmt.sha1
    applied = try_patch(m, pp, cmt.sha1)
    # this is just hopeless: it doesn't apply to the commit it should!
    print "no canonical commit found (upstream)"
    cmt = find_blobs(False, pp.blobs_pre)
    print 'trying canonical commit %s (local)' % cmt.sha1
    applied = try_patch(m, pp, cmt.sha1)
    # this is just hopeless: it doesn't apply to the commit it should!
    print "no canonical commit found (local)"
    # if we have a parent, try on the parent
    parent = db.session.query(db.Mail).filter(db.Mail.message_id==m.in_reply_to).first()
    if parent and parent.has_patch and parent.patch_id:
        # Use the newest commit known for the parent's patch id.
        cmt = (db.session.query(db.Commit)
               .filter(db.Commit.patch_id==parent.patch_id)
               .order_by(db.Commit.cdate.desc()).first())
        print 'trying to apply on parent %s' % cmt.sha1
        applied = try_patch(m, pp, cmt.sha1)
    print "no parent commit found"
    # try on origin/master
    # Pick the tip of origin/master at the time the mail was posted.
    sha1 = git('rev-list', '--first-parent', '-1', '--before=%d' % m.post_date,
               'origin/master')[0].strip()
    # NOTE(review): fallback assignment, presumably behind an elided
    # condition (e.g. when rev-list found nothing) -- confirm.
    sha1 = 'origin/master'
    print 'trying on master at time of patch (%s)' % sha1
    applied = try_patch(m, pp, sha1)
    # same for origin/next
    print 'trying on origin/next'
    applied = try_patch(m, pp, 'origin/next')
# Subject line of the "What's cooking in git.git" status mails.
_whats_cooking_subject = re.compile(r"^What's cooking in git\.git")
# A "[Category]" line inside the status mail body.
_whats_cooking_category = re.compile(r"^\[(.*)\]$")
# A topic header like "* xy/topic-name (date) N commits".
_whats_cooking_header = re.compile(r"\* (../[a-zA-Z0-9-]+) \([^)]*\) \d+ commits?")
# A section separator: a long dashed line or the "-- " signature marker.
_whats_cooking_separator = re.compile(r"^(-{5,}|-- )$")
def parse_whats_cooking(msg, mail):
    # Parse a "What's cooking in git.git" status mail and attach the
    # accumulated per-topic notes to the matching db.Topic rows.
    # NOTE(review): multiple lines are elided in this chunk (the early
    # return, note accumulation, branch updates); fragments kept verbatim
    # -- confirm against the full file.
    if not (msg["Subject"] and _whats_cooking_subject.match(msg["Subject"])):
    branch = 'pu' # initial part goes on 'pu'
    def _rotate_notes(category, branch, notes):
        # Store the accumulated *notes* on topic *branch*, then start a
        # fresh notes list seeded with the current category tag.
        # NOTE(review): guards and the return of the fresh list appear to
        # be elided here -- confirm.
        t = db.session.query(db.Topic).filter(db.Topic.name==branch).first()
        t.cooking_notes = '\n'.join(notes)
        notes.append("[%s]" % category)
    text = ''.join(_get_text_payloads(msg))
    for line in text.splitlines():
        # Rotate on section separators ...
        if _whats_cooking_separator.match(line):
            notes = _rotate_notes(category, branch, notes)
        # ... on "[Category]" lines ...
        m = _whats_cooking_category.match(line)
        category = m.group(1)
        notes = _rotate_notes(category, branch, notes)
        # ... and on "* topic (...) N commits" headers.
        m = _whats_cooking_header.match(line)
        notes = _rotate_notes(category, branch, notes)
def process_mail(mail):
    # Re-parse the stored raw mail text and run the per-mail analyses.
    msg = parser.parse(StringIO.StringIO(mail.data))
    parse_whats_cooking(msg, mail)
    #patch = try_patch_anywhere(msg, mail)
    # mail.patch_id = patch.commit.patch_id
def _query_stale_mail():
    """Build the query for all stale Mail rows, oldest first, then by subject."""
    stale_q = db.session.query(db.Mail)
    stale_q = stale_q.filter(db.Mail.stale == True)
    ordering = (db.Mail.post_date.asc(), db.Mail.subject.asc())
    return stale_q.order_by(*ordering)
def walk_stale_mail():
    # Iterate over all stale mails and (re)process them.
    # NOTE(review): `blobtracker` is also read as a module-level name by
    # try_patch(); presumably a `global blobtracker` line is elided here
    # -- confirm.
    blobtracker = BlobTracker()
    count = _query_stale_mail().count()
    for mail in _query_stale_mail():
        # Countdown progress indicator.
        print "** %6d\n" % count
        # NOTE(review): the rest of the loop body (processing, count
        # decrement) is elided in this chunk -- confirm.
def get_mail_by_id(msg_id):
    # Look up a db.Mail row by its message id via the cache.
    # Note: use first() because we don't know it exists. The DB
    # guarantees uniqueness anyway.
    # NOTE(review): line(s) before the return are elided; the comment
    # above mentions first() but the visible code indexes the cache
    # directly -- confirm against the full file.
    return dbcache.mail_cache[msg_id]
def decode_quoted(s):
    # Decode an RFC 2047 encoded header value.
    # NOTE(review): setup before, and the loop body/return after, this
    # line are elided in this chunk; note that the loop target rebinds
    # the parameter `s` -- confirm against the full file.
    for s, e in email.header.decode_header(s):
# Collapses any whitespace run (spaces, tabs, newlines) to one space.
_space_regex = re.compile(r'\s+')
def sanitize_single_line(s):
    # Flatten *s* onto a single line and clamp it to 255 characters
    # (presumably the DB column width -- confirm).
    # NOTE(review): one interior line of this function is elided in this
    # chunk -- confirm against the full file.
    return _space_regex.sub(' ', s)[:255]
# Extracts the numeric gmane article id from an Archived-At permalink.
_gmane_id_regex = re.compile(r'<http://permalink\.gmane\.org/gmane\.comp\.version-control\.git/(\d+)>')
def insert_mail_into_db(msg):
    # Create or update the db.Mail row for the parsed message *msg* and
    # record its References edges.
    # NOTE(review): numerous guard/else lines are elided in this chunk;
    # fragments below kept verbatim -- confirm against the full file.
    if (msg.get('Message-Id', None)
        and get_mail_by_id(_parse_msg_id(msg['Message-Id']))):
        return # already exists
    # Pull the gmane article id out of the Archived-At permalink.
    if msg['Archived-At']:
        m = _gmane_id_regex.match(msg['Archived-At'])
        gmane_id = int(m.group(1))
    # Synthesize a message id when the mail has none (or an unparsable one).
    msgid = msg.get('Message-Id', None)
    if not msgid or not _parse_msg_id(msgid):
        # NOTE(review): these two assignments are presumably alternative
        # fallbacks (gmane-derived vs. random) behind elided conditions
        # -- confirm.
        msgid = 'gmane-%d@mailnotes.thomasrast.ch' % gmane_id
        msgid = 'fallback-%X@mailnotes.thomasrast.ch' % random.randrange(2**32)
    msgid = _parse_msg_id(msgid)
    mail = dbcache.mail_cache.get(msgid)
    mail.gmane_id = gmane_id
    mail.message_id = sanitize_single_line(msgid)
    name, addr = email.utils.parseaddr(msg['From'])
    # NOTE(review): these two author assignments are presumably a
    # try/except pair (decoded "Name <addr>" first, raw From header as
    # fallback) with the control flow elided -- confirm.
    mail.author = sanitize_single_line("%s <%s>" % (decode_quoted(name), addr))
    mail.author = sanitize_single_line(decode_quoted(msg['From']))
    tm = email.utils.parsedate_tz(msg['Date'])
    tm = email.utils.mktime_tz(tm)
    mail.subject = sanitize_single_line(decode_quoted(msg['Subject']))
    mail.in_reply_to = sanitize_single_line(_detect_reply_id(msg))
    mail.data = msg.as_string()
    # A mail that looks like it carries a patch is marked stale so the
    # stale-mail walker will try to apply it.
    mail.stale = mail.has_patch = bool(_guess_patch_contents(msg))
    if msg['Subject'] and _whats_cooking_subject.match(msg['Subject']):
    parent = get_mail_by_id(mail.in_reply_to)
    # Flag all so-far unapplied patches downwards of this one as
    # 'stale' so they'll be tried again. XXX should use an sql UPDATE
    # NOTE(review): `starter` is not bound in the visible lines --
    # presumably assigned on an elided line above; confirm.
    for child in (db.session.query(db.Mail)
                  .select_from(join(db.Mail, db.Reference,
                                    db.Mail.id == db.Reference.mail_id))
                  .filter(db.Reference.reference_id == starter.message_id)
                  .filter(db.Mail.has_patch == True)
                  .filter(db.Mail.patch_id == None)
                  .filter(db.Mail.message_id != mail.message_id)
                  .filter(db.Mail.stale == False)):
    # Record one db.Reference row per id in the References header.
    if msg['References']:
        for m in _msg_id_regex.finditer(' '.join(msg.get_all('References'))):
            db.session.add(db.Reference(mail.id,
                                        sanitize_single_line(m.group(1))))
    # NOTE(review): the enclosing `def` line (presumably
    # `def _parse_mail(s):`) is elided in this chunk; this is its return
    # statement, parsing raw mail text into a message object.
    return parser.parse(StringIO.StringIO(s))
def parse_mbox(fname):
    # Split an mbox file on the gmane "From " separator lines, parse each
    # message and insert it into the database.
    # NOTE(review): the file-reading loop, the accumulation/reset of `cur`
    # and of `mbox_parsed` are elided in this chunk -- confirm against the
    # full file.
    if line.startswith('From news@gmane.org'):
        mbox_parsed.append(_parse_mail(''.join(cur)))
    # Flush the final accumulated message.
    mbox_parsed.append(_parse_mail(''.join(cur)))
    count = len(mbox_parsed)
    for msg in mbox_parsed:
        insert_mail_into_db(msg)
        # Progress indicator on stderr; count is presumably decremented on
        # an elided line -- confirm.
        sys.stderr.write("%6d\r" % count)
    sys.stderr.write("\n")
    # Persist the cached Mail rows.
    dbcache.mail_cache.flush()
# Script entry point: treat each command-line argument as an mbox file to
# import.
if __name__ == '__main__':
    for mbox in sys.argv[1:]:
        # NOTE(review): the loop body is elided in this chunk; presumably
        # parse_mbox(mbox) -- confirm against the full file.