# Captures the text between one pair of angle brackets -- a single message
# id such as <foo@example.com> -- as group 1.
_msg_id_regex = re.compile(r'<([^<>]+)>')


def _parse_msg_id(str):
    """Return the first message id contained in the header value *str*.

    The id is returned without its surrounding angle brackets.  When *str*
    holds no ``<...>`` token the search result is None and the group lookup
    raises AttributeError.  The parameter name hides the ``str`` builtin
    inside this function; it is kept so the signature stays unchanged.
    """
    first_match = _msg_id_regex.search(str)
    return first_match.group(1)
# Shared message parser; parse_mbox hands parser.parse to mailbox.mbox as the
# factory that turns each mbox entry into a message object.
#
# The lower-case module is imported explicitly: it has been available since
# Python 2.5, whereas the capitalised email.Parser spelling is a deprecated
# compatibility alias that no longer exists in Python 3.
import email.parser
parser = email.parser.Parser()
# Determine the id of the message that msg replies to.
#
# A non-empty In-Reply-To header wins: its first <...> id is returned via
# _parse_msg_id.  The other visible path joins every References header into
# one string and collects all <...> ids found there into ref_ids.
# NOTE(review): the embedded numbering skips 23 and 26, so the guard around
# the References handling and the value finally returned are not visible in
# this view -- confirm against the complete file.
20 def _detect_reply_id(msg
):
21 if msg
['In-Reply-To']:
22 return _parse_msg_id(msg
['In-Reply-To'])
# Join all References headers so a single regex scan sees every id.
24 refs
= ' '.join(msg
.get_all('References'))
25 ref_ids
= [m
.group(1) for m
in _msg_id_regex
.finditer(refs
)]
def _get_text_payload(msg):
    """Return the text payload of *msg* as a string.

    A message that is not multipart is returned as-is via ``get_payload()``.
    For a multipart message every ``text/*`` subpart is inspected and the
    longest payload is returned.

    The previous implementation ranked the subparts with ``key=len``, but
    ``len()`` of a message object is its number of headers, so the part with
    the most headers won rather than the one with the most text.  It also
    raised ValueError from ``max()`` when the message had no text part at
    all; that case now yields an empty string, which no patch pattern
    matches.
    """
    if not msg.is_multipart():
        return msg.get_payload()

    # Function-scope import: the lower-case module name works on Python 2.5+
    # and Python 3, unlike the deprecated email.Iterators alias.
    import email.iterators

    # typed_subpart_iterator defaults to maintype 'text'.  A text part is a
    # leaf; any part that still reports itself as multipart carries a list of
    # sub-messages rather than text and is skipped.
    text_payloads = [
        part.get_payload()
        for part in email.iterators.typed_subpart_iterator(msg)
        if not part.is_multipart()
    ]
    if not text_payloads:
        return ''
    return max(text_payloads, key=len)
# Both patterns have to look across line boundaries: MULTILINE anchors ^ and
# $ at every line, DOTALL lets "." run over newlines.
_PATCH_REGEX_FLAGS = re.MULTILINE | re.DOTALL

# Matches text that has a line consisting solely of "---" followed, anywhere
# later, by a line starting with "diff --git".
_format_patch_regex = re.compile('.*^---$.*^diff --git', _PATCH_REGEX_FLAGS)

# Matches text containing a marker line of dashes around "8<" or ">8".
# Group 1 is everything after that line and must itself contain a line
# starting with "diff --git".
_snip_patch_regex = re.compile(
    '.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
    _PATCH_REGEX_FLAGS)
# Return the text of the patch carried by msg, when one is recognised.
#
# The text payload (from _get_text_payload) is first tried against
# _format_patch_regex; on a match the whole message is returned as a string.
# Otherwise _snip_patch_regex is tried and, in the visible code, the message
# payload is replaced by the part captured after the "8<" / ">8" marker line
# before msg.as_string() is returned.
# NOTE(review): set_payload() modifies the caller's msg object in place, so
# later uses of msg see the trimmed payload.
# NOTE(review): embedded lines 43, 46 and 49 onwards are not visible, so the
# check on m and the result for a message without a patch cannot be
# confirmed from this view.
40 def _guess_patch_contents(msg
):
41 p
= _get_text_payload(msg
)
42 if _format_patch_regex
.match(p
):
44 return msg
.as_string()
45 m
= _snip_patch_regex
.match(p
)
# Keep only the text captured after the marker line (group 1).
47 msg
.set_payload(m
.group(1))
48 return msg
.as_string()
# Build a query over Mail rows that have has_patch set and that are related
# to msg.message_id through Mail.references.
# NOTE(review): the parenthesised expression continues on embedded lines
# 57-59, which are not visible here; any further filters and the final
# result type cannot be confirmed from this view.
# NOTE(review): this function names Mail and sqlalchemy.in_ directly, while
# the other functions in this file go through db.Mail -- confirm that both
# names exist and are in scope.
53 def later_unapplied_patches(session
, msg
):
54 return (session
.query(Mail
)
55 .filter(Mail
.has_patch
==True)
56 .filter(sqlalchemy
.in_(msg
.message_id
, Mail
.references
))
# Try the patch pp on top of commit and build database objects for the result.
#
# Visible steps: check out commit.sha1; handle patch.PatchError (the try body
# that precedes the handler is not visible); pipe `git show` into
# `git patch-id` and split its output into patch_id and commit_id; look up
# the db.Commit row with that sha1; read two timestamps from `git log` and
# construct a db.Commit; construct a db.Patch from the commit, m.id and
# pp.notes.
# NOTE(review): embedded lines 62-63, 65, 68, 70, 73, 77 and 79-81 are missing
# from this view, so the conditions between these steps and the return value
# cannot be confirmed here.
60 def try_patch(session
, m
, pp
, commit
):
61 git('checkout', commit
.sha1
)
64 except patch
.PatchError
:
# `git show` output is fed to `git patch-id`; [0] is taken from what git()
# returns -- presumably the command's stdout, confirm against the git helper.
66 pipe
= git('show', ret_pipe
=True)
67 output
= git('patch-id', input_pipe
=pipe
)[0]
69 # this means the patch had no diff; e.g., a mode change
71 patch_id
, commit_id
= output
.split()
72 c
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
== commit_id
).first()
# NOTE(review): the format string prints the committer time (%ct) first and
# the author time (%at) second, but the two values are unpacked as
# adate, cdate -- the names look swapped; confirm which order db.Commit needs.
74 output
= git('log', '-1', '--pretty=format:%ct %at')[0]
75 adate
, cdate
= [int(s
) for s
in output
.split()]
76 c
= db
.Commit(commit_id
, cdate
, adate
, patch_id
, False)
78 p
= db
.Patch(c
, m
.id, pp
.notes
)
# Try to apply the patch found in msg on a series of candidate commits,
# calling try_patch(session, m, pp, cmt) for each one.
#
# Candidates visible here, in order: commits looked up from the blob prefixes
# in pp.blobs_pre, the commit carrying the parent mail's patch_id,
# origin/master, and origin/next.
# NOTE(review): many embedded lines are missing from this view (84-85, 88,
# 95, 97, 99-102, 105-106, 108-109, 119-121, 128-129, 135 onwards), so the
# conditions that stop the search and the value returned are not visible.
82 def try_patch_anywhere(session
, msg
, m
):
83 pdata
= _guess_patch_contents(msg
)
86 pp
= patch
.Patch(pdata
)
87 # first try on the commit given by the blobs
89 for prefix
in pp
.blobs_pre
:
# For this prefix: join Blob to Commit on newest_commit_sha1, keep blobs whose
# sha1 starts with the prefix and commits flagged upstream, and take the row
# with the highest cdate (or None when nothing matches).
90 ret
= (session
.query(db
.Blob
, db
.Commit
)
91 .filter(db
.Blob
.newest_commit_sha1
== db
.Commit
.sha1
)
92 .filter(db
.Blob
.sha1
.like(prefix
+'%'))
93 .filter(db
.Commit
.upstream
== True)
94 .order_by(db
.Commit
.cdate
.desc()).first())
96 print 'blob %s not found?!' % prefix
# ret is a (Blob, Commit) row; index 1 is the Commit.
98 commits
.append(ret
[1])
103 print 'trying canonical commit %s' % cmt
.sha1
104 applied
= try_patch(session
, m
, pp
, cmt
)
107 # this is just hopeless: it doesn't apply to the commit it should!
110 print "no canonical commit found"
111 # if we have a parent, try on the parent
# The parent is the stored mail whose message_id equals this mail's
# in_reply_to.
112 parent
= session
.query(db
.Mail
).filter(db
.Mail
.message_id
==m
.in_reply_to
).first()
113 if parent
and parent
.has_patch
and parent
.patch_id
:
114 cmt
= (session
.query(db
.Commit
)
115 .filter(db
.Commit
.patch_id
==parent
.patch_id
)
116 .order_by(db
.Commit
.cdate
.desc()).first())
# NOTE(review): .first() yields None when no commit carries the parent's
# patch_id; cmt.sha1 on the next statement would then fail -- confirm whether
# a guard is needed.
117 print 'trying to apply on parent %s' % cmt
.sha1
118 applied
= try_patch(session
, m
, pp
, cmt
)
122 print "no parent commit found"
123 # try on origin/master
124 print 'trying on origin/master'
125 master
= git('rev-parse', 'origin/master')[0].strip()
# NOTE(review): .one() raises unless exactly one row matches, so this
# presumably relies on the branch tip already being in the commit table --
# confirm.
126 cmt
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
==master
).one()
127 applied
= try_patch(session
, m
, pp
, cmt
)
130 # same for origin/next
131 print 'trying on origin/next'
# NOTE(review): the local name `next` hides the builtin next() inside this
# function.
132 next
= git('rev-parse', 'origin/next')[0].strip()
133 cmt
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
==next
).one()
134 applied
= try_patch(session
, m
, pp
, cmt
)
# Turn one parsed mail into a mail record m and try to apply its patch.
#
# Visible behaviour: when the Message-Id is already found through the
# db.Mail.message_id query an empty list is returned; otherwise the id,
# author, reply target, post date, raw payload and has_patch flag are set on
# m, one (m, referenced_id) pair per id in the References headers is
# appended to references, try_patch_anywhere is called, and m.patch_id is
# taken from the resulting patch's commit.
# NOTE(review): embedded lines 142, 144, 151-152, 157 and 159-161 are missing
# from this view, so the creation of m and references, the condition around
# the patch attempt and the final return value cannot be confirmed here.
139 def parse_mail(session
, msg
):
140 if (session
.query(db
.Mail
.message_id
)
141 .filter(db
.Mail
.message_id
== _parse_msg_id(msg
['Message-Id']))
143 return [] # already exists
145 m
.message_id
= _parse_msg_id(msg
['Message-Id'])
146 m
.author
= msg
['From']
147 m
.in_reply_to
= _detect_reply_id(msg
)
# NOTE(review): email.utils.parsedate drops the timezone offset and
# time.mktime interprets the tuple as local time -- confirm that post_date is
# not meant to be an absolute UTC timestamp.
148 m
.post_date
= time
.mktime(email
.utils
.parsedate(msg
['Date']))
# The payload is captured before _guess_patch_contents runs, which may
# replace the payload of msg in place.
149 m
.payload
= msg
.as_string()
150 m
.has_patch
= bool(_guess_patch_contents(msg
))
153 if msg
['References']:
154 for im
in _msg_id_regex
.finditer(' '.join(msg
.get_all('References'))):
155 references
.append((m
, im
.group(1)))
# NOTE(review): the local name `patch` hides the `patch` module (used as
# patch.Patch / patch.PatchError elsewhere in this file) inside this function.
156 patch
= try_patch_anywhere(session
, msg
, m
)
158 m
.patch_id
= patch
.commit
.patch_id
# Import every mail of the mbox file fname.
#
# Opens a db.Session, reads the mbox with parser.parse as the message
# factory, and materialises all messages into a list before processing.
# Each message goes through parse_mail; the pairs it returns are gathered in
# references, and one db.Reference(m.id, r) is added to the session per pair.
# NOTE(review): embedded lines 166, 169 and 172-173 are missing from this
# view, so the initialisation of references and any flush/commit between or
# after the two loops cannot be confirmed here.
162 def parse_mbox(fname
):
163 session
= db
.Session()
164 mbox
= mailbox
.mbox(fname
, parser
.parse
)
165 mbox_parsed
= list(mbox
)
167 for msg
in mbox_parsed
:
168 references
.extend(parse_mail(session
, msg
))
# The Reference rows use m.id, read here after all mails have been parsed.
170 for m
, r
in references
:
171 session
.add(db
.Reference(m
.id, r
))
174 if __name__
== '__main__':
175 for mbox
in sys
.argv
[1:]: