10 from sqlalchemy
.orm
import join
# Captures the text between the angle brackets of a single message id,
# e.g. "<abc@example.org>" -> "abc@example.org".
_msg_id_regex = re.compile(r'<([^<>]+)>')


def _parse_msg_id(str):
    """Return the first bracketed message id found in *str*, without brackets.

    *str* is a raw header value such as the contents of Message-Id or
    In-Reply-To.  ``None`` is returned when the header is missing or empty
    (``msg['Header']`` yields ``None`` for an absent header) or when it holds
    no ``<...>`` token, so callers can simply test the result for truth.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    that existing callers are unaffected.
    """
    if not str:
        # Absent or empty header: re.search(None) would raise TypeError.
        return None
    m = _msg_id_regex.search(str)
    if m is None:
        return None
    return m.group(1)
# Module-level email parser instance.  parse_mbox() passes `parser.parse` to
# mailbox.mbox() as its second argument.
# NOTE(review): `email.Parser` is the Python 2 spelling of the module name;
# it does not exist on Python 3 (there it is `email.parser`).
22 parser
= email
.Parser
.Parser()
# Work out which message `msg` is a reply to.
# Visible behaviour: if the In-Reply-To header is set, the id extracted from it
# by _parse_msg_id() is returned.  Further down, all References headers are
# joined with spaces and every bracketed id in them is collected in `ref_ids`.
# NOTE(review): the statements after `ref_ids` (what is returned in the
# References case) are not part of this view -- presumably one of the
# collected ids is returned; confirm against the full file.
# NOTE(review): msg.get_all('References') returns None when the header is
# absent, which makes ' '.join(...) raise TypeError unless a guard exists on a
# line that is not shown here.
24 def _detect_reply_id(msg
):
25 if msg
['In-Reply-To']:
26 return _parse_msg_id(msg
['In-Reply-To'])
28 refs
= ' '.join(msg
.get_all('References'))
29 ref_ids
= [m
.group(1) for m
in _msg_id_regex
.finditer(refs
)]
def _get_text_payloads(msg):
    """Yield the text payload(s) carried by the email message *msg*.

    A non-multipart message yields its single payload.  For a multipart
    message every ``text/*`` subpart is visited: a subpart that is itself
    multipart yields its first nested payload, any other subpart yields its
    own payload.
    """
    # The lower-case module name is available on Python 2.5+ and Python 3;
    # the old ``email.Iterators`` alias exists on Python 2 only.
    from email.iterators import typed_subpart_iterator

    if not msg.is_multipart():
        yield msg.get_payload()
        # Stop here: walking a non-multipart message would visit the message
        # itself and yield the same payload a second time.
        return
    for part in typed_subpart_iterator(msg):
        # The loop variable is `part`; the previous body referred to an
        # undefined name `textpart` and raised NameError on multipart mail.
        if part.is_multipart():
            yield part.get_payload(0)
        else:
            yield part.get_payload()
# Matches text laid out like `git format-patch` output: a line consisting
# solely of "---" followed, anywhere later, by a line starting with
# "diff --git".  MULTILINE anchors ^/$ at line boundaries and DOTALL lets
# ".*" run across lines.
_format_patch_regex = re.compile(r'.*^---$.*^diff --git',
                                 re.MULTILINE | re.DOTALL)

# Matches text containing a scissors line ("-- >8 --", "--8<--", ...) and
# captures, as group 1, everything after that line -- provided the captured
# part contains a line starting with "diff --git".
_snip_patch_regex = re.compile(r'.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
                               re.MULTILINE | re.DOTALL)
# Decide whether `msg` carries a patch and return the text to treat as one.
# For each text payload produced by _get_text_payloads(msg):
#   * a payload matching _format_patch_regex makes the function return the
#     whole message as a string (msg.as_string());
#   * otherwise _snip_patch_regex is tried; on the visible lines the message
#     payload is replaced by the part captured below the scissors line and
#     msg.as_string() is returned.
# NOTE(review): msg.set_payload() modifies the caller's message object.
# NOTE(review): the guard on `m` and the fall-through return are on lines not
# visible in this view.  Callers wrap the result in bool(), so a falsy value
# is presumably returned when nothing matches -- confirm.
45 def _guess_patch_contents(msg
):
46 for p
in _get_text_payloads(msg
):
47 if _format_patch_regex
.match(p
):
49 return msg
.as_string()
50 m
= _snip_patch_regex
.match(p
)
52 msg
.set_payload(m
.group(1))
53 return msg
.as_string()
# Build a query over Mail rows that have a patch, further filtered by an
# expression involving msg.message_id and Mail.references.
# NOTE(review): `Mail` is used unqualified here while the rest of the file
# refers to `db.Mail`, and `sqlalchemy.in_(...)` is called as a module-level
# function -- verify that both names resolve.
# NOTE(review): the remainder of the query (and the closing parenthesis) is on
# lines not visible in this view.
58 def later_unapplied_patches(session
, msg
):
59 return (session
.query(Mail
)
60 .filter(Mail
.has_patch
==True)
61 .filter(sqlalchemy
.in_(msg
.message_id
, Mail
.references
))
# Attempt patch `pp` (belonging to mail `m`) on top of `commit`.
# Visible steps:
#   * check out commit.sha1;
#   * a `patch.PatchError` handler covers the attempt (the try body itself is
#     not part of this view);
#   * pipe `git show` into `git patch-id` and split the first output item
#     into patch_id and commit_id;
#   * look up the db.Commit with that sha1; a new db.Commit is built from the
#     two timestamps printed by `git log` (the condition guarding this is not
#     visible here);
#   * create a db.Patch from the commit, m.id and pp.notes.
# NOTE(review): the log format is '%ct %at' (committer date first, author
# date second) but the values are unpacked as `adate, cdate`, so the two
# names look swapped -- confirm against db.Commit's constructor.
# NOTE(review): return values and session.add() calls are not visible here.
65 def try_patch(session
, m
, pp
, commit
):
66 git('checkout', commit
.sha1
)
69 except patch
.PatchError
:
71 pipe
= git('show', ret_pipe
=True)
72 output
= git('patch-id', input_pipe
=pipe
)[0]
74 # this means the patch had no diff; e.g., a mode change
76 patch_id
, commit_id
= output
.split()
77 c
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
== commit_id
).first()
79 output
= git('log', '-1', '--pretty=format:%ct %at')[0]
80 adate
, cdate
= [int(s
) for s
in output
.split()]
81 c
= db
.Commit(commit_id
, cdate
, adate
, patch_id
, False)
83 p
= db
.Patch(c
, m
.id, pp
.notes
)
# Try the patch contained in `msg` (stored as mail `m`) on a series of
# candidate commits, calling try_patch() for each attempt.
# Candidates, in the order they appear below:
#   1. for each prefix in pp.blobs_pre: the db.Commit joined to a db.Blob
#      whose sha1 starts with that prefix (via Blob.newest_commit_sha1),
#      restricted to upstream commits, most recent cdate first;
#   2. the most recent commit whose patch_id equals that of the parent mail
#      (the mail whose message_id equals m.in_reply_to);
#   3. the commit at origin/master;
#   4. the commit at origin/next.
# NOTE(review): most control-flow lines (the conditions around each attempt
# and the return statements) are not part of this view, so how `applied` is
# used cannot be read off here.
# NOTE(review): in step 2 `cmt` comes from .first() and may be None, yet
# `cmt.sha1` is formatted on the very next line.
# NOTE(review): steps 3 and 4 use .one(), which raises when the commit is not
# in the database.
# NOTE(review): the local name `next` shadows the builtin.
87 def try_patch_anywhere(session
, msg
, m
):
88 pdata
= _guess_patch_contents(msg
)
91 pp
= patch
.Patch(pdata
)
92 # first try on the commit given by the blobs
94 for prefix
in pp
.blobs_pre
:
95 ret
= (session
.query(db
.Blob
, db
.Commit
)
96 .filter(db
.Blob
.newest_commit_sha1
== db
.Commit
.sha1
)
97 .filter(db
.Blob
.sha1
.like(prefix
+'%'))
98 .filter(db
.Commit
.upstream
== True)
99 .order_by(db
.Commit
.cdate
.desc()).first())
101 print 'blob %s not found?!' % prefix
103 commits
.append(ret
[1])
108 print 'trying canonical commit %s' % cmt
.sha1
109 applied
= try_patch(session
, m
, pp
, cmt
)
112 # this is just hopeless: it doesn't apply to the commit it should!
115 print "no canonical commit found"
116 # if we have a parent, try on the parent
117 parent
= session
.query(db
.Mail
).filter(db
.Mail
.message_id
==m
.in_reply_to
).first()
118 if parent
and parent
.has_patch
and parent
.patch_id
:
119 cmt
= (session
.query(db
.Commit
)
120 .filter(db
.Commit
.patch_id
==parent
.patch_id
)
121 .order_by(db
.Commit
.cdate
.desc()).first())
122 print 'trying to apply on parent %s' % cmt
.sha1
123 applied
= try_patch(session
, m
, pp
, cmt
)
127 print "no parent commit found"
128 # try on origin/master
129 print 'trying on origin/master'
130 master
= git('rev-parse', 'origin/master')[0].strip()
131 cmt
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
==master
).one()
132 applied
= try_patch(session
, m
, pp
, cmt
)
135 # same for origin/next
136 print 'trying on origin/next'
137 next
= git('rev-parse', 'origin/next')[0].strip()
138 cmt
= session
.query(db
.Commit
).filter(db
.Commit
.sha1
==next
).one()
139 applied
= try_patch(session
, m
, pp
, cmt
)
# Subject prefix of the "What's cooking in git.git" status mails.
_whats_cooking_subject = re.compile(r"^What's cooking in git\.git")

# A category heading: a line consisting of "[Name]"; group 1 is the name.
_whats_cooking_category = re.compile(r"^\[(.*)\]$")

# A topic heading such as "* ab/topic-name (2010-01-01) 3 commits";
# group 1 is the topic branch name (two characters, a slash, then the rest).
_whats_cooking_header = re.compile(
    r"\* (../[a-zA-Z0-9-]+) \([^)]*\) \d+ commits?")

# A line of five or more dashes, or the "-- " signature delimiter.
_whats_cooking_separator = re.compile(r"^(-{5,}|-- )$")
# Parse a "What's cooking in git.git" status mail and store per-topic notes.
# The Subject header is tested against _whats_cooking_subject (the statement
# taken when it does not match is on a line not visible here).  The text is
# then scanned line by line; separator lines, "[Category]" lines and topic
# header lines each call the nested helper _rotate_notes().
# _rotate_notes() looks up the db.Topic whose name equals `branch` and, on the
# visible lines, stores the collected note lines joined by newlines in its
# cooking_notes attribute; its result is assigned back to `notes`.
# NOTE(review): the loop calls _get_text_payload(msg); only
# _get_text_payloads (plural, a generator) is defined in the visible part of
# the file -- verify the name.
# NOTE(review): the `mail` parameter is not used on any visible line.
# NOTE(review): initialisation of `notes` and `category` and the updates of
# `branch` inside the loop are not part of this view.
149 def parse_whats_cooking(session
, msg
, mail
):
150 if not (msg
["Subject"] and _whats_cooking_subject
.match(msg
["Subject"])):
153 branch
= 'pu' # initial part goes on 'pu'
155 def _rotate_notes(category
, branch
, notes
):
157 t
= session
.query(db
.Topic
).filter(db
.Topic
.name
==branch
).first()
163 t
.cooking_notes
= '\n'.join(notes
)
166 notes
.append("[%s]" % category
)
168 for line
in _get_text_payload(msg
).splitlines():
169 if _whats_cooking_separator
.match(line
):
171 notes
= _rotate_notes(category
, branch
, notes
)
174 m
= _whats_cooking_category
.match(line
)
176 category
= m
.group(1)
177 notes
= _rotate_notes(category
, branch
, notes
)
179 m
= _whats_cooking_header
.match(line
)
181 notes
= _rotate_notes(category
, branch
, notes
)
# Turn one parsed email `msg` into a Mail record `m` and collect its
# references.
# Returns [] early when the query for a Mail with the same Message-Id
# succeeds (the end of that condition is on a line not visible here).
# Otherwise it fills message_id, author, in_reply_to, post_date, data and
# has_patch on `m`, gathers (m, referenced_id) pairs from the References
# headers, calls try_patch_anywhere() and finally parse_whats_cooking().
# NOTE(review): creation of `m` and `references`, the condition guarding the
# patch attempt and the final return are not part of this view.  parse_mbox()
# extends a list with this function's result, so the list of pairs is
# presumably what is returned -- confirm.
# NOTE(review): the local name `patch` shadows the `patch` module used
# elsewhere in the file (patch.Patch, patch.PatchError).
# NOTE(review): email.utils.parsedate() drops the UTC offset and returns None
# for an unparsable Date; time.mktime() interprets the tuple as local time.
187 def parse_mail(session
, msg
):
188 if (session
.query(db
.Mail
.message_id
)
189 .filter(db
.Mail
.message_id
== _parse_msg_id(msg
['Message-Id']))
191 return [] # already exists
193 m
.message_id
= _parse_msg_id(msg
['Message-Id'])
194 m
.author
= msg
['From']
195 m
.in_reply_to
= _detect_reply_id(msg
)
196 m
.post_date
= time
.mktime(email
.utils
.parsedate(msg
['Date']))
197 m
.data
= msg
.as_string()
198 m
.has_patch
= bool(_guess_patch_contents(msg
))
201 if msg
['References']:
202 for im
in _msg_id_regex
.finditer(' '.join(msg
.get_all('References'))):
203 references
.append((m
, im
.group(1)))
205 patch
= try_patch_anywhere(session
, msg
, m
)
207 m
.patch_id
= patch
.commit
.patch_id
208 # try reading a what's cooking message
209 parse_whats_cooking(session
, msg
, m
)
def get_mail_by_id(session, msg_id):
    """Return the stored row whose message id equals *msg_id*, or None.

    *session* is the database session to query and *msg_id* the bare message
    id (no angle brackets).

    NOTE(review): the query selects the ``db.Mail.message_id`` column rather
    than the ``db.Mail`` entity, so a match is a one-column row, not a Mail
    object -- confirm this is what callers expect.
    """
    # Note: use first() because we don't know it exists.  The DB
    # guarantees uniqueness anyway.
    query = session.query(db.Mail.message_id)
    return query.filter(db.Mail.message_id == msg_id).first()
# Extracts the numeric article id (group 1) from a gmane permalink to the git
# mailing list, angle brackets included.  insert_mail_into_db() applies it to
# the Archived-At header.
_gmane_id_regex = re.compile(
    r'<http://permalink\.gmane\.org/gmane\.comp\.version-control\.git/(\d+)>')
# Store the parsed email `msg` as a `mail` record in the database.
# Visible steps:
#   * open a db.Session and return [] when a mail with the same Message-Id is
#     already stored;
#   * take mail.gmane_id from the Archived-At header via _gmane_id_regex;
#   * pick the message id: the Message-Id header, or else a
#     'gmane-<id>@mailnotes.trast.ch' / random 'fallback-<hex>@...' stand-in
#     (the conditions choosing between the two are not visible here);
#   * fill author, subject, data, stale and has_patch on `mail`;
#   * add one db.Reference per id found in the References headers;
#   * look up the parent mail and iterate over mails that reference
#     `starter.message_id`, have a patch, no patch_id and are not yet stale
#     (the loop body is not part of this view).
# NOTE(review): get_mail_by_id is declared as (session, msg_id) but is called
# with a single argument at both call sites in this function.
# NOTE(review): `_parse_msg_id(msg)` passes the message object; the header
# value `msgid` fetched on the previous line looks like the intended argument.
# NOTE(review): `m.in_reply_to = ...` assigns to `m` (the regex match from the
# Archived-At branch, when that branch ran) while every other field is set on
# `mail`; the later lookup reads mail.in_reply_to.
# NOTE(review): `starter` is not defined on any visible line, and creation of
# `mail` itself is not visible either.
# NOTE(review): random.randrange is not collision-free; uniqueness of fallback
# ids relies on chance.
221 def insert_mail_into_db(msg
):
222 session
= db
.Session()
223 if (msg
.get('Message-Id', None)
224 and get_mail_by_id(_parse_msg_id(msg
['Message-Id']))):
225 return [] # already exists
227 if msg
['Archived-At']:
228 m
= _gmane_id_regex
.match(msg
['Archived-At'])
230 mail
.gmane_id
= int(m
.group(1))
231 msgid
= msg
.get('Message-Id', None)
232 if not msgid
or not _parse_msg_id(msg
):
234 msgid
= 'gmane-%d@mailnotes.trast.ch' % mail
.gmane_id
236 msgid
= 'fallback-%X@mailnotes.trast.ch' % random
.randrange(2**32)
237 mail
.message_id
= msgid
239 mail
.author
= msg
['From']
242 tm
= email
.utils
.parsedate(msg
['Date'])
249 mail
.subject
= msg
['Subject']
250 m
.in_reply_to
= _detect_reply_id(msg
)
251 mail
.data
= msg
.as_string()
252 mail
.stale
= mail
.has_patch
= bool(_guess_patch_contents(msg
))
254 if msg
['References']:
255 for m
in _msg_id_regex
.finditer(' '.join(msg
.get_all('References'))):
256 session
.add(db
.Reference(mail
.id, m
.group(1)))
259 parent
= get_mail_by_id(mail
.in_reply_to
)
262 # Flag all so-far unapplied patches downwards of this one as
263 # 'stale' so they'll be tried again. XXX should use an sql UPDATE
265 for child
in (session
.query(db
.Mail
)
266 .select_from(join(db
.Mail
, db
.Reference
,
267 db
.Mail
.id == db
.Reference
.mail_id
))
268 .filter(db
.Reference
.reference_id
== starter
.message_id
)
269 .filter(db
.Mail
.has_patch
== True)
270 .filter(db
.Mail
.patch_id
== None)
271 .filter(db
.Mail
.stale
== False)):
# Import every message of the mbox file `fname`.
# The mailbox is opened with the module-level `parser.parse` as message
# factory and fully materialised into a list; each message is handed to
# parse_mail() and the (mail, reference-id) pairs it returns are afterwards
# stored as db.Reference rows keyed by the mail's id.
# NOTE(review): initialisation of `references` and any flush/commit of the
# session are on lines not visible in this view; `m.id` is only meaningful
# once the mail rows have been flushed -- confirm the ordering.
275 def parse_mbox(fname
):
276 session
= db
.Session()
277 mbox
= mailbox
.mbox(fname
, parser
.parse
)
278 mbox_parsed
= list(mbox
)
280 for msg
in mbox_parsed
:
281 references
.extend(parse_mail(session
, msg
))
283 for m
, r
in references
:
284 session
.add(db
.Reference(m
.id, r
))
287 if __name__
== '__main__':
288 for mbox
in sys
.argv
[1:]: