(closes #493) convert remaining GET form to use POST
[buildbot.git] / buildbot / changes / mail.py
blobe430ddad14fc8f4b3280f8564edffacbdc48e6d9
1 # -*- test-case-name: buildbot.test.test_mailparse -*-
3 """
4 Parse various kinds of 'CVS notify' email.
5 """
6 import os, re
7 import time, calendar
8 from email import message_from_file
9 from email.Utils import parseaddr
10 from email.Iterators import body_line_iterator
12 from zope.interface import implements
13 from twisted.python import log
14 from buildbot import util
15 from buildbot.interfaces import IChangeSource
16 from buildbot.changes import changes
17 from buildbot.changes.maildir import MaildirService
class MaildirSource(MaildirService, util.ComparableMixin):
    """Base class for change sources that watch a maildir which receives
    commit-notification email (e.g. one subscribed to a FreshCVS
    change-announcement mailing list).

    Subclasses set 'name' and implement parse(m, prefix), which turns a
    parsed email message into a Change, or returns None when the message
    should be ignored.
    """
    implements(IChangeSource)

    compare_attrs = ["basedir", "pollinterval", "prefix"]
    name = None

    def __init__(self, maildir, prefix=None):
        """
        @param maildir: the maildir to watch; handed to MaildirService
        @param prefix: optional leading path; it is passed to parse(), where
                       the parsers that honour it drop files that do not
                       start with it and strip it from those that do
        """
        MaildirService.__init__(self, maildir)
        self.prefix = prefix
        if prefix and not prefix.endswith("/"):
            # the placeholders must actually be filled in, otherwise the log
            # shows a literal "%s"
            log.msg("%s: you probably want your prefix=('%s') to end with "
                    "a slash" % (self.__class__.__name__, prefix))

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "%s mailing list in maildir %s" % (self.name, self.basedir)

    def messageReceived(self, filename):
        """Parse the message stored as 'filename' in the maildir's new/
        directory, submit the resulting Change (if any) to the parent, then
        move the message to cur/.
        """
        new_path = os.path.join(self.basedir, "new", filename)
        fd = open(new_path, "r")
        try:
            change = self.parse_file(fd, self.prefix)
        finally:
            # always release the file handle, even if parsing blows up
            fd.close()
        if change:
            self.parent.addChange(change)
        # the message is moved whether or not it produced a Change
        os.rename(new_path, os.path.join(self.basedir, "cur", filename))

    def parse_file(self, fd, prefix=None):
        """Read an email message from the open file 'fd' and return whatever
        the subclass's parse() makes of it (a Change or None).
        """
        m = message_from_file(fd)
        return self.parse(m, prefix)
class FCMaildirSource(MaildirSource):
    name = "FreshCVS"

    def parse(self, m, prefix=None):
        """Turn a FreshCVS notification email into a Change.

        Returns None when the message does not look like FreshCVS mail, or
        when no listed file survives the 'prefix' filter. When 'prefix' is
        given, files that do not start with it are dropped and it is
        stripped from the ones that do.
        """
        # FreshCVS sets From: to "user CVS <user>". Only the real-name part
        # is used, because the MTA may rewrite the <> part (e.g. to add a
        # local domain).
        realname, address = parseaddr(m["from"])
        if not realname:
            return None # no From: not a FreshCVS message
        marker = realname.find(" CVS")
        if marker == -1:
            return None # From lacks the " CVS" tag: not a FreshCVS message
        who = realname[:marker]

        # Time of receipt stands in for time of checkin. Not correct, but it
        # avoids the out-of-order-changes issue. See the comment in
        # parseSyncmail about using the 'Date:' header
        when = util.now()

        files = []
        isdir = 0

        # One iterator is shared by all the loops below, so each section
        # resumes exactly where the previous one stopped.
        body = iter(list(body_line_iterator(m)))

        # skip ahead to the file list
        for text in body:
            if text == "Modified files:\n":
                break

        # the file list runs up to the first blank line
        for text in body:
            if text == "\n":
                break
            fields = text.rstrip("\n").split(None, 1)
            filename = fields[0]
            if prefix:
                # FreshCVS also reports changes outside our prefix; skip them
                if not filename.startswith(prefix):
                    continue
                filename = filename[len(prefix):]
            # a bare name, or one followed by "0 0", marks a directory
            if len(fields) == 1 or fields[1] == "0 0":
                isdir = 1
            files.append(filename)

        # skip ahead to the log message
        for text in body:
            if text == "Log message:\n":
                break

        # the message is terminated by "ViewCVS links:" or "Index:..." (patch)
        pieces = []
        for text in body:
            if text == "ViewCVS links:\n":
                break
            if text.find("Index: ") == 0:
                break
            pieces.append(text)
        comments = "".join(pieces).rstrip() + "\n"

        if not files:
            return None

        return changes.Change(who, files, comments, isdir, when=when)
class SyncmailMaildirSource(MaildirSource):
    name = "Syncmail"

    def parse(self, m, prefix=None):
        """Parse messages sent by the 'syncmail' program, as suggested by the
        sourceforge.net CVS Admin documentation. Syncmail is maintained at
        syncmail.sf.net .

        Returns a Change, or None when the message has no usable From
        address, no Subject header, or no file that survives the 'prefix'
        filter. When 'prefix' is given, files that do not start with it are
        dropped and it is stripped from the ones that do.
        """
        # pretty much the same as freshcvs mail, not surprising since CVS is
        # the one creating most of the text

        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        name, addr = parseaddr(m["from"])
        if not addr:
            return None # no From address: nobody to attribute the change to
        at = addr.find("@")
        if at == -1:
            who = addr # might still be useful
        else:
            who = addr[:at]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. Also syncmail doesn't give us anything
        # better to work with, unless you count pulling the v1-vs-v2
        # timestamp out of the diffs, which would be ugly. TODO: Pulling the
        # 'Date:' header from the mail is a possibility, and
        # email.Utils.parsedate_tz may be useful. It should be configurable,
        # however, because there are a lot of broken clocks out there.
        when = util.now()

        subject = m["subject"]
        if subject is None:
            # without a Subject there is no way to learn the directory, so
            # this cannot be treated as a syncmail message
            return None
        # syncmail puts the repository-relative directory in the subject:
        # mprefix + "%(dir)s %(file)s,%(oldversion)s,%(newversion)s", where
        # 'mprefix' is something that could be added by a mailing list
        # manager.
        # this is the only reasonable way to determine the directory name
        space = subject.find(" ")
        if space != -1:
            directory = subject[:space]
        else:
            directory = subject

        files = []
        comments = ""
        isdir = 0
        branch = None

        lines = list(body_line_iterator(m))

        # skip ahead to the file list
        while lines:
            line = lines.pop(0)
            if (line == "Modified Files:\n" or
                line == "Added Files:\n" or
                line == "Removed Files:\n"):
                break

        # the file list ends at a blank line or at the log message header
        while lines:
            line = lines.pop(0)
            if line == "\n":
                break
            if line == "Log Message:\n":
                # push it back so the log-message scan below still finds it
                lines.insert(0, line)
                break
            line = line.strip()
            # note: syncmail will send one email per directory involved in a
            # commit, with multiple files if they were in the same directory.
            # Unlike freshCVS, it makes no attempt to collect all related
            # commits into a single message.

            # note: syncmail will report a Tag underneath the ... Files: line
            # e.g.: Tag: BRANCH-DEVEL
            if line.startswith('Tag:'):
                branch = line.split(' ')[-1].rstrip()
                continue

            for f in line.split(" "):
                f = directory + "/" + f
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if f.startswith(prefix):
                        f = f[len(prefix):]
                    else:
                        continue
                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            return None

        # skip ahead to the log message
        while lines:
            line = lines.pop(0)
            if line == "Log Message:\n":
                break

        # message is terminated by "Index:..." (patch) or "--- NEW FILE.."
        # or "--- filename DELETED ---". Sigh.
        while lines:
            line = lines.pop(0)
            if line.find("Index: ") == 0:
                break
            if re.search(r"^--- NEW FILE", line):
                break
            if re.search(r" DELETED ---$", line):
                break
            comments += line
        comments = comments.rstrip() + "\n"

        change = changes.Change(who, files, comments, isdir, when=when,
                                branch=branch)

        return change
240 # Bonsai mail parser by Stephen Davis.
242 # This handles changes for CVS repositories that are watched by Bonsai
243 # (http://www.mozilla.org/bonsai.html)
245 # A Bonsai-formatted email message looks like:
247 # C|1071099907|stephend|/cvs|Sources/Scripts/buildbot|bonsai.py|1.2|||18|7
248 # A|1071099907|stephend|/cvs|Sources/Scripts/buildbot|master.cfg|1.1|||18|7
249 # R|1071099907|stephend|/cvs|Sources/Scripts/buildbot|BuildMaster.py|||
250 # LOGCOMMENT
251 # Updated bonsai parser and switched master config to buildbot-0.4.1 style.
253 # :ENDLOGCOMMENT
255 # In the first example line, stephend is the user, /cvs the repository,
256 # Sources/Scripts/buildbot the directory, bonsai.py the file, 1.2 the
257 # revision, with no sticky tag or branch, and 18 lines added and 7 removed.
258 # Not all of these fields are always present (e.g. during "removes").
260 # There may be multiple "control" lines or even none (imports, directory
261 # additions) but there is one email per directory. We only care about actual
262 # changes since it is presumed directory additions don't actually affect the
263 # build. At least one file should need to change (the makefile, say) to
264 # actually make a new directory part of the build process. That's my story
265 # and I'm sticking to it.
class BonsaiMaildirSource(MaildirSource):
    name = "Bonsai"

    def parse(self, m, prefix=None):
        """Parse mail sent by the Bonsai cvs loginfo script.

        The body is a run of '|'-separated control lines, followed by the
        log comment between LOGCOMMENT and :ENDLOGCOMMENT markers. Returns a
        Change, or None when a control line has fewer than six fields (the
        mail is then assumed not to come from Bonsai) or when no file is
        named. 'prefix' is accepted for interface compatibility but is not
        used by this parser.
        """
        # we don't care who the email came from b/c the cvs user is in the
        # msg text

        who = "unknown"
        timestamp = None
        branch = None
        files = []
        lines = list(body_line_iterator(m))

        # read the control lines (what/who/where/file/etc.)
        while lines:
            line = lines.pop(0)
            if line == "LOGCOMMENT\n":
                break
            line = line.rstrip("\n")

            # we'd like to do the following but it won't work if the number of
            # items doesn't match so...
            #   what, timestamp, user, repo, module, file = line.split( '|' )
            items = line.split('|')
            if len(items) < 6:
                # not a valid line, assume this isn't a bonsai message
                return None

            try:
                # just grab the bottom-most timestamp, they're probably all the
                # same. TODO: I'm assuming this is relative to the epoch, but
                # this needs testing.
                timestamp = int(items[1])
            except ValueError:
                pass

            user = items[2]
            if user:
                who = user

            module = items[4]
            filename = items[5]
            if module and filename:
                files.append("%s/%s" % (module, filename))

            # items[7] is the sticky tag (not used) and items[8] the branch.
            # A line can pass the six-field check above and still be too
            # short to carry them, so only read the branch when it is there.
            if len(items) > 8:
                branch = items[8]

        # if no files changed, return nothing
        if not files:
            return None

        # read the comments
        comments = ""
        while lines:
            line = lines.pop(0)
            if line == ":ENDLOGCOMMENT\n":
                break
            comments += line
        comments = comments.rstrip() + "\n"

        # return buildbot Change object
        return changes.Change(who, files, comments, when=timestamp,
                              branch=branch)
333 # svn "commit-email.pl" handler. The format is very similar to freshcvs mail;
334 # here's a sample:
336 # From: username [at] apache.org [slightly obfuscated to avoid spam here]
337 # To: commits [at] spamassassin.apache.org
338 # Subject: svn commit: r105955 - in spamassassin/trunk: . lib/Mail
339 # ...
341 # Author: username
342 # Date: Sat Nov 20 00:17:49 2004 [note: TZ = local tz on server!]
343 # New Revision: 105955
345 # Modified: [also Removed: and Added:]
346 # [filename]
347 # ...
348 # Log:
349 # [log message]
350 # ...
353 # Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
354 # [unified diff]
356 # [end of mail]
class SVNCommitEmailMaildirSource(MaildirSource):
    name = "SVN commit-email.pl"

    def parse(self, m, prefix=None):
        """Turn a mail sent by the svn 'commit-email.pl' trigger into a
        Change.

        Returns None when the message has no From address or when no listed
        file survives the 'prefix' filter. When 'prefix' is given, files that
        do not start with it are logged and dropped, and it is stripped from
        the ones that do.
        """
        # Start from the sender's address: its local part is assumed to be
        # enough to identify the committer. An "Author:" line in the body
        # overrides it further down.
        sender_name, sender_addr = parseaddr(m["from"])
        if not sender_addr:
            return None # no From address, so not a commit mail we can use
        at_pos = sender_addr.find("@")
        if at_pos != -1:
            who = sender_addr[:at_pos]
        else:
            who = sender_addr # might still be useful

        # Time of receipt stands in for time of checkin. It depends on mail
        # latency but avoids the out-of-order-changes issue. The body's
        # "Date:" line has no timezone (it is in the server's local TZ), so
        # using it would need a config setting for the server's TZ.
        when = util.now()

        files = []
        rev = None

        # One iterator is shared by the three loops below, so each section
        # resumes where the previous one stopped.
        body = iter(list(body_line_iterator(m)))

        # header stanza: pick up author and revision, stop at "Log:"
        for text in body:
            # "Author: jmason"
            found = re.search(r"^Author: (\S+)", text)
            if found:
                who = found.group(1)

            # "New Revision: 105955"
            found = re.search(r"^New Revision: (\d+)", text)
            if found:
                rev = found.group(1)

            if text == "Log:\n":
                break

        # the commit message runs until the file-listing section starts
        pieces = []
        for text in body:
            if (text == "Modified:\n" or
                text == "Added:\n" or
                text == "Removed:\n"):
                break
            pieces.append(text)
        comments = "".join(pieces).rstrip() + "\n"

        # the file listing ends at the first blank line
        for text in body:
            if text == "\n":
                break
            # further section headers inside the listing carry no file names
            if (text.find("Modified:\n") == 0 or
                text.find("Added:\n") == 0 or
                text.find("Removed:\n") == 0):
                continue

            for f in text.strip().split(" "):
                if prefix:
                    # we may be told about changes we don't care about
                    if not f.startswith(prefix):
                        log.msg("ignored file from svn commit: prefix '%s' "
                                "does not match filename '%s'" % (prefix, f))
                        continue
                    f = f[len(prefix):]

                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            log.msg("no matching files found, ignoring commit")
            return None

        return changes.Change(who, files, comments, when=when, revision=rev)
460 # bzr Launchpad branch subscription mails. Sample mail:
462 # From: noreply@launchpad.net
463 # Subject: [Branch ~knielsen/maria/tmp-buildbot-test] Rev 2701: test add file
464 # To: Joe <joe@acme.com>
465 # ...
467 # ------------------------------------------------------------
468 # revno: 2701
469 # committer: Joe <joe@acme.com>
470 # branch nick: tmpbb
471 # timestamp: Fri 2009-05-15 10:35:43 +0200
472 # message:
473 # test add file
474 # added:
475 # test-add-file
478 # --
480 # https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test
482 # You are subscribed to branch lp:~knielsen/maria/tmp-buildbot-test.
483 # To unsubscribe from this branch go to https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test/+edit-subscription.
485 # [end of mail]
class BzrLaunchpadEmailMaildirSource(MaildirSource):
    name = "Launchpad"

    compare_attrs = MaildirSource.compare_attrs + ["branchMap", "defaultBranch"]

    def __init__(self, maildir, prefix=None, branchMap=None, defaultBranch=None, **kwargs):
        """
        @param maildir: the maildir to watch
        @param prefix: passed through to MaildirSource
        @param branchMap: optional mapping from the repository named in the
                          mail's Subject (with or without a leading 'lp:')
                          to the branch name to report
        @param defaultBranch: branch name to report when branchMap yields
                              nothing
        """
        self.branchMap = branchMap
        self.defaultBranch = defaultBranch
        MaildirSource.__init__(self, maildir, prefix, **kwargs)

    def parse(self, m, prefix=None):
        """Parse branch notification messages sent by Launchpad.

        Returns a Change when the body supplies both a revno and a
        committer, otherwise None. 'prefix' is accepted for interface
        compatibility but is not used by this parser.
        """
        # The repository is taken from a "[Branch <repository>]" tag in the
        # Subject. A mail without a Subject header simply has no repository.
        subject = m["subject"]
        match = None
        if subject is not None:
            match = re.search(r"^\s*\[Branch\s+([^]]+)\]", subject)
        if match:
            repository = match.group(1)
        else:
            repository = None

        # Put these into a dictionary, otherwise we cannot assign them
        # from nested function definitions.
        d = { 'files': [], 'comments': "" }
        gobbler = None
        rev = None
        who = None
        when = util.now()
        def gobble_comment(s):
            d['comments'] += s + "\n"
        def gobble_removed(s):
            d['files'].append('%s REMOVED' % s)
        def gobble_added(s):
            d['files'].append('%s ADDED' % s)
        def gobble_modified(s):
            d['files'].append('%s MODIFIED' % s)
        def gobble_renamed(s):
            match = re.search(r"^(.+) => (.+)$", s)
            if match:
                d['files'].append('%s RENAMED %s' % (match.group(1), match.group(2)))
            else:
                d['files'].append('%s RENAMED' % s)

        lines = list(body_line_iterator(m, True))
        while lines:
            line = lines.pop(0)

            # revno: 101
            match = re.search(r"^revno: ([0-9.]+)", line)
            if match:
                rev = match.group(1)

            # committer: Joe <joe@acme.com>
            match = re.search(r"^committer: (.*)$", line)
            if match:
                who = match.group(1)

            # timestamp: Fri 2009-05-15 10:35:43 +0200
            # datetime.strptime() is supposed to support %z for time zone, but
            # it does not seem to work. So handle the time zone manually.
            match = re.search(r"^timestamp: [a-zA-Z]{3} (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ([-+])(\d{2})(\d{2})$", line)
            if match:
                datestr = match.group(1)
                tz_sign = match.group(2)
                tz_hours = match.group(3)
                tz_minutes = match.group(4)
                when = parseLaunchpadDate(datestr, tz_sign, tz_hours, tz_minutes)

            # A section header selects which gobbler receives the indented
            # lines that follow it.
            if re.search(r"^message:\s*$", line):
                gobbler = gobble_comment
            elif re.search(r"^removed:\s*$", line):
                gobbler = gobble_removed
            elif re.search(r"^added:\s*$", line):
                gobbler = gobble_added
            elif re.search(r"^renamed:\s*$", line):
                gobbler = gobble_renamed
            elif re.search(r"^modified:\s*$", line):
                gobbler = gobble_modified
            # NOTE(review): the slice below drops two leading characters
            # while this pattern only requires one leading space -- confirm
            # the intended indent width.
            elif re.search(r"^ ", line) and gobbler:
                gobbler(line[2:-1]) # Use :-1 to gobble trailing newline

        # Determine the name of the branch: branchMap first (keyed by the
        # repository, with or without 'lp:'), then defaultBranch, then
        # 'lp:' + repository; None if none of these apply.
        branch = None
        if self.branchMap and repository:
            if repository in self.branchMap:
                branch = self.branchMap[repository]
            elif ('lp:' + repository) in self.branchMap:
                branch = self.branchMap['lp:' + repository]
        if not branch:
            if self.defaultBranch:
                branch = self.defaultBranch
            elif repository:
                branch = 'lp:' + repository

        if rev and who:
            return changes.Change(who, d['files'], d['comments'],
                                  when=when, revision=rev, branch=branch)
        else:
            return None
def parseLaunchpadDate(datestr, tz_sign, tz_hours, tz_minutes):
    """Convert a Launchpad timestamp to seconds since the epoch (UTC).

    @param datestr: local date and time, formatted "%Y-%m-%d %H:%M:%S"
    @param tz_sign: "+" or "-", the sign of the UTC offset
    @param tz_hours: hours part of the UTC offset, as a digit string
    @param tz_minutes: minutes part of the UTC offset, as a digit string
    @return: integer seconds since the epoch
    """
    # read the wall-clock fields as if they were UTC, then remove the offset
    time_no_tz = calendar.timegm(time.strptime(datestr, "%Y-%m-%d %H:%M:%S"))
    # The sign applies to the whole offset, hours and minutes alike.
    # Attaching it to the hours alone gets "-0330" and "-0030" wrong.
    tz_delta = 60 * 60 * int(tz_hours) + 60 * int(tz_minutes)
    if tz_sign == "-":
        tz_delta = -tz_delta
    return time_no_tz - tz_delta