remove trailing whitespace
[buildbot.git] / buildbot / changes / mail.py
blob7d86d476ff3d3f77985b5b45d2a3103f611213d3
1 # -*- test-case-name: buildbot.test.test_mailparse -*-
3 """
4 Parse various kinds of 'CVS notify' email.
5 """
6 import os, re
7 from email import message_from_file
8 from email.Utils import parseaddr
9 from email.Iterators import body_line_iterator
11 from zope.interface import implements
12 from twisted.python import log
13 from buildbot import util
14 from buildbot.interfaces import IChangeSource
15 from buildbot.changes import changes
16 from buildbot.changes.maildir import MaildirService
class MaildirSource(MaildirService, util.ComparableMixin):
    """Base class for change sources that watch a maildir which is
    subscribed to a change-announcement mailing list.

    Subclasses set 'name' and provide parse(m, prefix), which turns a parsed
    email message into a Change object, or returns None when the message
    should be ignored.
    """
    implements(IChangeSource)

    compare_attrs = ["basedir", "pollinterval"]
    name = None

    def __init__(self, maildir, prefix=None):
        """Start watching 'maildir'.

        'prefix', when given, is handed to parse() for every message so the
        parser can drop files outside of it and strip it from the rest. It
        normally needs a trailing slash; a warning is logged when it has
        none.
        """
        MaildirService.__init__(self, maildir)
        self.prefix = prefix
        if prefix and not prefix.endswith("/"):
            # the format arguments used to be missing, so the log showed the
            # raw '%s' placeholders instead of the offending prefix
            log.msg("%s: you probably want your prefix=('%s') to end with "
                    "a slash" % (self.__class__.__name__, prefix))

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "%s mailing list in maildir %s" % (self.name, self.basedir)

    def messageReceived(self, filename):
        """Handle a message that appeared in the maildir's new/ directory.

        The message is parsed; a resulting Change (if any) is passed to
        self.parent.addChange(), and the file is then moved from new/ to
        cur/.
        """
        new_path = os.path.join(self.basedir, "new", filename)
        cur_path = os.path.join(self.basedir, "cur", filename)
        fd = open(new_path, "r")
        try:
            change = self.parse_file(fd, self.prefix)
        finally:
            # always release the handle, also when the parser raises, and
            # before the file gets renamed below
            fd.close()
        if change:
            self.parent.addChange(change)
        os.rename(new_path, cur_path)

    def parse_file(self, fd, prefix=None):
        """Read one email message from the open file 'fd' and return
        whatever self.parse() makes of it."""
        m = message_from_file(fd)
        return self.parse(m, prefix)
class FCMaildirSource(MaildirSource):
    """Change source for commit mail sent by FreshCVS."""
    name = "FreshCVS"

    def parse(self, m, prefix=None):
        """Parse mail sent by FreshCVS.

        'm' is a parsed email message. If 'prefix' is given, only files that
        start with it are kept, and the prefix is stripped from their names.

        Returns a changes.Change, or None when the message does not look
        like FreshCVS mail or names no files of interest.
        """
        # FreshCVS sets From: to "user CVS <user>", but the <> part may be
        # modified by the MTA (to include a local domain)
        name, addr = parseaddr(m["from"])
        if not name:
            return None # no From means this message isn't from FreshCVS
        cvs = name.find(" CVS")
        if cvs == -1:
            return None # this message isn't from FreshCVS
        who = name[:cvs]

        # we take the time of receipt as the time of checkin. Not correct,
        # but it avoids the out-of-order-changes issue. See the comment in
        # SyncmailMaildirSource.parse about using the 'Date:' header
        when = util.now()

        files = []
        isdir = 0
        # a single iterator shared by all the loops below: each loop picks
        # up where the previous one stopped
        lines = iter(list(body_line_iterator(m)))

        # skip ahead to the file list
        for line in lines:
            if line == "Modified files:\n":
                break

        # the file list runs up to the first empty line
        for line in lines:
            if line == "\n":
                break
            linebits = line.rstrip("\n").split(None, 1)
            if not linebits:
                # whitespace-only line: nothing to record (this used to
                # raise IndexError)
                continue
            filename = linebits[0]
            if prefix:
                # insist that the file start with the prefix: FreshCVS sends
                # changes we don't care about too
                if not filename.startswith(prefix):
                    continue
                filename = filename[len(prefix):]
            # an entry without a trailing field, or with "0 0" as its
            # trailing field, is treated as a directory
            if len(linebits) == 1 or linebits[1] == "0 0":
                isdir = 1
            files.append(filename)

        # skip ahead to the log message
        for line in lines:
            if line == "Log message:\n":
                break

        # message is terminated by "ViewCVS links:" or "Index:..." (patch)
        comment_lines = []
        for line in lines:
            if line == "ViewCVS links:\n":
                break
            if line.startswith("Index: "):
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        if not files:
            return None

        return changes.Change(who, files, comments, isdir, when=when)
class SyncmailMaildirSource(MaildirSource):
    """Change source for commit mail sent by the 'syncmail' program."""
    name = "Syncmail"

    def parse(self, m, prefix=None):
        """Parse messages sent by the 'syncmail' program, as suggested by the
        sourceforge.net CVS Admin documentation. Syncmail is maintained at
        syncmail.sf.net .

        'm' is a parsed email message. If 'prefix' is given, only files that
        start with it are kept, and the prefix is stripped from their names.

        Returns a changes.Change, or None when the message has no usable
        From: or Subject: header or names no files of interest.
        """
        # pretty much the same as freshcvs mail, not surprising since CVS is
        # the one creating most of the text

        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        name, addr = parseaddr(m["from"])
        if not addr:
            return None # no From means this message isn't from syncmail
        at = addr.find("@")
        if at == -1:
            who = addr # might still be useful
        else:
            who = addr[:at]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. Also syncmail doesn't give us anything
        # better to work with, unless you count pulling the v1-vs-v2
        # timestamp out of the diffs, which would be ugly. TODO: Pulling the
        # 'Date:' header from the mail is a possibility, and
        # email.Utils.parsedate_tz may be useful. It should be configurable,
        # however, because there are a lot of broken clocks out there.
        when = util.now()

        subject = m["subject"]
        if subject is None:
            # without a Subject: there is no directory name to work with
            # (this used to raise AttributeError)
            return None
        # syncmail puts the repository-relative directory in the subject:
        # mprefix + "%(dir)s %(file)s,%(oldversion)s,%(newversion)s", where
        # 'mprefix' is something that could be added by a mailing list
        # manager.
        # this is the only reasonable way to determine the directory name
        space = subject.find(" ")
        if space != -1:
            directory = subject[:space]
        else:
            directory = subject

        files = []
        isdir = 0
        branch = None

        # a single iterator shared by all the loops below: each loop picks
        # up where the previous one stopped
        lines = iter(list(body_line_iterator(m)))

        # skip ahead to the file list
        for line in lines:
            if line in ("Modified Files:\n",
                        "Added Files:\n",
                        "Removed Files:\n"):
                break

        # the file list ends at an empty line, or directly at the
        # "Log Message:" header
        at_log_message = False
        for line in lines:
            if line == "\n":
                break
            if line == "Log Message:\n":
                # remember that the header was already consumed, so the
                # search for it below is skipped
                at_log_message = True
                break
            line = line.strip()
            # note: syncmail will send one email per directory involved in a
            # commit, with multiple files if they were in the same directory.
            # Unlike freshCVS, it makes no attempt to collect all related
            # commits into a single message.

            # note: syncmail will report a Tag underneath the ... Files: line
            # e.g.: Tag: BRANCH-DEVEL
            if line.startswith('Tag:'):
                branch = line.split(' ')[-1].rstrip()
                continue

            # split() without an argument drops the empty names that
            # repeated spaces used to produce (they became "directory/")
            for f in line.split():
                f = directory + "/" + f
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if not f.startswith(prefix):
                        continue
                    f = f[len(prefix):]
                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            return None

        if not at_log_message:
            # skip ahead to the log message
            for line in lines:
                if line == "Log Message:\n":
                    break

        # message is terminated by "Index:..." (patch) or "--- NEW FILE.."
        # or "--- filename DELETED ---". Sigh.
        comment_lines = []
        for line in lines:
            if line.startswith("Index: "):
                break
            if re.search(r"^--- NEW FILE", line):
                break
            if re.search(r" DELETED ---$", line):
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        return changes.Change(who, files, comments, isdir, when=when,
                              branch=branch)
239 # Bonsai mail parser by Stephen Davis.
241 # This handles changes for CVS repositories that are watched by Bonsai
242 # (http://www.mozilla.org/bonsai.html)
244 # A Bonsai-formatted email message looks like:
246 # C|1071099907|stephend|/cvs|Sources/Scripts/buildbot|bonsai.py|1.2|||18|7
247 # A|1071099907|stephend|/cvs|Sources/Scripts/buildbot|master.cfg|1.1|||18|7
248 # R|1071099907|stephend|/cvs|Sources/Scripts/buildbot|BuildMaster.py|||
249 # LOGCOMMENT
250 # Updated bonsai parser and switched master config to buildbot-0.4.1 style.
252 # :ENDLOGCOMMENT
# In the first example line, stephend is the user, /cvs the repository,
# Sources/Scripts/buildbot the directory, bonsai.py the file, 1.2 the
# revision, the sticky tag and the branch are empty, and 18 lines were added
# and 7 removed. Not all of these fields are always present (during
# "removes", for example).
259 # There may be multiple "control" lines or even none (imports, directory
260 # additions) but there is one email per directory. We only care about actual
261 # changes since it is presumed directory additions don't actually affect the
262 # build. At least one file should need to change (the makefile, say) to
263 # actually make a new directory part of the build process. That's my story
264 # and I'm sticking to it.
class BonsaiMaildirSource(MaildirSource):
    """Change source for mail sent by the Bonsai cvs loginfo script."""
    name = "Bonsai"

    def parse(self, m, prefix=None):
        """Parse mail sent by the Bonsai cvs loginfo script.

        'm' is a parsed email message. 'prefix' is accepted for
        compatibility with the other parsers but is not used here.

        Returns a changes.Change, or None when a control line has fewer
        than six '|'-separated fields or when no file is named.
        """
        # we don't care who the email came from b/c the cvs user is in the
        # msg text

        who = "unknown"
        timestamp = None
        branch = None
        files = []
        # a single iterator shared by both loops below: the second one picks
        # up where the first one stopped
        lines = iter(list(body_line_iterator(m)))

        # read the control lines (what/who/where/file/etc.)
        for line in lines:
            if line == "LOGCOMMENT\n":
                break
            # the number of fields varies, so they are picked out by index
            # instead of being unpacked in one go
            items = line.rstrip("\n").split('|')
            if len(items) < 6:
                # not a valid line, assume this isn't a bonsai message
                return None

            try:
                # just grab the bottom-most timestamp, they're probably all
                # the same. TODO: I'm assuming this is relative to the epoch,
                # but this needs testing.
                timestamp = int(items[1])
            except ValueError:
                pass

            user = items[2]
            if user:
                who = user

            module = items[4]
            filename = items[5]
            if module and filename:
                files.append("%s/%s" % (module, filename))

            # items[7] is the sticky tag (unused) and items[8] the branch.
            # Both are optional: only six fields are guaranteed by the check
            # above, so reading them unconditionally used to raise
            # IndexError on shorter lines.
            if len(items) > 8:
                branch = items[8]

        # if no files changed, return nothing
        if not files:
            return None

        # read the comments
        comment_lines = []
        for line in lines:
            if line == ":ENDLOGCOMMENT\n":
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        # return buildbot Change object
        return changes.Change(who, files, comments, when=timestamp,
                              branch=branch)
332 # svn "commit-email.pl" handler. The format is very similar to freshcvs mail;
333 # here's a sample:
335 # From: username [at] apache.org [slightly obfuscated to avoid spam here]
336 # To: commits [at] spamassassin.apache.org
337 # Subject: svn commit: r105955 - in spamassassin/trunk: . lib/Mail
338 # ...
340 # Author: username
341 # Date: Sat Nov 20 00:17:49 2004 [note: TZ = local tz on server!]
342 # New Revision: 105955
344 # Modified: [also Removed: and Added:]
345 # [filename]
346 # ...
347 # Log:
348 # [log message]
349 # ...
352 # Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
353 # [unified diff]
355 # [end of mail]
class SVNCommitEmailMaildirSource(MaildirSource):
    """Change source for mail sent by the svn 'commit-email.pl' trigger."""
    name = "SVN commit-email.pl"

    def parse(self, m, prefix=None):
        """Parse messages sent by the svn 'commit-email.pl' trigger.

        'm' is a parsed email message. If 'prefix' is given, only files that
        start with it are kept, and the prefix is stripped from their names.

        Returns a changes.Change carrying the revision number, or None when
        the message has no From: address or names no files of interest.
        """
        # The mail is sent from the person doing the checkin. Use the local
        # part of the address as a first guess; an "Author:" line in the
        # body overrides it below.
        name, addr = parseaddr(m["from"])
        if not addr:
            return None # no From means this isn't a commit-email.pl message
        at = addr.find("@")
        if at == -1:
            who = addr # might still be useful
        else:
            who = addr[:at]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. TODO: Pulling the 'Date:' header from
        # the mail is a possibility, and email.Utils.parsedate_tz may be
        # useful. It should be configurable, however, because there are a
        # lot of broken clocks out there.
        when = util.now()

        section_headers = ("Modified:\n", "Added:\n", "Removed:\n")

        files = []
        rev = None
        # a single iterator shared by all the loops below: each loop picks
        # up where the previous one stopped
        lines = iter(list(body_line_iterator(m)))

        # header stanza
        for line in lines:
            # "Author: jmason"
            match = re.search(r"^Author: (\S+)", line)
            if match:
                who = match.group(1)

            # "New Revision: 105955"
            match = re.search(r"^New Revision: (\d+)", line)
            if match:
                rev = match.group(1)

            # possible TODO: use "Date: ..." data here instead of time of
            # commit message receipt, above. however, this timestamp is
            # specified *without* a timezone, in the server's local TZ, so to
            # be accurate buildbot would need a config setting to specify the
            # source server's expected TZ setting! messy.

            # this stanza ends with the "Log:"
            if line == "Log:\n":
                break

        # commit message is terminated by the file-listing section
        comment_lines = []
        for line in lines:
            if line in section_headers:
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        # the file listing runs up to the first empty line
        for line in lines:
            if line == "\n":
                break
            if line in section_headers:
                continue # ignore this line

            # split() without an argument never yields empty names, which
            # split(" ") did for blank lines and repeated spaces
            for f in line.split():
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if f.startswith(prefix):
                        f = f[len(prefix):]
                    else:
                        log.msg("ignored file from svn commit: prefix '%s' "
                                "does not match filename '%s'" % (prefix, f))
                        continue

                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            log.msg("no matching files found, ignoring commit")
            return None

        return changes.Change(who, files, comments, when=when, revision=rev)