rename old extra_args in commands.py to svn_args to avoid confusion
[buildbot.git] / buildbot / changes / svnpoller.py
blobe31b60646458dd17a185ba283a441b40a699005d
1 # -*- test-case-name: buildbot.test.test_svnpoller -*-
3 # Based on the work of Dave Peticolas for the P4poll
4 # Changed to svn (using xml.dom.minidom) by Niklaus Giger
5 # Hacked beyond recognition by Brian Warner
7 from twisted.python import log
8 from twisted.internet import defer, reactor, utils
9 from twisted.internet.task import LoopingCall
11 from buildbot import util
12 from buildbot.changes import base
13 from buildbot.changes.changes import Change
15 import xml.dom.minidom
16 import urllib
18 def _assert(condition, msg):
19 if condition:
20 return True
21 raise AssertionError(msg)
def dbgMsg(myString):
    """Write myString to the twisted log and return 1."""
    log.msg(myString)
    return 1
27 # these split_file_* functions are available for use as values to the
28 # split_file= argument.
def split_file_alwaystrunk(path):
    """Treat every path as belonging to the trunk.

    Returns a (branch, filepath) tuple whose branch is always None and
    whose filepath is the path exactly as given.
    """
    branch = None
    return (branch, path)
def split_file_branches(path):
    """Split a repository-relative path into (branch, filepath).

    'trunk/subdir/file.c' becomes (None, 'subdir/file.c') and
    'branches/1.5.x/subdir/file.c' becomes
    ('branches/1.5.x', 'subdir/file.c'). Any path whose first component is
    neither 'trunk' nor 'branches' yields None.
    """
    components = path.split('/')
    top = components[0]
    if top == 'trunk':
        return (None, '/'.join(components[1:]))
    if top == 'branches':
        # the branch name is 'branches' plus the next path component
        branch = '/'.join(components[:2])
        return (branch, '/'.join(components[2:]))
    return None
class SVNPoller(base.ChangeSource, util.ComparableMixin):
    """This source will poll a Subversion repository for changes and submit
    them to the change master.

    Each poll runs 'svn log --xml --verbose' against svnurl, keeps only the
    log entries newer than the last revision seen, turns them into Change
    objects (one per branch touched by each revision) and hands them to the
    parent via addChange().
    """

    compare_attrs = ["svnurl", "split_file_function",
                     "svnuser", "svnpasswd",
                     "pollinterval", "histmax",
                     "svnbin"]

    parent = None # filled in when we're added
    last_change = None # revision number of the newest log entry seen so far
    loop = None # LoopingCall that runs checksvn(), created in __init__
    working = False # True while a poll is in progress
    _delayed_start = None # pending reactor call that will start the loop

    def __init__(self, svnurl, split_file=None,
                 svnuser=None, svnpasswd=None,
                 pollinterval=10*60, histmax=100,
                 svnbin='svn', revlinktmpl=''):
        """
        @type  svnurl: string
        @param svnurl: the SVN URL that describes the repository and
                       subdirectory to watch. If this ChangeSource should
                       only pay attention to a single branch, this should
                       point at the repository for that branch, like
                       svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it
                       should follow multiple branches, point it at the
                       repository directory that contains all the branches
                       like svn://svn.twistedmatrix.com/svn/Twisted and also
                       provide a branch-determining function.

                       Each file in the repository has a SVN URL in the form
                       (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be
                       empty or not, depending upon your branch-determining
                       function. Only files that start with (SVNURL)/(BRANCH)
                       will be monitored. The Change objects that are sent to
                       the Schedulers will see (FILEPATH) for each modified
                       file.

        @type  split_file: callable or None
        @param split_file: a function that is called with a string of the
                           form (BRANCH)/(FILEPATH) and should return a tuple
                           (BRANCH, FILEPATH). This function should match
                           your repository's branch-naming policy. Each
                           changed file has a fully-qualified URL that can be
                           split into a prefix (which equals the value of the
                           'svnurl' argument) and a suffix; it is this suffix
                           which is passed to the split_file function.

                           If the function returns None, the file is ignored.
                           Use this to indicate that the file is not a part
                           of this project.

                           For example, if your repository puts the trunk in
                           trunk/... and branches are in places like
                           branches/1.5/..., your split_file function could
                           look like the following (this function is
                           available as svnpoller.split_file_branches)::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                return (None, '/'.join(pieces[1:]))
                            elif pieces[0] == 'branches':
                                return ('/'.join(pieces[0:2]),
                                        '/'.join(pieces[2:]))
                            else:
                                return None

                           If instead your repository layout puts the trunk
                           for ProjectA in trunk/ProjectA/... and the 1.5
                           branch in branches/1.5/ProjectA/..., your
                           split_file function could look like::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                branch = None
                                pieces.pop(0) # remove 'trunk'
                            elif pieces[0] == 'branches':
                                pieces.pop(0) # remove 'branches'
                                # grab branch name
                                branch = 'branches/' + pieces.pop(0)
                            else:
                                return None # something weird
                            projectname = pieces.pop(0)
                            if projectname != 'ProjectA':
                                return None # wrong project
                            return (branch, '/'.join(pieces))

                           The default of split_file= is None, which
                           indicates that no splitting should be done. This
                           is equivalent to the following function::

                            return (None, path)

                           If you wish, you can override the split_file
                           method with the same sort of function instead of
                           passing in a split_file= argument.


        @type  svnuser:      string
        @param svnuser:      If set, the --username option will be added to
                             the 'svn log' command. You may need this to get
                             access to a private repository.
        @type  svnpasswd:    string
        @param svnpasswd:    If set, the --password option will be added.

        @type  pollinterval: int
        @param pollinterval: interval in seconds between polls. The default
                             is 600 seconds (10 minutes). Smaller values
                             decrease the latency between the time a change
                             is recorded and the time the buildbot notices
                             it, but it also increases the system load.

        @type  histmax:      int
        @param histmax:      maximum number of changes to look back through.
                             The default is 100. Smaller values decrease
                             system load, but if more than histmax changes
                             are recorded between polls, the extra ones will
                             be silently lost.

        @type  svnbin:       string
        @param svnbin:       path to svn binary, defaults to just 'svn'. Use
                             this if your subversion command lives in an
                             unusual location.

        @type  revlinktmpl:  string
        @param revlinktmpl:  A format string to use for hyperlinks to revision
                             information. For example, setting this to
                             "http://reposerver/websvn/revision.php?rev=%s"
                             would create suitable links on the build pages
                             to information in websvn on each revision.
        """

        if svnurl.endswith("/"):
            svnurl = svnurl[:-1] # strip the trailing slash
        self.svnurl = svnurl
        self.split_file_function = split_file or split_file_alwaystrunk
        self.svnuser = svnuser
        self.svnpasswd = svnpasswd

        self.revlinktmpl = revlinktmpl

        self.svnbin = svnbin
        self.pollinterval = pollinterval
        self.histmax = histmax
        # None means "not determined yet"; an empty string is a valid prefix
        self._prefix = None
        self.overrun_counter = 0
        self.loop = LoopingCall(self.checksvn)

    def split_file(self, path):
        """Split a (BRANCH)/(FILEPATH) string into (branch, filepath), or
        return None if the file should be ignored, by delegating to the
        configured split_file_function."""
        # use getattr() to avoid turning this function into a bound method,
        # which would require it to have an extra 'self' argument
        f = getattr(self, "split_file_function")
        return f(path)

    def startService(self):
        """Start the service and schedule the polling loop to begin."""
        log.msg("SVNPoller(%s) starting" % self.svnurl)
        base.ChangeSource.startService(self)
        # Don't start the loop just yet because the reactor isn't running.
        # Give it a chance to go and install our SIGCHLD handler before
        # spawning processes. Remember the delayed call so that
        # stopService() can cancel it if we are stopped before it fires.
        self._delayed_start = reactor.callLater(0, self.loop.start,
                                                self.pollinterval)

    def stopService(self):
        """Stop polling and shut the service down.

        Safe to call whether or not the polling loop has actually started.
        """
        log.msg("SVNPoller(%s) shutting down" % self.svnurl)
        delayed = self._delayed_start
        self._delayed_start = None
        if delayed is not None and delayed.active():
            # the loop was scheduled but never started: make sure it does
            # not start after we have been stopped
            delayed.cancel()
        # LoopingCall.stop() asserts that the loop is running, so only call
        # it when that is actually the case
        if self.loop.running:
            self.loop.stop()
        return base.ChangeSource.stopService(self)

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "SVNPoller watching %s" % self.svnurl

    def checksvn(self):
        """Run one poll cycle.

        Returns a Deferred that fires when the poll has finished. If the
        previous poll is still running, the overrun is counted and a
        Deferred that has already fired with None is returned instead.
        """
        # Our return value is only used for unit testing.

        # we need to figure out the repository root, so we can figure out
        # repository-relative pathnames later. Each SVNURL is in the form
        # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
        # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
        # physical repository at /svn/Twisted on that host), (PROJECT) is
        # something like Projects/Twisted (i.e. within the repository's
        # internal namespace, everything under Projects/Twisted/ has
        # something to do with Twisted, but these directory names do not
        # actually appear on the repository host), (BRANCH) is something like
        # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
        # filename like "twisted/internet/defer.py".

        # our self.svnurl attribute contains (ROOT)/(PROJECT) combined
        # together in a way that we can't separate without svn's help. If the
        # user is not using the split_file= argument, then self.svnurl might
        # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
        # get back from 'svn log' will be of the form
        # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
        # that (PROJECT) prefix from them. To do this without requiring the
        # user to tell us how svnurl is split into ROOT and PROJECT, we do an
        # 'svn info --xml' command at startup. This command will include a
        # <root> element that tells us ROOT. We then strip this prefix from
        # self.svnurl to determine PROJECT, and then later we strip the
        # PROJECT prefix from the filenames reported by 'svn log --xml' to
        # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
        # turn into separate BRANCH and FILEPATH values.

        # whew.

        if self.working:
            log.msg("SVNPoller(%s) overrun: timer fired but the previous "
                    "poll had not yet finished." % self.svnurl)
            self.overrun_counter += 1
            return defer.succeed(None)
        self.working = True

        log.msg("SVNPoller polling")
        # compare against None: an empty prefix (svnurl is the repository
        # root) is a valid, already-determined value and must not trigger
        # another 'svn info' on every poll
        if self._prefix is None:
            # this sets self._prefix when it finishes. It fires with
            # self._prefix as well, because that makes the unit tests easier
            # to write.
            d = self.get_root()
            d.addCallback(self.determine_prefix)
        else:
            d = defer.succeed(self._prefix)

        d.addCallback(self.get_logs)
        d.addCallback(self.parse_logs)
        d.addCallback(self.get_new_logentries)
        d.addCallback(self.create_changes)
        d.addCallback(self.submit_changes)
        d.addCallbacks(self.finished_ok, self.finished_failure)
        return d

    def getProcessOutput(self, args):
        """Run the svn binary with the given argument list and return a
        Deferred for its output."""
        # this exists so we can override it during the unit tests
        d = utils.getProcessOutput(self.svnbin, args, {})
        return d

    def _auth_args(self):
        """Return the list of --username/--password options to pass to svn
        (empty when no credentials were configured)."""
        auth = []
        if self.svnuser:
            auth.append("--username=%s" % self.svnuser)
        if self.svnpasswd:
            auth.append("--password=%s" % self.svnpasswd)
        return auth

    def get_root(self):
        """Run 'svn info --xml' on svnurl; return a Deferred for its
        output."""
        args = ["info", "--xml", "--non-interactive", self.svnurl]
        args.extend(self._auth_args())
        return self.getProcessOutput(args)

    def determine_prefix(self, output):
        """Compute, store and return self._prefix from 'svn info --xml'
        output.

        The prefix is the part of svnurl that follows the repository root
        reported in the <root> element, without a leading slash. If there is
        no <root> element the prefix is the empty string.

        Raises ExpatError if the output is not well-formed XML, and
        AssertionError if svnurl does not start with the reported root.
        """
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            dbgMsg("_process_changes: ExpatError in %s" % output)
            log.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'"
                    % output)
            raise
        rootnodes = doc.getElementsByTagName("root")
        if not rootnodes:
            # this happens if the URL we gave was already the root. In this
            # case, our prefix is empty.
            self._prefix = ""
            return self._prefix
        rootnode = rootnodes[0]
        root = "".join([c.data for c in rootnode.childNodes])
        # root will be a unicode string
        _assert(self.svnurl.startswith(root),
                "svnurl='%s' doesn't start with <root>='%s'" %
                (self.svnurl, root))
        self._prefix = self.svnurl[len(root):]
        if self._prefix.startswith("/"):
            self._prefix = self._prefix[1:]
        log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
                (self.svnurl, root, self._prefix))
        return self._prefix

    def get_logs(self, ignored_prefix=None):
        """Run 'svn log --xml --verbose' on svnurl, limited to histmax
        entries; return a Deferred for its output. The argument is ignored
        (it is whatever the previous callback produced)."""
        args = ["log", "--xml", "--verbose", "--non-interactive"]
        args.extend(self._auth_args())
        args.extend(["--limit=%d" % (self.histmax), self.svnurl])
        return self.getProcessOutput(args)

    def parse_logs(self, output):
        """Parse 'svn log --xml' output and return its <logentry> nodes.

        Raises ExpatError if the output is not well-formed XML.
        """
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            dbgMsg("_process_changes: ExpatError in %s" % output)
            log.msg("SVNPoller._parse_changes: ExpatError in '%s'" % output)
            raise
        logentries = doc.getElementsByTagName("logentry")
        return logentries


    def _filter_new_logentries(self, logentries, last_change):
        """Given a list of logentries (newest first), return a tuple of
        (new_last_change, new_logentries), where new_logentries contains
        only the ones after last_change, oldest first."""
        if not logentries:
            # no entries: there is nothing new, and nothing that tells us
            # about a newer revision, so keep whatever last_change we had
            # (resetting it to None would make the next poll throw away
            # real changes as if we had just started up)
            return (last_change, [])

        mostRecent = int(logentries[0].getAttribute("revision"))

        if last_change is None:
            # if this is the first time we've been run, ignore any changes
            # that occurred before now. This prevents a build at every
            # startup.
            log.msg('svnPoller: starting at change %s' % mostRecent)
            return (mostRecent, [])

        if last_change == mostRecent:
            # an unmodified repository will hit this case
            log.msg('svnPoller: _process_changes last %s mostRecent %s' % (
                last_change, mostRecent))
            return (mostRecent, [])

        new_logentries = []
        for el in logentries:
            if last_change == int(el.getAttribute("revision")):
                break
            new_logentries.append(el)
        new_logentries.reverse() # return oldest first
        return (mostRecent, new_logentries)

    def get_new_logentries(self, logentries):
        """Return the logentries that are newer than self.last_change
        (oldest first) and advance self.last_change accordingly."""
        last_change = self.last_change
        (new_last_change,
         new_logentries) = self._filter_new_logentries(logentries,
                                                       self.last_change)
        self.last_change = new_last_change
        log.msg('svnPoller: _process_changes %s .. %s' %
                (last_change, new_last_change))
        return new_logentries


    def _get_text(self, element, tag_name):
        """Return the concatenated text of the first tag_name child of
        element, or "<unknown>" if it cannot be extracted."""
        try:
            child_nodes = element.getElementsByTagName(tag_name)[0].childNodes
            text = "".join([t.data for t in child_nodes])
        except Exception:
            # best effort: a missing tag or an unexpected node type just
            # yields a placeholder. (Exception rather than a bare except, so
            # KeyboardInterrupt/SystemExit are not swallowed.)
            text = "<unknown>"
        return text

    def _transform_path(self, path):
        """Turn a repository-relative path into (branch, filepath).

        Strips self._prefix from the path and passes the remainder to
        split_file(). Returns None if the file should be ignored: either
        split_file() said so, or the path is not underneath the prefix.
        """
        prefix = self._prefix or ""
        if prefix:
            # 'svn log --verbose' lists every path touched by a revision,
            # including ones outside the directory we are watching, so a
            # path outside the prefix is not an error: just ignore it.
            # Match on a path-component boundary so that a prefix of
            # 'trunk' does not claim 'trunk2/file.c'.
            if prefix.endswith("/"):
                boundary = prefix
            else:
                boundary = prefix + "/"
            if path != prefix and not path.startswith(boundary):
                log.msg("SVNPoller: ignoring path '%s' which is not under "
                        "prefix '%s'" % (path, prefix))
                return None
        relative_path = path[len(prefix):]
        if relative_path.startswith("/"):
            relative_path = relative_path[1:]
        where = self.split_file(relative_path)
        # 'where' is either None or (branch, final_path)
        return where

    def create_changes(self, new_logentries):
        """Build and return a list of Change objects from <logentry> nodes.

        One Change is created per branch touched by each log entry, except
        that the deletion of a branch itself produces no Change. Raises
        UnicodeEncodeError if a changed path is not pure ASCII.
        """
        changes = []

        for el in new_logentries:
            revision = str(el.getAttribute("revision"))

            revlink = ''
            if self.revlinktmpl and revision:
                revlink = self.revlinktmpl % urllib.quote_plus(revision)

            dbgMsg("Adding change revision %s" % (revision,))
            # TODO: the rest of buildbot may not be ready for unicode 'who'
            # values
            author = self._get_text(el, "author")
            comments = self._get_text(el, "msg")
            # there is a "date" field, but it provides localtime in the
            # repository's timezone, whereas we care about buildmaster's
            # localtime (since this will get used to position the boxes on
            # the Waterfall display, etc). So ignore the date field and use
            # our local clock instead.
            #when = self._get_text(el, "date")
            #when = time.mktime(time.strptime("%.19s" % when,
            #                                 "%Y-%m-%dT%H:%M:%S"))

            # maps branch -> {'files': [filenames],
            #                 'action': action of the first file seen}
            branches = {}
            pathlist = el.getElementsByTagName("paths")[0]
            for p in pathlist.getElementsByTagName("path"):
                action = p.getAttribute("action")
                path = "".join([t.data for t in p.childNodes])
                # the rest of buildbot is certainly not yet ready to handle
                # unicode filenames, because they get put in RemoteCommands
                # which get sent via PB to the buildslave, and PB doesn't
                # handle unicode.
                path = path.encode("ascii")
                if path.startswith("/"):
                    path = path[1:]
                where = self._transform_path(path)

                # if 'where' is None, the file was outside any project that
                # we care about and we should ignore it
                if where:
                    branch, filename = where
                    if branch not in branches:
                        branches[branch] = {'files': [], 'action': action}
                    branches[branch]['files'].append(filename)

            for branch in branches:
                action = branches[branch]['action']
                files = branches[branch]['files']
                number_of_files_changed = len(files)

                # a single deleted path whose filename part is empty is the
                # branch directory itself
                if (action == u'D' and number_of_files_changed == 1
                    and files[0] == ''):
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                else:
                    c = Change(who=author,
                               files=files,
                               comments=comments,
                               revision=revision,
                               branch=branch,
                               revlink=revlink)
                    changes.append(c)

        return changes

    def submit_changes(self, changes):
        """Hand each Change to the parent via addChange()."""
        for c in changes:
            self.parent.addChange(c)

    def finished_ok(self, res):
        """Callback for a successful poll: clear the working flag and pass
        the result through."""
        log.msg("SVNPoller finished polling")
        dbgMsg('_finished : %s' % res)
        assert self.working
        self.working = False
        return res

    def finished_failure(self, f):
        """Errback for a failed poll: log the failure, clear the working
        flag and swallow the failure."""
        log.msg("SVNPoller failed")
        dbgMsg('_finished : %s' % f)
        assert self.working
        self.working = False
        return None # eat the failure