1 # -*- test-case-name: buildbot.test.test_svnpoller -*-
3 # Based on the work of Dave Peticolas for the P4poll
4 # Changed to svn (using xml.dom.minidom) by Niklaus Giger
5 # Hacked beyond recognition by Brian Warner
7 from twisted
.python
import log
8 from twisted
.internet
import defer
, reactor
, utils
9 from twisted
.internet
.task
import LoopingCall
11 from buildbot
import util
12 from buildbot
.changes
import base
13 from buildbot
.changes
.changes
import Change
15 import xml
.dom
.minidom
18 def _assert(condition
, msg
):
21 raise AssertionError(msg
)
27 # these split_file_* functions are available for use as values to the
28 # split_file= argument.
29 def split_file_alwaystrunk(path
):
def split_file_branches(path):
    """Split a repository-relative path into a (branch, filepath) pair.

    ``trunk/subdir/file.c`` becomes ``(None, "subdir/file.c")`` and
    ``branches/1.5.x/subdir/file.c`` becomes
    ``("branches/1.5.x", "subdir/file.c")``. A path whose first component
    is neither ``trunk`` nor ``branches`` yields None.
    """
    components = path.split('/')
    top = components[0]
    if top == 'trunk':
        # trunk has no branch name; everything after it is the file path
        return (None, '/'.join(components[1:]))
    if top == 'branches':
        # the branch name is 'branches/<name>'; the remainder is the file
        branch = '/'.join(components[:2])
        filepath = '/'.join(components[2:])
        return (branch, filepath)
    return None
44 class SVNPoller(base
.ChangeSource
, util
.ComparableMixin
):
45 """This source will poll a Subversion repository for changes and submit
46 them to the change master."""
48 compare_attrs
= ["svnurl", "split_file_function",
49 "svnuser", "svnpasswd",
50 "pollinterval", "histmax",
53 parent
= None # filled in when we're added
58 def __init__(self
, svnurl
, split_file
=None,
59 svnuser
=None, svnpasswd
=None,
60 pollinterval
=10*60, histmax
=100,
61 svnbin
='svn', revlinktmpl
=''):
64 @param svnurl: the SVN URL that describes the repository and
65 subdirectory to watch. If this ChangeSource should
66 only pay attention to a single branch, this should
67 point at the repository for that branch, like
68 svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it
69 should follow multiple branches, point it at the
70 repository directory that contains all the branches
71 like svn://svn.twistedmatrix.com/svn/Twisted and also
72 provide a branch-determining function.
74 Each file in the repository has a SVN URL in the form
75 (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be
76 empty or not, depending upon your branch-determining
77 function. Only files that start with (SVNURL)/(BRANCH)
78 will be monitored. The Change objects that are sent to
79 the Schedulers will see (FILEPATH) for each modified
82 @type split_file: callable or None
83 @param split_file: a function that is called with a string of the
84 form (BRANCH)/(FILEPATH) and should return a tuple
85 (BRANCH, FILEPATH). This function should match
86 your repository's branch-naming policy. Each
87 changed file has a fully-qualified URL that can be
88 split into a prefix (which equals the value of the
89 'svnurl' argument) and a suffix; it is this suffix
90 which is passed to the split_file function.
92 If the function returns None, the file is ignored.
93 Use this to indicate that the file is not a part
96 For example, if your repository puts the trunk in
97 trunk/... and branches are in places like
98 branches/1.5/..., your split_file function could
99 look like the following (this function is
100 available as svnpoller.split_file_branches)::
102 pieces = path.split('/')
103 if pieces[0] == 'trunk':
104 return (None, '/'.join(pieces[1:]))
105 elif pieces[0] == 'branches':
106 return ('/'.join(pieces[0:2]),
107 '/'.join(pieces[2:]))
111 If instead your repository layout puts the trunk
112 for ProjectA in trunk/ProjectA/... and the 1.5
113 branch in branches/1.5/ProjectA/..., your
114 split_file function could look like::
116 pieces = path.split('/')
117 if pieces[0] == 'trunk':
119 pieces.pop(0) # remove 'trunk'
120 elif pieces[0] == 'branches':
121 pieces.pop(0) # remove 'branches'
123 branch = 'branches/' + pieces.pop(0)
125 return None # something weird
126 projectname = pieces.pop(0)
127 if projectname != 'ProjectA':
128 return None # wrong project
129 return (branch, '/'.join(pieces))
131 The default of split_file= is None, which
132 indicates that no splitting should be done. This
133 is equivalent to the following function::
137 If you wish, you can override the split_file
138 method with the same sort of function instead of
139 passing in a split_file= argument.
142 @type svnuser: string
143 @param svnuser: If set, the --username option will be added to
144 the 'svn log' command. You may need this to get
145 access to a private repository.
146 @type svnpasswd: string
147 @param svnpasswd: If set, the --password option will be added.
149 @type pollinterval: int
150 @param pollinterval: interval in seconds between polls. The default
151 is 600 seconds (10 minutes). Smaller values
152 decrease the latency between the time a change
153 is recorded and the time the buildbot notices
154 it, but it also increases the system load.
157 @param histmax: maximum number of changes to look back through.
158 The default is 100. Smaller values decrease
159 system load, but if more than histmax changes
160 are recorded between polls, the extra ones will
164 @param svnbin: path to svn binary, defaults to just 'svn'. Use
165 this if your subversion command lives in an
168 @type revlinktmpl: string
169 @param revlinktmpl: A format string to use for hyperlinks to revision
170 information. For example, setting this to
171 "http://reposerver/websvn/revision.php?rev=%s"
172 would create suitable links on the build pages
173 to information in websvn on each revision.
176 if svnurl
.endswith("/"):
177 svnurl
= svnurl
[:-1] # strip the trailing slash
179 self
.split_file_function
= split_file
or split_file_alwaystrunk
180 self
.svnuser
= svnuser
181 self
.svnpasswd
= svnpasswd
183 self
.revlinktmpl
= revlinktmpl
186 self
.pollinterval
= pollinterval
187 self
.histmax
= histmax
189 self
.overrun_counter
= 0
190 self
.loop
= LoopingCall(self
.checksvn
)
192 def split_file(self
, path
):
193 # use getattr() to avoid turning this function into a bound method,
194 # which would require it to have an extra 'self' argument
195 f
= getattr(self
, "split_file_function")
def startService(self):
    """Log the startup, start the base ChangeSource service, and schedule
    the polling loop to begin on the next reactor iteration."""
    announcement = "SVNPoller(%s) starting" % self.svnurl
    log.msg(announcement)
    base.ChangeSource.startService(self)
    # The reactor is not running yet, so the loop is not started directly.
    # A zero-delay callLater gives the reactor a chance to install our
    # SIGCHLD handler before any processes are spawned.
    begin_polling = self.loop.start
    reactor.callLater(0, begin_polling, self.pollinterval)
206 def stopService(self
):
207 log
.msg("SVNPoller(%s) shutting down" % self
.svnurl
)
209 return base
.ChangeSource
.stopService(self
)
212 return "SVNPoller watching %s" % self
.svnurl
215 # Our return value is only used for unit testing.
217 # we need to figure out the repository root, so we can figure out
218 # repository-relative pathnames later. Each SVNURL is in the form
219 # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
220 # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
221 # physical repository at /svn/Twisted on that host), (PROJECT) is
222 # something like Projects/Twisted (i.e. within the repository's
223 # internal namespace, everything under Projects/Twisted/ has
224 # something to do with Twisted, but these directory names do not
225 # actually appear on the repository host), (BRANCH) is something like
226 # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
227 # filename like "twisted/internet/defer.py".
229 # our self.svnurl attribute contains (ROOT)/(PROJECT) combined
230 # together in a way that we can't separate without svn's help. If the
231 # user is not using the split_file= argument, then self.svnurl might
232 # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
233 # get back from 'svn log' will be of the form
234 # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
235 # that (PROJECT) prefix from them. To do this without requiring the
236 # user to tell us how svnurl is split into ROOT and PROJECT, we do an
237 # 'svn info --xml' command at startup. This command will include a
238 # <root> element that tells us ROOT. We then strip this prefix from
239 # self.svnurl to determine PROJECT, and then later we strip the
240 # PROJECT prefix from the filenames reported by 'svn log --xml' to
241 # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
242 # turn into separate BRANCH and FILEPATH values.
247 log
.msg("SVNPoller(%s) overrun: timer fired but the previous "
248 "poll had not yet finished." % self
.svnurl
)
249 self
.overrun_counter
+= 1
250 return defer
.succeed(None)
253 log
.msg("SVNPoller polling")
255 # this sets self._prefix when it finishes. It fires with
256 # self._prefix as well, because that makes the unit tests easier
259 d
.addCallback(self
.determine_prefix
)
261 d
= defer
.succeed(self
._prefix
)
263 d
.addCallback(self
.get_logs
)
264 d
.addCallback(self
.parse_logs
)
265 d
.addCallback(self
.get_new_logentries
)
266 d
.addCallback(self
.create_changes
)
267 d
.addCallback(self
.submit_changes
)
268 d
.addCallbacks(self
.finished_ok
, self
.finished_failure
)
271 def getProcessOutput(self
, args
):
272 # this exists so we can override it during the unit tests
273 d
= utils
.getProcessOutput(self
.svnbin
, args
, {})
277 args
= ["info", "--xml", "--non-interactive", self
.svnurl
]
279 args
.extend(["--username=%s" % self
.svnuser
])
281 args
.extend(["--password=%s" % self
.svnpasswd
])
282 d
= self
.getProcessOutput(args
)
285 def determine_prefix(self
, output
):
287 doc
= xml
.dom
.minidom
.parseString(output
)
288 except xml
.parsers
.expat
.ExpatError
:
289 dbgMsg("_process_changes: ExpatError in %s" % output
)
290 log
.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'"
293 rootnodes
= doc
.getElementsByTagName("root")
295 # this happens if the URL we gave was already the root. In this
296 # case, our prefix is empty.
299 rootnode
= rootnodes
[0]
300 root
= "".join([c
.data
for c
in rootnode
.childNodes
])
301 # root will be a unicode string
302 _assert(self
.svnurl
.startswith(root
),
303 "svnurl='%s' doesn't start with <root>='%s'" %
305 self
._prefix
= self
.svnurl
[len(root
):]
306 if self
._prefix
.startswith("/"):
307 self
._prefix
= self
._prefix
[1:]
308 log
.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
309 (self
.svnurl
, root
, self
._prefix
))
312 def get_logs(self
, ignored_prefix
=None):
314 args
.extend(["log", "--xml", "--verbose", "--non-interactive"])
316 args
.extend(["--username=%s" % self
.svnuser
])
318 args
.extend(["--password=%s" % self
.svnpasswd
])
319 args
.extend(["--limit=%d" % (self
.histmax
), self
.svnurl
])
320 d
= self
.getProcessOutput(args
)
323 def parse_logs(self
, output
):
324 # parse the XML output, return a list of <logentry> nodes
326 doc
= xml
.dom
.minidom
.parseString(output
)
327 except xml
.parsers
.expat
.ExpatError
:
328 dbgMsg("_process_changes: ExpatError in %s" % output
)
329 log
.msg("SVNPoller._parse_changes: ExpatError in '%s'" % output
)
331 logentries
= doc
.getElementsByTagName("logentry")
335 def _filter_new_logentries(self
, logentries
, last_change
):
336 # given a list of logentries, return a tuple of (new_last_change,
337 # new_logentries), where new_logentries contains only the ones after
340 # no entries, so last_change must stay at None
343 mostRecent
= int(logentries
[0].getAttribute("revision"))
345 if last_change
is None:
346 # if this is the first time we've been run, ignore any changes
347 # that occurred before now. This prevents a build at every
349 log
.msg('svnPoller: starting at change %s' % mostRecent
)
350 return (mostRecent
, [])
352 if last_change
== mostRecent
:
353 # an unmodified repository will hit this case
354 log
.msg('svnPoller: _process_changes last %s mostRecent %s' % (
355 last_change
, mostRecent
))
356 return (mostRecent
, [])
359 for el
in logentries
:
360 if last_change
== int(el
.getAttribute("revision")):
362 new_logentries
.append(el
)
363 new_logentries
.reverse() # return oldest first
364 return (mostRecent
, new_logentries
)
def get_new_logentries(self, logentries):
    """Filter logentries against self.last_change and advance it.

    Hands the entries and the currently recorded last change to
    _filter_new_logentries, stores the new last-change value it reports in
    self.last_change, logs the old and new values, and returns the
    filtered list of entries.
    """
    previous = self.last_change
    filtered = self._filter_new_logentries(logentries, previous)
    (latest, fresh_entries) = filtered
    self.last_change = latest
    log.msg('svnPoller: _process_changes %s .. %s' % (previous, latest))
    return fresh_entries
377 def _get_text(self
, element
, tag_name
):
379 child_nodes
= element
.getElementsByTagName(tag_name
)[0].childNodes
380 text
= "".join([t
.data
for t
in child_nodes
])
385 def _transform_path(self
, path
):
386 _assert(path
.startswith(self
._prefix
),
387 "filepath '%s' should start with prefix '%s'" %
388 (path
, self
._prefix
))
389 relative_path
= path
[len(self
._prefix
):]
390 if relative_path
.startswith("/"):
391 relative_path
= relative_path
[1:]
392 where
= self
.split_file(relative_path
)
393 # 'where' is either None or (branch, final_path)
396 def create_changes(self
, new_logentries
):
399 for el
in new_logentries
:
400 branch_files
= [] # get oldest change first
401 revision
= str(el
.getAttribute("revision"))
407 revlink
= self
.revlinktmpl
% urllib
.quote_plus(revision
)
409 dbgMsg("Adding change revision %s" % (revision
,))
410 # TODO: the rest of buildbot may not be ready for unicode 'who'
412 author
= self
._get
_text
(el
, "author")
413 comments
= self
._get
_text
(el
, "msg")
414 # there is a "date" field, but it provides localtime in the
415 # repository's timezone, whereas we care about buildmaster's
416 # localtime (since this will get used to position the boxes on
417 # the Waterfall display, etc). So ignore the date field and use
418 # our local clock instead.
419 #when = self._get_text(el, "date")
420 #when = time.mktime(time.strptime("%.19s" % when,
421 # "%Y-%m-%dT%H:%M:%S"))
423 pathlist
= el
.getElementsByTagName("paths")[0]
424 for p
in pathlist
.getElementsByTagName("path"):
425 action
= p
.getAttribute("action")
426 path
= "".join([t
.data
for t
in p
.childNodes
])
427 # the rest of buildbot is certainly not yet ready to handle
428 # unicode filenames, because they get put in RemoteCommands
429 # which get sent via PB to the buildslave, and PB doesn't
431 path
= path
.encode("ascii")
432 if path
.startswith("/"):
434 where
= self
._transform
_path
(path
)
436 # if 'where' is None, the file was outside any project that
437 # we care about and we should ignore it
439 branch
, filename
= where
440 if not branch
in branches
:
441 branches
[branch
] = { 'files': []}
442 branches
[branch
]['files'].append(filename
)
444 if not branches
[branch
].has_key('action'):
445 branches
[branch
]['action'] = action
447 for branch
in branches
.keys():
448 action
= branches
[branch
]['action']
449 files
= branches
[branch
]['files']
450 number_of_files_changed
= len(files
)
452 if action
== u
'D' and number_of_files_changed
== 1 and files
[0] == '':
453 log
.msg("Ignoring deletion of branch '%s'" % branch
)
455 c
= Change(who
=author
,
465 def submit_changes(self
, changes
):
467 self
.parent
.addChange(c
)
469 def finished_ok(self
, res
):
470 log
.msg("SVNPoller finished polling")
471 dbgMsg('_finished : %s' % res
)
476 def finished_failure(self
, f
):
477 log
.msg("SVNPoller failed")
478 dbgMsg('_finished : %s' % f
)
481 return None # eat the failure