6 from twisted
.spread
import pb
7 from twisted
.python
import log
8 from twisted
.internet
import reactor
, defer
9 from twisted
.application
import service
, internet
10 from twisted
.cred
import credentials
12 from buildbot
.util
import now
13 from buildbot
.pbutil
import ReconnectingPBClientFactory
14 from buildbot
.slave
import registry
15 # make sure the standard commands get registered. This import is performed
16 # for its side-effects.
17 from buildbot
.slave
import commands
18 # and make pyflakes think we aren't being stupid
21 class NoCommandRunning(pb
.Error
):
23 class WrongCommandRunning(pb
.Error
):
25 class UnknownCommand(pb
.Error
):
29 def __init__(self
, host
, port
, username
, password
):
32 self
.username
= username
33 self
.password
= password
37 """This is an object that can hold state from one step to another in the
38 same build. All SlaveCommands have access to it.
def __init__(self, builder):
    """Create the per-build state holder.

    @param builder: the object this build state belongs to; it is stored
                    unchanged as C{self.builder}
    """
    self.builder = builder
43 class SlaveBuilder(pb
.Referenceable
, service
.Service
):
45 """This is the local representation of a single Builder: it handles a
46 single kind of build (like an all-warnings build). It has a name and a
47 home directory. The rest of its behavior is determined by the master.
50 stopCommandOnShutdown
= True
52 # remote is a ref to the Builder object on the master side, and is set
53 # when they attach. We use it to detect when the connection to the master
57 # .build points to a SlaveBuild object, a new one for each build
60 # .command points to a SlaveCommand instance, and is set while the step
61 # is running. We use it to implement the stopBuild method.
64 # .remoteStep is a ref to the master-side BuildStep object, and is set
65 # when the step is started
68 def __init__(self
, name
, not_really
):
69 #service.Service.__init__(self) # Service has no __init__ method
71 self
.not_really
= not_really
74 return "<SlaveBuilder '%s' at %d>" % (self
.name
, id(self
))
def setServiceParent(self, parent):
    """Attach this builder to C{parent} and remember it as C{self.bot}.

    @param parent: the service to attach to; it is handed unchanged to
                   L{service.Service.setServiceParent}
    """
    service.Service.setServiceParent(self, parent)
    # Keep our own reference to the parent. self.parent itself will go away
    # when the buildmaster's config file changes and this Builder is removed
    # (possibly because it has been changed, so the Builder will be re-added
    # again in a moment). This may occur during a build, while a step is
    # running.
    self.bot = self.parent
84 def setBuilddir(self
, builddir
):
86 self
.builddir
= builddir
87 self
.basedir
= os
.path
.join(self
.bot
.basedir
, self
.builddir
)
88 if not os
.path
.isdir(self
.basedir
):
89 os
.mkdir(self
.basedir
)
91 def stopService(self
):
92 service
.Service
.stopService(self
)
93 if self
.stopCommandOnShutdown
:
99 buildslave
= bot
.parent
104 def remote_setMaster(self
, remote
):
106 self
.remote
.notifyOnDisconnect(self
.lostRemote
)
def remote_print(self, message):
    """Log a message sent by the master, treating "ping" as a ping request.

    @param message: the text received from the master
    @return: whatever L{remote_ping} returns when C{message} is exactly
             C{"ping"}; C{None} for any other message
    """
    template = "SlaveBuilder.remote_print(%s): message from master: %s"
    log.msg(template % (self.name, message))
    if message != "ping":
        return None
    return self.remote_ping()
113 def remote_ping(self
):
114 log
.msg("SlaveBuilder.remote_ping(%s)" % self
)
115 if self
.bot
and self
.bot
.parent
:
116 debugOpts
= self
.bot
.parent
.debugOpts
117 if debugOpts
.get("stallPings"):
118 log
.msg(" debug_stallPings")
119 timeout
, timers
= debugOpts
["stallPings"]
121 t
= reactor
.callLater(timeout
, d
.callback
, None)
124 if debugOpts
.get("failPingOnce"):
125 log
.msg(" debug_failPingOnce")
126 class FailPingError(pb
.Error
): pass
127 del debugOpts
['failPingOnce']
128 raise FailPingError("debug_failPingOnce means we should fail")
130 def lostRemote(self
, remote
):
131 log
.msg("lost remote")
134 def lostRemoteStep(self
, remotestep
):
135 log
.msg("lost remote step")
136 self
.remoteStep
= None
137 if self
.stopCommandOnShutdown
:
140 # the following are Commands that can be invoked by the master-side
def remote_startBuild(self):
    """Prepare slave-side state for a new build.

    This is invoked before the first step of any new build is run. A fresh
    SlaveBuild object is created and stored on C{self.build}; it holds
    slave-side state from one step to the next.
    """
    fresh_build = SlaveBuild(self)
    self.build = fresh_build
    log.msg("%s.startBuild" % self)
149 def remote_startCommand(self
, stepref
, stepId
, command
, args
):
151 This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
152 part of various master-side BuildSteps, to start various commands
153 that actually do the build. I return nothing. Eventually I will call
154 .commandComplete() to notify the master-side RemoteCommand that I'm
161 log
.msg("leftover command, dropping it")
165 factory
, version
= registry
.commandRegistry
[command
]
167 raise UnknownCommand
, "unrecognized SlaveCommand '%s'" % command
168 self
.command
= factory(self
, stepId
, args
)
170 log
.msg(" startCommand:%s [id %s]" % (command
,stepId
))
171 self
.remoteStep
= stepref
172 self
.remoteStep
.notifyOnDisconnect(self
.lostRemoteStep
)
173 d
= self
.command
.doStart()
174 d
.addCallback(lambda res
: None)
175 d
.addBoth(self
.commandComplete
)
178 def remote_interruptCommand(self
, stepId
, why
):
179 """Halt the current step."""
180 log
.msg("asked to interrupt current command: %s" % why
)
183 # TODO: just log it, a race could result in their interrupting a
184 # command that wasn't actually running
185 log
.msg(" .. but none was running")
187 self
.command
.doInterrupt()
190 def stopCommand(self
):
191 """Make any currently-running command die, with no further status
192 output. This is used when the buildslave is shutting down or the
193 connection to the master has been lost. Interrupt the command,
194 silence it, and then forget about it."""
197 log
.msg("stopCommand: halting current command %s" % self
.command
)
198 self
.command
.doInterrupt() # shut up! and die!
199 self
.command
= None # forget you!
201 # sendUpdate is invoked by the Commands we spawn
202 def sendUpdate(self
, data
):
203 """This sends the status update to the master-side
204 L{buildbot.process.step.RemoteCommand} object, giving it a sequence
205 number in the process. It adds the update to a queue, and asks the
206 master to acknowledge the update so it can be removed from that
210 # .running comes from service.Service, and says whether the
211 # service is running or not. If we aren't running, don't send any
214 # the update[1]=0 comes from the leftover 'updateNum', which the
215 # master still expects to receive. Provide it to avoid significant
216 # interoperability issues between new slaves and old masters.
220 d
= self
.remoteStep
.callRemote("update", updates
)
221 d
.addCallback(self
.ackUpdate
)
222 d
.addErrback(self
._ackFailed
, "SlaveBuilder.sendUpdate")
def ackUpdate(self, acknum):
    """Callback run when the remote "update" call is acknowledged.

    @param acknum: the value delivered by the acknowledgement; ignored
    """
    # an acknowledgement counts as activity: refresh the "last activity" timer
    self.activity()
def ackComplete(self, dummy):
    """Callback run when the remote "complete" call is acknowledged.

    @param dummy: the value delivered by the acknowledgement; ignored
    """
    # an acknowledgement counts as activity: refresh the "last activity" timer
    self.activity()
def _ackFailed(self, why, where):
    """Errback for the update/complete calls: record where the call failed.

    @param why: the failure that was delivered; deliberately not logged,
                because we don't really care about the details
    @param where: a label naming the call site that attached this errback
    @return: C{None}
    """
    log.msg("SlaveBuilder._ackFailed:", where)
235 # this is fired by the Deferred attached to each Command
236 def commandComplete(self
, failure
):
238 log
.msg("SlaveBuilder.commandFailed", self
.command
)
240 # failure, if present, is a failure.Failure. To send it across
241 # the wire, we must turn it into a pb.CopyableFailure.
242 failure
= pb
.CopyableFailure(failure
)
243 failure
.unsafeTracebacks
= True
246 log
.msg("SlaveBuilder.commandComplete", self
.command
)
249 log
.msg(" but we weren't running, quitting silently")
252 self
.remoteStep
.dontNotifyOnDisconnect(self
.lostRemoteStep
)
253 d
= self
.remoteStep
.callRemote("complete", failure
)
254 d
.addCallback(self
.ackComplete
)
255 d
.addErrback(self
._ackFailed
, "sendComplete")
256 self
.remoteStep
= None
259 def remote_shutdown(self
):
260 print "slave shutting down on command from master"
264 class Bot(pb
.Referenceable
, service
.MultiService
):
265 """I represent the slave-side bot."""
269 def __init__(self
, basedir
, usePTY
, not_really
=0):
270 service
.MultiService
.__init
__(self
)
271 self
.basedir
= basedir
273 self
.not_really
= not_really
def startService(self):
    """Start this service and its children.

    The base directory must already exist; this is checked with an
    assertion before L{service.MultiService.startService} is called.
    """
    basedir_exists = os.path.isdir(self.basedir)
    assert basedir_exists
    service.MultiService.startService(self)
def remote_getDirs(self):
    """List the names of the subdirectories of the base directory.

    Each entry returned by C{os.listdir(self.basedir)} is joined back onto
    C{self.basedir} before it is tested. Testing the bare name would check
    it against the process's current working directory instead, giving the
    wrong answer whenever the two differ.

    @return: a list of directory names (not full paths), in the order
             C{os.listdir} produced them
    """
    basedir = self.basedir
    return [name for name in os.listdir(basedir)
            if os.path.isdir(os.path.join(basedir, name))]
283 def remote_getCommands(self
):
285 for name
, (factory
, version
) in registry
.commandRegistry
.items():
286 commands
[name
] = version
289 def remote_setBuilderList(self
, wanted
):
291 wanted_dirs
= ["info"]
292 for (name
, builddir
) in wanted
:
293 wanted_dirs
.append(builddir
)
294 b
= self
.builders
.get(name
, None)
296 if b
.builddir
!= builddir
:
297 log
.msg("changing builddir for builder %s from %s to %s" \
298 % (name
, b
.builddir
, builddir
))
299 b
.setBuilddir(builddir
)
301 b
= SlaveBuilder(name
, self
.not_really
)
302 b
.usePTY
= self
.usePTY
303 b
.setServiceParent(self
)
304 b
.setBuilddir(builddir
)
305 self
.builders
[name
] = b
307 for name
in self
.builders
.keys():
308 if not name
in map(lambda a
: a
[0], wanted
):
309 log
.msg("removing old builder %s" % name
)
310 self
.builders
[name
].disownServiceParent()
311 del(self
.builders
[name
])
313 for d
in os
.listdir(self
.basedir
):
315 if d
not in wanted_dirs
:
316 log
.msg("I have a leftover directory '%s' that is not "
317 "being used by the buildmaster: you can delete "
def remote_print(self, message):
    """Write a message received from the master to the log.

    @param message: the value to log; it is passed to C{log.msg} as a
                    separate argument rather than formatted into the prefix
    """
    prefix = "message from master:"
    log.msg(prefix, message)
324 def remote_getSlaveInfo(self
):
325 """This command retrieves data from the files in SLAVEDIR/info/* and
326 sends the contents to the buildmaster. These are used to describe
327 the slave and its configuration, and should be created and
328 maintained by the slave administrator. They will be retrieved each
329 time the master-slave connection is established.
333 basedir
= os
.path
.join(self
.basedir
, "info")
334 if not os
.path
.isdir(basedir
):
336 for f
in os
.listdir(basedir
):
337 filename
= os
.path
.join(basedir
, f
)
338 if os
.path
.isfile(filename
):
339 files
[f
] = open(filename
, "r").read()
342 class BotFactory(ReconnectingPBClientFactory
):
343 # 'keepaliveInterval' serves two purposes. The first is to keep the
344 # connection alive: it guarantees that there will be at least some
345 # traffic once every 'keepaliveInterval' seconds, which may help keep an
346 # interposed NAT gateway from dropping the address mapping because it
347 # thinks the connection has been abandoned. The second is to put an upper
348 # limit on how long the buildmaster might have gone away before we notice
349 # it. For this second purpose, we insist upon seeing *some* evidence of
350 # the buildmaster at least once every 'keepaliveInterval' seconds.
351 keepaliveInterval
= None # None = do not use keepalives
353 # 'keepaliveTimeout' seconds before the interval expires, we will send a
354 # keepalive request, both to add some traffic to the connection, and to
355 # prompt a response from the master in case all our builders are idle. We
356 # don't insist upon receiving a timely response from this message: a slow
357 # link might put the request at the wrong end of a large build message.
358 keepaliveTimeout
= 30 # how long we will go without a response
360 keepaliveTimer
= None
def __init__(self, keepaliveInterval, keepaliveTimeout):
    """Initialise the factory and record the keepalive settings.

    @param keepaliveInterval: stored as C{self.keepaliveInterval},
                              overriding the class-level default
    @param keepaliveTimeout: stored as C{self.keepaliveTimeout},
                             overriding the class-level default
    """
    ReconnectingPBClientFactory.__init__(self)
    # per-instance values shadow the class attributes of the same names
    self.keepaliveTimeout = keepaliveTimeout
    self.keepaliveInterval = keepaliveInterval
def startedConnecting(self, connector):
    """Let the base factory handle the event, then remember the connector.

    @param connector: the connector making the attempt; kept on
                      C{self.connector}
    """
    base = ReconnectingPBClientFactory
    base.startedConnecting(self, connector)
    self.connector = connector
375 def gotPerspective(self
, perspective
):
376 ReconnectingPBClientFactory
.gotPerspective(self
, perspective
)
377 self
.perspective
= perspective
379 perspective
.broker
.transport
.setTcpKeepAlive(1)
381 log
.msg("unable to set SO_KEEPALIVE")
382 if not self
.keepaliveInterval
:
383 self
.keepaliveInterval
= 10*60
385 if self
.keepaliveInterval
:
386 log
.msg("sending application-level keepalives every %d seconds" \
387 % self
.keepaliveInterval
)
390 def clientConnectionFailed(self
, connector
, reason
):
391 self
.connector
= None
392 ReconnectingPBClientFactory
.clientConnectionFailed(self
,
395 def clientConnectionLost(self
, connector
, reason
):
396 self
.connector
= None
398 self
.perspective
= None
399 ReconnectingPBClientFactory
.clientConnectionLost(self
,
402 def startTimers(self
):
403 assert self
.keepaliveInterval
404 assert not self
.keepaliveTimer
405 assert not self
.activityTimer
406 # Insist that doKeepalive fires before checkActivity. Really, it
407 # needs to happen at least one RTT beforehand.
408 assert self
.keepaliveInterval
> self
.keepaliveTimeout
410 # arrange to send a keepalive a little while before our deadline
411 when
= self
.keepaliveInterval
- self
.keepaliveTimeout
412 self
.keepaliveTimer
= reactor
.callLater(when
, self
.doKeepalive
)
413 # and check for activity too
414 self
.activityTimer
= reactor
.callLater(self
.keepaliveInterval
,
def stopTimers(self):
    """Cancel the keepalive and activity timers, if they are pending.

    Each timer attribute that currently holds a timer is cancelled and
    reset to C{None}; attributes that are already empty are left alone.
    """
    # keepalive first, then activity
    for attr in ("keepaliveTimer", "activityTimer"):
        pending = getattr(self, attr)
        if pending:
            pending.cancel()
            setattr(self, attr, None)
def activity(self, res=None):
    """Record the current time as the moment of the last activity.

    @param res: ignored; accepting it lets this method be attached
                directly as a Deferred callback
    """
    timestamp = now()
    self.lastActivity = timestamp
def doKeepalive(self):
    """Send an application-level keepalive request over the perspective.

    A reply is recorded through L{activity}; an error is passed to
    L{keepaliveLost}.
    """
    # clear the timer slot before doing anything else
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    # send the keepalive request. If it fails outright, the connection
    # was already dropped, so just log and ignore.
    request = self.perspective.callRemote("keepalive")
    request.addCallback(self.activity)
    request.addErrback(self.keepaliveLost)
def keepaliveLost(self, f):
    """Errback for the keepalive request: just note that it was lost.

    @param f: the failure that was delivered; not inspected
    @return: C{None}
    """
    log.msg("BotFactory.keepaliveLost")
440 def checkActivity(self
):
441 self
.activityTimer
= None
442 if self
.lastActivity
+ self
.keepaliveInterval
< now():
443 log
.msg("BotFactory.checkActivity: nothing from master for "
444 "%d secs" % (now() - self
.lastActivity
))
445 self
.perspective
.broker
.transport
.loseConnection()
449 def stopFactory(self
):
450 ReconnectingPBClientFactory
.stopFactory(self
)
454 class BuildSlave(service
.MultiService
):
457 # debugOpts is a dictionary used during unit tests.
459 # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
460 # calls to remote_print will stall for 'timeout' seconds before
461 # returning. The DelayedCalls used to implement this are stashed in the
462 # list so they can be cancelled later.
464 # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
467 def __init__(self
, buildmaster_host
, port
, name
, passwd
, basedir
,
468 keepalive
, usePTY
, keepaliveTimeout
=30, umask
=None,
470 log
.msg("Creating BuildSlave -- buildbot.version: %s" % buildbot
.version
)
471 service
.MultiService
.__init
__(self
)
472 self
.debugOpts
= debugOpts
.copy()
473 bot
= self
.botClass(basedir
, usePTY
)
474 bot
.setServiceParent(self
)
479 bf
= self
.bf
= BotFactory(keepalive
, keepaliveTimeout
)
480 bf
.startLogin(credentials
.UsernamePassword(name
, passwd
), client
=bot
)
481 self
.connection
= c
= internet
.TCPClient(buildmaster_host
, port
, bf
)
482 c
.setServiceParent(self
)
484 def waitUntilDisconnected(self
):
485 # utility method for testing. Returns a Deferred that will fire when
486 # we lose the connection to the master.
487 if not self
.bf
.perspective
:
488 return defer
.succeed(None)
490 self
.bf
.perspective
.notifyOnDisconnect(lambda res
: d
.callback(None))
493 def startService(self
):
494 if self
.umask
is not None:
496 service
.MultiService
.startService(self
)
498 def stopService(self
):
499 self
.bf
.continueTrying
= 0
501 service
.MultiService
.stopService(self
)
502 # now kill the TCP connection
503 # twisted >2.0.1 does this for us, and leaves _connection=None
504 if self
.connection
._connection
:
505 self
.connection
._connection
.disconnect()