5 from twisted
.spread
import pb
6 from twisted
.python
import log
7 from twisted
.internet
import reactor
, defer
8 from twisted
.application
import service
, internet
9 from twisted
.cred
import credentials
11 from buildbot
.util
import now
12 from buildbot
.pbutil
import ReconnectingPBClientFactory
13 from buildbot
.slave
import registry
14 # make sure the standard commands get registered
15 from buildbot
.slave
import commands
17 class NoCommandRunning(pb
.Error
):
19 class WrongCommandRunning(pb
.Error
):
21 class UnknownCommand(pb
.Error
):
25 def __init__(self
, host
, port
, username
, password
):
28 self
.username
= username
29 self
.password
= password
33 """This is an object that can hold state from one step to another in the
34 same build. All SlaveCommands have access to it.
36 def __init__(self
, builder
):
37 self
.builder
= builder
39 class SlaveBuilder(pb
.Referenceable
, service
.Service
):
41 """This is the local representation of a single Builder: it handles a
42 single kind of build (like an all-warnings build). It has a name and a
43 home directory. The rest of its behavior is determined by the master.
46 stopCommandOnShutdown
= True
48 # remote is a ref to the Builder object on the master side, and is set
49 # when they attach. We use it to detect when the connection to the master
53 # .build points to a SlaveBuild object, a new one for each build
56 # .command points to a SlaveCommand instance, and is set while the step
57 # is running. We use it to implement the stopBuild method.
60 # .remoteStep is a ref to the master-side BuildStep object, and is set
61 # when the step is started
64 def __init__(self
, name
, not_really
):
65 #service.Service.__init__(self) # Service has no __init__ method
67 self
.not_really
= not_really
70 return "<SlaveBuilder '%s' at %d>" % (self
.name
, id(self
))
def setServiceParent(self, parent):
    """Attach this builder to `parent` and cache the result as `self.bot`.

    The attachment itself is delegated to service.Service.setServiceParent;
    afterwards whatever ended up in `self.parent` is copied to `self.bot`.
    """
    service.Service.setServiceParent(self, parent)
    # note that self.parent will go away when the buildmaster's config
    # file changes and this Builder is removed (possibly because it has
    # been changed, so the Builder will be re-added again in a moment).
    # This may occur during a build, while a step is running.
    owner = self.parent
    self.bot = owner
80 def setBuilddir(self
, builddir
):
82 self
.builddir
= builddir
83 self
.basedir
= os
.path
.join(self
.bot
.basedir
, self
.builddir
)
84 if not os
.path
.isdir(self
.basedir
):
85 os
.mkdir(self
.basedir
)
87 def stopService(self
):
88 service
.Service
.stopService(self
)
89 if self
.stopCommandOnShutdown
:
95 buildslave
= bot
.parent
100 def remote_setMaster(self
, remote
):
102 self
.remote
.notifyOnDisconnect(self
.lostRemote
)
def remote_print(self, message):
    """Log a message from the master; the text "ping" also triggers a ping.

    Returns the result of remote_ping() when `message` is exactly "ping",
    and None for every other message.
    """
    details = (self.name, message)
    log.msg("SlaveBuilder.remote_print(%s): message from master: %s" % details)
    if message != "ping":
        return None
    return self.remote_ping()
109 def remote_ping(self
):
110 log
.msg("SlaveBuilder.remote_ping(%s)" % self
)
111 if self
.bot
and self
.bot
.parent
:
112 debugOpts
= self
.bot
.parent
.debugOpts
113 if debugOpts
.get("stallPings"):
114 log
.msg(" debug_stallPings")
115 timeout
, timers
= debugOpts
["stallPings"]
117 t
= reactor
.callLater(timeout
, d
.callback
, None)
120 if debugOpts
.get("failPingOnce"):
121 log
.msg(" debug_failPingOnce")
122 class FailPingError(pb
.Error
): pass
123 del debugOpts
['failPingOnce']
124 raise FailPingError("debug_failPingOnce means we should fail")
126 def lostRemote(self
, remote
):
127 log
.msg("lost remote")
130 def lostRemoteStep(self
, remotestep
):
131 log
.msg("lost remote step")
132 self
.remoteStep
= None
133 if self
.stopCommandOnShutdown
:
136 # the following are Commands that can be invoked by the master-side
def remote_startBuild(self):
    """Called ahead of the first step of each new build.

    A fresh SlaveBuild is created and stored on `self.build`; it carries
    slave-side state from one step of the build to the next.
    """
    fresh_build = SlaveBuild(self)
    self.build = fresh_build
    log.msg("%s.startBuild" % (self,))
145 def remote_startCommand(self
, stepref
, stepId
, command
, args
):
147 This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
148 part of various master-side BuildSteps, to start various commands
149 that actually do the build. I return nothing. Eventually I will call
150 .commandComplete() to notify the master-side RemoteCommand that I'm
157 log
.msg("leftover command, dropping it")
161 factory
, version
= registry
.commandRegistry
[command
]
163 raise UnknownCommand
, "unrecognized SlaveCommand '%s'" % command
164 self
.command
= factory(self
, stepId
, args
)
166 log
.msg(" startCommand:%s [id %s]" % (command
,stepId
))
167 self
.remoteStep
= stepref
168 self
.remoteStep
.notifyOnDisconnect(self
.lostRemoteStep
)
169 d
= self
.command
.doStart()
170 d
.addCallback(lambda res
: None)
171 d
.addBoth(self
.commandComplete
)
174 def remote_interruptCommand(self
, stepId
, why
):
175 """Halt the current step."""
176 log
.msg("asked to interrupt current command: %s" % why
)
179 # TODO: just log it, a race could result in their interrupting a
180 # command that wasn't actually running
181 log
.msg(" .. but none was running")
183 self
.command
.doInterrupt()
186 def stopCommand(self
):
187 """Make any currently-running command die, with no further status
188 output. This is used when the buildslave is shutting down or the
189 connection to the master has been lost. Interrupt the command,
190 silence it, and then forget about it."""
193 log
.msg("stopCommand: halting current command %s" % self
.command
)
194 self
.command
.doInterrupt() # shut up! and die!
195 self
.command
= None # forget you!
197 # sendUpdate is invoked by the Commands we spawn
198 def sendUpdate(self
, data
):
199 """This sends the status update to the master-side
200 L{buildbot.process.step.RemoteCommand} object, giving it a sequence
201 number in the process. It adds the update to a queue, and asks the
202 master to acknowledge the update so it can be removed from that
206 # .running comes from service.Service, and says whether the
207 # service is running or not. If we aren't running, don't send any
210 # the update[1]=0 comes from the leftover 'updateNum', which the
211 # master still expects to receive. Provide it to avoid significant
212 # interoperability issues between new slaves and old masters.
216 d
= self
.remoteStep
.callRemote("update", updates
)
217 d
.addCallback(self
.ackUpdate
)
218 d
.addErrback(self
._ackFailed
, "SlaveBuilder.sendUpdate")
def ackUpdate(self, acknum):
    """Callback fired when the master acknowledges an "update" call.

    `acknum` (the value the remote call returned) is not used.
    """
    # an acknowledgement counts as traffic: update the "last activity" timer
    self.activity()
def ackComplete(self, dummy):
    """Callback fired when the master acknowledges a "complete" call.

    `dummy` (the value the remote call returned) is not used.
    """
    # an acknowledgement counts as traffic: update the "last activity" timer
    self.activity()
def _ackFailed(self, why, where):
    """Errback for the update/complete remote calls: log which one failed.

    `where` is the label given when the errback was registered. The
    failure `why` is deliberately not logged.
    """
    log.msg("SlaveBuilder._ackFailed:", where)
    #log.err(why) # we don't really care
231 # this is fired by the Deferred attached to each Command
232 def commandComplete(self
, failure
):
234 log
.msg("SlaveBuilder.commandFailed", self
.command
)
236 # failure, if present, is a failure.Failure. To send it across
237 # the wire, we must turn it into a pb.CopyableFailure.
238 failure
= pb
.CopyableFailure(failure
)
239 failure
.unsafeTracebacks
= True
242 log
.msg("SlaveBuilder.commandComplete", self
.command
)
245 log
.msg(" but we weren't running, quitting silently")
248 self
.remoteStep
.dontNotifyOnDisconnect(self
.lostRemoteStep
)
249 d
= self
.remoteStep
.callRemote("complete", failure
)
250 d
.addCallback(self
.ackComplete
)
251 d
.addErrback(self
._ackFailed
, "sendComplete")
252 self
.remoteStep
= None
255 def remote_shutdown(self
):
256 print "slave shutting down on command from master"
260 class Bot(pb
.Referenceable
, service
.MultiService
):
261 """I represent the slave-side bot."""
265 def __init__(self
, basedir
, usePTY
, not_really
=0):
266 service
.MultiService
.__init
__(self
)
267 self
.basedir
= basedir
269 self
.not_really
= not_really
def startService(self):
    """Start the bot and its child services.

    Asserts beforehand that `self.basedir` is an existing directory.
    """
    basedir_exists = os.path.isdir(self.basedir)
    assert basedir_exists
    service.MultiService.startService(self)
def remote_getDirs(self):
    """Return the names of the subdirectories of the bot's base directory.

    Every entry from os.listdir() is joined to `self.basedir` before the
    directory test, so the answer does not depend on the process's current
    working directory (testing the bare names only worked when the cwd
    happened to be the base directory).

    Returns a list of bare entry names (not full paths), in the order
    os.listdir() produced them.
    """
    base = self.basedir
    return [entry for entry in os.listdir(base)
            if os.path.isdir(os.path.join(base, entry))]
279 def remote_getCommands(self
):
281 for name
, (factory
, version
) in registry
.commandRegistry
.items():
282 commands
[name
] = version
285 def remote_setBuilderList(self
, wanted
):
288 for (name
, builddir
) in wanted
:
289 wanted_dirs
.append(builddir
)
290 b
= self
.builders
.get(name
, None)
292 if b
.builddir
!= builddir
:
293 log
.msg("changing builddir for builder %s from %s to %s" \
294 % (name
, b
.builddir
, builddir
))
295 b
.setBuilddir(builddir
)
297 b
= SlaveBuilder(name
, self
.not_really
)
298 b
.usePTY
= self
.usePTY
299 b
.setServiceParent(self
)
300 b
.setBuilddir(builddir
)
301 self
.builders
[name
] = b
303 for name
in self
.builders
.keys():
304 if not name
in map(lambda a
: a
[0], wanted
):
305 log
.msg("removing old builder %s" % name
)
306 self
.builders
[name
].disownServiceParent()
307 del(self
.builders
[name
])
309 for d
in os
.listdir(self
.basedir
):
311 if d
not in wanted_dirs
:
312 log
.msg("I have a leftover directory '%s' that is not "
313 "being used by the buildmaster: you can delete "
def remote_print(self, message):
    """Write a message received from the master to the log."""
    log.msg("message from master:", message)
320 def remote_getSlaveInfo(self
):
321 """This command retrieves data from the files in SLAVEDIR/info/* and
322 sends the contents to the buildmaster. These are used to describe
323 the slave and its configuration, and should be created and
324 maintained by the slave administrator. They will be retrieved each
325 time the master-slave connection is established.
329 basedir
= os
.path
.join(self
.basedir
, "info")
330 if not os
.path
.isdir(basedir
):
332 for f
in os
.listdir(basedir
):
333 filename
= os
.path
.join(basedir
, f
)
334 if os
.path
.isfile(filename
):
335 files
[f
] = open(filename
, "r").read()
338 class BotFactory(ReconnectingPBClientFactory
):
339 # 'keepaliveInterval' serves two purposes. The first is to keep the
340 # connection alive: it guarantees that there will be at least some
341 # traffic once every 'keepaliveInterval' seconds, which may help keep an
342 # interposed NAT gateway from dropping the address mapping because it
343 # thinks the connection has been abandoned. The second is to put an upper
344 # limit on how long the buildmaster might have gone away before we notice
345 # it. For this second purpose, we insist upon seeing *some* evidence of
346 # the buildmaster at least once every 'keepaliveInterval' seconds.
347 keepaliveInterval
= None # None = do not use keepalives
349 # 'keepaliveTimeout' seconds before the interval expires, we will send a
350 # keepalive request, both to add some traffic to the connection, and to
351 # prompt a response from the master in case all our builders are idle. We
352 # don't insist upon receiving a timely response from this message: a slow
353 # link might put the request at the wrong end of a large build message.
354 keepaliveTimeout
= 30 # how long we will go without a response
356 keepaliveTimer
= None
def __init__(self, keepaliveInterval, keepaliveTimeout):
    """Initialise the reconnecting factory and record the keepalive settings.

    Both arguments are stored on the instance under their own names,
    overriding the class-level defaults.
    """
    ReconnectingPBClientFactory.__init__(self)
    self.keepaliveInterval, self.keepaliveTimeout = (
        keepaliveInterval, keepaliveTimeout)
def startedConnecting(self, connector):
    """Run the base-class handling, then remember the connector in use."""
    base = ReconnectingPBClientFactory
    base.startedConnecting(self, connector)
    self.connector = connector
371 def gotPerspective(self
, perspective
):
372 ReconnectingPBClientFactory
.gotPerspective(self
, perspective
)
373 self
.perspective
= perspective
375 perspective
.broker
.transport
.setTcpKeepAlive(1)
377 log
.msg("unable to set SO_KEEPALIVE")
378 if not self
.keepaliveInterval
:
379 self
.keepaliveInterval
= 10*60
381 if self
.keepaliveInterval
:
382 log
.msg("sending application-level keepalives every %d seconds" \
383 % self
.keepaliveInterval
)
386 def clientConnectionFailed(self
, connector
, reason
):
387 self
.connector
= None
388 ReconnectingPBClientFactory
.clientConnectionFailed(self
,
391 def clientConnectionLost(self
, connector
, reason
):
392 self
.connector
= None
394 self
.perspective
= None
395 ReconnectingPBClientFactory
.clientConnectionLost(self
,
398 def startTimers(self
):
399 assert self
.keepaliveInterval
400 assert not self
.keepaliveTimer
401 assert not self
.activityTimer
402 # Insist that doKeepalive fires before checkActivity. Really, it
403 # needs to happen at least one RTT beforehand.
404 assert self
.keepaliveInterval
> self
.keepaliveTimeout
406 # arrange to send a keepalive a little while before our deadline
407 when
= self
.keepaliveInterval
- self
.keepaliveTimeout
408 self
.keepaliveTimer
= reactor
.callLater(when
, self
.doKeepalive
)
409 # and check for activity too
410 self
.activityTimer
= reactor
.callLater(self
.keepaliveInterval
,
def stopTimers(self):
    """Cancel whichever of the keepalive and activity timers are pending.

    Each timer attribute holding a true value is cancelled and then reset
    to None, keepalive first; attributes that are unset are left alone.
    """
    for attr in ("keepaliveTimer", "activityTimer"):
        pending = getattr(self, attr)
        if pending:
            pending.cancel()
            setattr(self, attr, None)
def activity(self, res=None):
    """Record the current time, from now(), in `self.lastActivity`.

    `res` is accepted and ignored so the method can be attached directly
    as a Deferred callback.
    """
    self.lastActivity = now()
def doKeepalive(self):
    """Send one application-level keepalive request to the master.

    Clears `self.keepaliveTimer`, logs the attempt, and issues the
    "keepalive" remote call on the current perspective. A reply is routed
    to activity(); a failure is routed to keepaliveLost().
    """
    # send the keepalive request. If it fails outright, the connection
    # was already dropped, so just log and ignore.
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    request = self.perspective.callRemote("keepalive")
    request.addCallback(self.activity)
    request.addErrback(self.keepaliveLost)
def keepaliveLost(self, f):
    """Errback for the keepalive request: log the loss; `f` is not used."""
    log.msg("BotFactory.keepaliveLost")
436 def checkActivity(self
):
437 self
.activityTimer
= None
438 if self
.lastActivity
+ self
.keepaliveInterval
< now():
439 log
.msg("BotFactory.checkActivity: nothing from master for "
440 "%d secs" % (now() - self
.lastActivity
))
441 self
.perspective
.broker
.transport
.loseConnection()
445 def stopFactory(self
):
446 ReconnectingPBClientFactory
.stopFactory(self
)
450 class BuildSlave(service
.MultiService
):
453 # debugOpts is a dictionary used during unit tests.
455 # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
456 # calls to remote_print will stall for 'timeout' seconds before
457 # returning. The DelayedCalls used to implement this are stashed in the
458 # list so they can be cancelled later.
460 # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
463 def __init__(self
, host
, port
, name
, passwd
, basedir
, keepalive
,
464 usePTY
, keepaliveTimeout
=30, umask
=None, debugOpts
={}):
465 service
.MultiService
.__init
__(self
)
466 self
.debugOpts
= debugOpts
.copy()
467 bot
= self
.botClass(basedir
, usePTY
)
468 bot
.setServiceParent(self
)
473 bf
= self
.bf
= BotFactory(keepalive
, keepaliveTimeout
)
474 bf
.startLogin(credentials
.UsernamePassword(name
, passwd
), client
=bot
)
475 self
.connection
= c
= internet
.TCPClient(host
, port
, bf
)
476 c
.setServiceParent(self
)
478 def waitUntilDisconnected(self
):
479 # utility method for testing. Returns a Deferred that will fire when
480 # we lose the connection to the master.
481 if not self
.bf
.perspective
:
482 return defer
.succeed(None)
484 self
.bf
.perspective
.notifyOnDisconnect(lambda res
: d
.callback(None))
487 def startService(self
):
488 if self
.umask
is not None:
490 service
.MultiService
.startService(self
)
492 def stopService(self
):
493 self
.bf
.continueTrying
= 0
495 service
.MultiService
.stopService(self
)
496 # now kill the TCP connection
497 # twisted >2.0.1 does this for us, and leaves _connection=None
498 if self
.connection
._connection
:
499 self
.connection
._connection
.disconnect()