3 import time
, os
, os
.path
, re
, sys
5 from twisted
.spread
import pb
6 from twisted
.python
import log
, usage
, failure
7 from twisted
.internet
import reactor
, defer
8 from twisted
.application
import service
, internet
9 from twisted
.cred
import credentials
11 from buildbot
.util
import now
12 from buildbot
.pbutil
import ReconnectingPBClientFactory
13 from buildbot
.slave
import registry
14 # make sure the standard commands get registered
15 from buildbot
.slave
import commands
17 class NoCommandRunning(pb
.Error
):
19 class WrongCommandRunning(pb
.Error
):
21 class UnknownCommand(pb
.Error
):
25 def __init__(self
, host
, port
, username
, password
):
28 self
.username
= username
29 self
.password
= password
33 """This is an object that can hold state from one step to another in the
34 same build. All SlaveCommands have access to it.
def __init__(self, builder):
    """Remember the builder this per-build state object belongs to.

    builder: stored unchanged on the instance as `self.builder`.
    """
    owner = builder
    self.builder = owner
39 class SlaveBuilder(pb
.Referenceable
, service
.Service
):
41 """This is the local representation of a single Builder: it handles a
42 single kind of build (like an all-warnings build). It has a name and a
43 home directory. The rest of its behavior is determined by the master.
46 stopCommandOnShutdown
= True
48 # remote is a ref to the Builder object on the master side, and is set
49 # when they attach. We use it to detect when the connection to the master
53 # .build points to a SlaveBuild object, a new one for each build
56 # .command points to a SlaveCommand instance, and is set while the step
57 # is running. We use it to implement the stopBuild method.
60 # .remoteStep is a ref to the master-side BuildStep object, and is set
61 # when the step is started
64 def __init__(self
, name
, not_really
):
65 #service.Service.__init__(self) # Service has no __init__ method
67 self
.not_really
= not_really
70 return "<SlaveBuilder '%s'>" % self
.name
def setServiceParent(self, parent):
    """Attach this builder to `parent` and keep a second handle on it.

    The attachment itself is delegated to service.Service; afterwards
    whatever is then found in `self.parent` is also stored as `self.bot`.
    """
    service.Service.setServiceParent(self, parent)
    # self.parent is not permanent: it will go away when the buildmaster's
    # config file changes and this Builder is removed (possibly only because
    # it was modified, in which case it is re-added a moment later). That
    # can happen in the middle of a build, while a step is still running.
    owner = self.parent
    self.bot = owner
80 def setBuilddir(self
, builddir
):
82 self
.builddir
= builddir
83 self
.basedir
= os
.path
.join(self
.bot
.basedir
, self
.builddir
)
84 if not os
.path
.isdir(self
.basedir
):
85 os
.mkdir(self
.basedir
)
87 def stopService(self
):
88 service
.Service
.stopService(self
)
89 if self
.stopCommandOnShutdown
:
95 buildslave
= bot
.parent
100 def remote_setMaster(self
, remote
):
102 self
.remote
.notifyOnDisconnect(self
.lostRemote
)
def remote_print(self, message):
    """Log a message sent by the master, answering "ping" with a ping.

    message: the text received from the master.

    Returns the result of self.remote_ping() when `message` is exactly
    "ping"; otherwise returns None.
    """
    log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
            (self.name, message))
    if message != "ping":
        return None
    return self.remote_ping()
109 def remote_ping(self
):
110 log
.msg("SlaveBuilder.remote_ping(%s)" % self
)
111 if self
.bot
and self
.bot
.parent
:
112 debugOpts
= self
.bot
.parent
.debugOpts
113 if debugOpts
.get("stallPings"):
114 log
.msg(" debug_stallPings")
115 timeout
, timers
= debugOpts
["stallPings"]
117 t
= reactor
.callLater(timeout
, d
.callback
, None)
120 if debugOpts
.get("failPingOnce"):
121 log
.msg(" debug_failPingOnce")
122 class FailPingError(pb
.Error
): pass
123 del debugOpts
['failPingOnce']
124 raise FailPingError("debug_failPingOnce means we should fail")
126 def lostRemote(self
, remote
):
127 log
.msg("lost remote")
130 def lostRemoteStep(self
, remotestep
):
131 log
.msg("lost remote step")
132 self
.remoteStep
= None
133 if self
.stopCommandOnShutdown
:
136 # the following are Commands that can be invoked by the master-side
def remote_startBuild(self):
    """Prepare for a new build.

    Invoked before the first step of any new build is run. A fresh
    SlaveBuild is created and stored as `self.build`; that object holds
    the slave-side state carried from one step to the next.
    """
    fresh_build = SlaveBuild(self)
    self.build = fresh_build
    banner = "%s.startBuild" % self
    log.msg(banner)
145 def remote_startCommand(self
, stepref
, stepId
, command
, args
):
147 This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
148 part of various master-side BuildSteps, to start various commands
149 that actually do the build. I return nothing. Eventually I will call
150 .commandComplete() to notify the master-side RemoteCommand that I'm
157 log
.msg("leftover command, dropping it")
161 factory
, version
= registry
.commandRegistry
[command
]
163 raise UnknownCommand
, "unrecognized SlaveCommand '%s'" % command
164 self
.command
= factory(self
, stepId
, args
)
166 log
.msg(" startCommand:%s [id %s]" % (command
,stepId
))
167 self
.remoteStep
= stepref
168 self
.remoteStep
.notifyOnDisconnect(self
.lostRemoteStep
)
169 d
= self
.command
.doStart()
170 d
.addCallback(lambda res
: None)
171 d
.addBoth(self
.commandComplete
)
174 def remote_interruptCommand(self
, stepId
, why
):
175 """Halt the current step."""
176 log
.msg("asked to interrupt current command: %s" % why
)
179 # TODO: just log it, a race could result in their interrupting a
180 # command that wasn't actually running
181 log
.msg(" .. but none was running")
183 self
.command
.doInterrupt()
186 def stopCommand(self
):
187 """Make any currently-running command die, with no further status
188 output. This is used when the buildslave is shutting down or the
189 connection to the master has been lost. Interrupt the command,
190 silence it, and then forget about it."""
193 log
.msg("stopCommand: halting current command %s" % self
.command
)
194 self
.command
.doInterrupt() # shut up! and die!
195 self
.command
= None # forget you!
197 # sendUpdate is invoked by the Commands we spawn
198 def sendUpdate(self
, data
):
199 """This sends the status update to the master-side
200 L{buildbot.process.step.RemoteCommand} object, giving it a sequence
201 number in the process. It adds the update to a queue, and asks the
202 master to acknowledge the update so it can be removed from that
206 # .running comes from service.Service, and says whether the
207 # service is running or not. If we aren't running, don't send any
210 # the update[1]=0 comes from the leftover 'updateNum', which the
211 # master still expects to receive. Provide it to avoid significant
212 # interoperability issues between new slaves and old masters.
216 d
= self
.remoteStep
.callRemote("update", updates
)
217 d
.addCallback(self
.ackUpdate
)
218 d
.addErrback(self
._ackFailed
, "SlaveBuilder.sendUpdate")
def ackUpdate(self, acknum):
    """Callback for the master's reply to an "update" call.

    acknum: the value delivered by the remote call; it is not examined.
    """
    # any reply counts as a sign of life: update the "last activity" timer
    touch = self.activity
    touch()
    return None
def ackComplete(self, dummy):
    """Callback for the master's reply to a "complete" call.

    dummy: the value delivered by the remote call; it is not examined.
    """
    # any reply counts as a sign of life: update the "last activity" timer
    touch = self.activity
    touch()
    return None
226 def _ackFailed(self
, why
, where
):
227 log
.msg("SlaveBuilder._ackFailed:", where
)
228 #log.err(why) # we don't really care
231 # this is fired by the Deferred attached to each Command
232 def commandComplete(self
, failure
):
234 log
.msg("SlaveBuilder.commandFailed", self
.command
)
236 # failure, if present, is a failure.Failure. To send it across
237 # the wire, we must turn it into a pb.CopyableFailure.
238 failure
= pb
.CopyableFailure(failure
)
239 failure
.unsafeTracebacks
= True
242 log
.msg("SlaveBuilder.commandComplete", self
.command
)
245 log
.msg(" but we weren't running, quitting silently")
248 self
.remoteStep
.dontNotifyOnDisconnect(self
.lostRemoteStep
)
249 d
= self
.remoteStep
.callRemote("complete", failure
)
250 d
.addCallback(self
.ackComplete
)
251 d
.addErrback(self
._ackFailed
, "sendComplete")
252 self
.remoteStep
= None
255 def remote_shutdown(self
):
256 print "slave shutting down on command from master"
260 class Bot(pb
.Referenceable
, service
.MultiService
):
261 """I represent the slave-side bot."""
265 def __init__(self
, basedir
, usePTY
, not_really
=0):
266 service
.MultiService
.__init
__(self
)
267 self
.basedir
= basedir
269 self
.not_really
= not_really
def startService(self):
    """Start this service and, through MultiService, its children.

    Raises AssertionError if `self.basedir` is not an existing directory.
    """
    basedir_exists = os.path.isdir(self.basedir)
    assert basedir_exists
    service.MultiService.startService(self)
def remote_getDirs(self):
    """Return the names of the subdirectories found directly in basedir.

    Returns a list of bare entry names (not full paths) for every entry
    of `self.basedir` that is a directory.

    os.listdir() yields names relative to the directory it was given, so
    each name is joined onto basedir before the isdir() test. Testing the
    bare name would resolve it against the process's current working
    directory and report the wrong entries whenever that is not basedir.
    """
    basedir = self.basedir
    return [entry for entry in os.listdir(basedir)
            if os.path.isdir(os.path.join(basedir, entry))]
279 def remote_getCommands(self
):
281 for name
, (factory
, version
) in registry
.commandRegistry
.items():
282 commands
[name
] = version
285 def remote_setBuilderList(self
, wanted
):
287 for (name
, builddir
) in wanted
:
288 b
= self
.builders
.get(name
, None)
290 if b
.builddir
!= builddir
:
291 log
.msg("changing builddir for builder %s from %s to %s" \
292 % (name
, b
.builddir
, builddir
))
293 b
.setBuilddir(builddir
)
295 b
= SlaveBuilder(name
, self
.not_really
)
296 b
.usePTY
= self
.usePTY
297 b
.setServiceParent(self
)
298 b
.setBuilddir(builddir
)
299 self
.builders
[name
] = b
301 for name
in self
.builders
.keys():
302 if not name
in map(lambda a
: a
[0], wanted
):
303 log
.msg("removing old builder %s" % name
)
304 self
.builders
[name
].disownServiceParent()
305 del(self
.builders
[name
])
def remote_print(self, message):
    """Write a message received from the master to the log.

    message: passed to log.msg after a fixed prefix.
    """
    prefix = "message from master:"
    log.msg(prefix, message)
311 def remote_getSlaveInfo(self
):
312 """This command retrieves data from the files in SLAVEDIR/info/* and
313 sends the contents to the buildmaster. These are used to describe
314 the slave and its configuration, and should be created and
315 maintained by the slave administrator. They will be retrieved each
316 time the master-slave connection is established.
320 basedir
= os
.path
.join(self
.basedir
, "info")
321 if not os
.path
.isdir(basedir
):
323 for f
in os
.listdir(basedir
):
324 filename
= os
.path
.join(basedir
, f
)
325 if os
.path
.isfile(filename
):
326 files
[f
] = open(filename
, "r").read()
def debug_forceBuild(self, name):
    """Send a "forceBuild" request for `name` through self.perspective.

    The outcome is only logged: a result goes to log.msg and a failure to
    log.err. Nothing is returned to the caller.
    """
    request = self.perspective.callRemote("forceBuild", name)
    request.addCallbacks(log.msg, log.err)
333 class BotFactory(ReconnectingPBClientFactory
):
334 # 'keepaliveInterval' serves two purposes. The first is to keep the
335 # connection alive: it guarantees that there will be at least some
336 # traffic once every 'keepaliveInterval' seconds, which may help keep an
337 # interposed NAT gateway from dropping the address mapping because it
338 # thinks the connection has been abandoned. The second is to put an upper
339 # limit on how long the buildmaster might have gone away before we notice
340 # it. For this second purpose, we insist upon seeing *some* evidence of
341 # the buildmaster at least once every 'keepaliveInterval' seconds.
342 keepaliveInterval
= None # None = do not use keepalives
344 # 'keepaliveTimeout' seconds before the interval expires, we will send a
345 # keepalive request, both to add some traffic to the connection, and to
346 # prompt a response from the master in case all our builders are idle. We
347 # don't insist upon receiving a timely response from this message: a slow
348 # link might put the request at the wrong end of a large build message.
349 keepaliveTimeout
= 30 # how long we will go without a response
351 keepaliveTimer
= None
def __init__(self, keepaliveInterval, keepaliveTimeout):
    """Initialise the reconnecting factory and record keepalive settings.

    keepaliveInterval: stored on the instance, overriding the class-level
        default of None (which means "do not use keepalives").
    keepaliveTimeout: stored on the instance, overriding the class-level
        default of 30.
    """
    ReconnectingPBClientFactory.__init__(self)
    (self.keepaliveInterval,
     self.keepaliveTimeout) = (keepaliveInterval, keepaliveTimeout)
def startedConnecting(self, connector):
    """Let the base factory handle the event, then remember the connector.

    connector: stored as `self.connector` after the base-class call.
    """
    base = ReconnectingPBClientFactory
    base.startedConnecting(self, connector)
    self.connector = connector
366 def gotPerspective(self
, perspective
):
367 ReconnectingPBClientFactory
.gotPerspective(self
, perspective
)
368 self
.perspective
= perspective
370 perspective
.broker
.transport
.setTcpKeepAlive(1)
372 log
.msg("unable to set SO_KEEPALIVE")
373 if not self
.keepaliveInterval
:
374 self
.keepaliveInterval
= 10*60
376 if self
.keepaliveInterval
:
377 log
.msg("sending application-level keepalives every %d seconds" \
378 % self
.keepaliveInterval
)
381 def clientConnectionFailed(self
, connector
, reason
):
382 self
.connector
= None
383 ReconnectingPBClientFactory
.clientConnectionFailed(self
,
386 def clientConnectionLost(self
, connector
, reason
):
387 self
.connector
= None
389 self
.perspective
= None
390 ReconnectingPBClientFactory
.clientConnectionLost(self
,
393 def startTimers(self
):
394 assert self
.keepaliveInterval
395 assert not self
.keepaliveTimer
396 assert not self
.activityTimer
397 # Insist that doKeepalive fires before checkActivity. Really, it
398 # needs to happen at least one RTT beforehand.
399 assert self
.keepaliveInterval
> self
.keepaliveTimeout
401 # arrange to send a keepalive a little while before our deadline
402 when
= self
.keepaliveInterval
- self
.keepaliveTimeout
403 self
.keepaliveTimer
= reactor
.callLater(when
, self
.doKeepalive
)
404 # and check for activity too
405 self
.activityTimer
= reactor
.callLater(self
.keepaliveInterval
,
def stopTimers(self):
    """Cancel and forget whichever keepalive/activity timers are set.

    For each of `keepaliveTimer` and `activityTimer`: if the attribute is
    truthy its cancel() is called and the attribute is reset to None;
    otherwise it is left as it is.
    """
    for slot in ("keepaliveTimer", "activityTimer"):
        pending = getattr(self, slot)
        if not pending:
            continue
        pending.cancel()
        setattr(self, slot, None)
def activity(self, res=None):
    """Record the current time, as given by now(), in `self.lastActivity`.

    res: ignored. Accepting it lets this method be attached directly as a
        Deferred callback.
    """
    stamp = now()
    self.lastActivity = stamp
def doKeepalive(self):
    """Send one application-level keepalive request to the master.

    A successful reply is routed to self.activity and a failure to
    self.keepaliveLost.
    """
    # This runs as the keepalive timer's callback, so the stored timer is
    # dropped first.
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    # Send the request. If it fails outright, the connection was already
    # dropped, so the errback just logs and ignores it.
    ping = self.perspective.callRemote("keepalive")
    ping.addCallback(self.activity)
    ping.addErrback(self.keepaliveLost)
def keepaliveLost(self, f):
    """Errback for the keepalive request: log that it was lost.

    f: the failure handed to the errback; it is not inspected.
    """
    note = "BotFactory.keepaliveLost"
    log.msg(note)
431 def checkActivity(self
):
432 self
.activityTimer
= None
433 if self
.lastActivity
+ self
.keepaliveInterval
< now():
434 log
.msg("BotFactory.checkActivity: nothing from master for "
435 "%d secs" % (now() - self
.lastActivity
))
436 self
.perspective
.broker
.transport
.loseConnection()
440 def stopFactory(self
):
441 ReconnectingPBClientFactory
.stopFactory(self
)
445 class BuildSlave(service
.MultiService
):
448 # debugOpts is a dictionary used during unit tests.
450 # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
451 # calls to remote_print will stall for 'timeout' seconds before
452 # returning. The DelayedCalls used to implement this are stashed in the
453 # list so they can be cancelled later.
455 # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
458 def __init__(self
, host
, port
, name
, passwd
, basedir
, keepalive
,
459 usePTY
, keepaliveTimeout
=30, umask
=None, debugOpts
={}):
460 service
.MultiService
.__init
__(self
)
461 self
.debugOpts
= debugOpts
.copy()
462 bot
= self
.botClass(basedir
, usePTY
)
463 bot
.setServiceParent(self
)
468 bf
= self
.bf
= BotFactory(keepalive
, keepaliveTimeout
)
469 bf
.startLogin(credentials
.UsernamePassword(name
, passwd
), client
=bot
)
470 self
.connection
= c
= internet
.TCPClient(host
, port
, bf
)
471 c
.setServiceParent(self
)
473 def waitUntilDisconnected(self
):
474 # utility method for testing. Returns a Deferred that will fire when
475 # we lose the connection to the master.
476 if not self
.bf
.perspective
:
477 return defer
.succeed(None)
479 self
.bf
.perspective
.notifyOnDisconnect(lambda res
: d
.callback(None))
482 def startService(self
):
483 if self
.umask
is not None:
485 service
.MultiService
.startService(self
)
487 def stopService(self
):
488 self
.bf
.continueTrying
= 0
490 service
.MultiService
.stopService(self
)
491 # now kill the TCP connection
492 # twisted >2.0.1 does this for us, and leaves _connection=None
493 if self
.connection
._connection
:
494 self
.connection
._connection
.disconnect()