3 from zope
.interface
import implements
4 from twisted
.python
import log
, components
5 from twisted
.spread
import pb
6 from twisted
.internet
import reactor
, defer
8 from buildbot
import interfaces
9 from buildbot
.status
.progress
import Expectations
10 from buildbot
.util
import now
11 from buildbot
.process
import base
13 (ATTACHING
, # slave attached, still checking hostinfo/etc
14 IDLE
, # idle, available for use
15 PINGING
, # build about to start, making sure it is still alive
16 BUILDING
, # build is running
19 class SlaveBuilder(pb
.Referenceable
):
20 """I am the master-side representative for one of the
21 L{buildbot.slave.bot.SlaveBuilder} objects that lives in a remote
22 buildbot. When a remote builder connects, I query it for command versions
23 and then make it available to any Builds that are ready to run. """
26 self
.ping_watchers
= []
27 self
.state
= ATTACHING
30 self
.builder_name
= None
35 r
+= " builder=%s" % self
.builder_name
37 r
+= " slave=%s" % self
.slave
.slavename
41 def setBuilder(self
, b
):
43 self
.builder_name
= b
.name
45 def getSlaveCommandVersion(self
, command
, oldversion
=None):
46 if self
.remoteCommands
is None:
47 # the slave is 0.5.0 or earlier
49 return self
.remoteCommands
.get(command
)
51 def isAvailable(self
):
52 # if this SlaveBuilder is busy, then it's definitely not available
56 # otherwise, check in with the BuildSlave
58 return self
.slave
.canStartBuild()
60 # no slave? not very available.
64 return self
.state
!= IDLE
66 def attached(self
, slave
, remote
, commands
):
68 @type slave: L{buildbot.buildslave.BuildSlave}
69 @param slave: the BuildSlave that represents the buildslave as a
71 @type remote: L{twisted.spread.pb.RemoteReference}
72 @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
73 @type commands: dict: string -> string, or None
74 @param commands: provides the slave's version of each RemoteCommand
78 self
.remoteCommands
= commands
# maps command name to version
79 self
.slave
.addSlaveBuilder(self
)
80 log
.msg("Buildslave %s attached to %s" % (slave
.slavename
,
82 d
= self
.remote
.callRemote("setMaster", self
)
83 d
.addErrback(self
._attachFailure
, "Builder.setMaster")
84 d
.addCallback(self
._attached
2)
87 def _attached2(self
, res
):
88 d
= self
.remote
.callRemote("print", "attached")
89 d
.addErrback(self
._attachFailure
, "Builder.print 'attached'")
90 d
.addCallback(self
._attached
3)
93 def _attached3(self
, res
):
94 # now we say they're really attached
98 def _attachFailure(self
, why
, where
):
99 assert isinstance(where
, str)
105 log
.msg("Buildslave %s detached from %s" % (self
.slave
.slavename
,
108 self
.slave
.removeSlaveBuilder(self
)
111 self
.remoteCommands
= None
    def buildStarted(self):
        """Mark this SlaveBuilder as busy: a Build is now running on it.

        While in the BUILDING state, isAvailable() will report this slave
        as unavailable, so it will not be handed a second build.
        """
        self.state = BUILDING
116 def buildFinished(self
):
118 reactor
.callLater(0, self
.builder
.botmaster
.maybeStartAllBuilds
)
120 def ping(self
, timeout
, status
=None):
121 """Ping the slave to make sure it is still there. Returns a Deferred
122 that fires with True if it is.
124 @param status: if you point this at a BuilderStatus, a 'pinging'
125 event will be pushed.
129 newping
= not self
.ping_watchers
131 self
.ping_watchers
.append(d
)
134 event
= status
.addEvent(["pinging"], "yellow")
135 d2
= defer
.Deferred()
136 d2
.addCallback(self
._pong
_status
, event
)
137 self
.ping_watchers
.insert(0, d2
)
138 # I think it will make the tests run smoother if the status
139 # is updated before the ping completes
140 Ping().ping(self
.remote
, timeout
).addCallback(self
._pong
)
144 def _pong(self
, res
):
145 watchers
, self
.ping_watchers
= self
.ping_watchers
, []
149 def _pong_status(self
, res
, event
):
151 event
.text
= ["ping", "success"]
152 event
.color
= "green"
154 event
.text
= ["ping", "failed"]
162 def ping(self
, remote
, timeout
):
163 assert not self
.running
165 log
.msg("sending ping")
166 self
.d
= defer
.Deferred()
167 # TODO: add a distinct 'ping' command on the slave.. using 'print'
168 # for this purpose is kind of silly.
169 remote
.callRemote("print", "ping").addCallbacks(self
._pong
,
171 errbackArgs
=(remote
,))
173 # We use either our own timeout or the (long) TCP timeout to detect
174 # silently-missing slaves. This might happen because of a NAT
175 # timeout or a routing loop. If the slave just shuts down (and we
176 # somehow missed the FIN), we should get a "connection refused"
178 self
.timer
= reactor
.callLater(timeout
, self
._ping
_timeout
, remote
)
181 def _ping_timeout(self
, remote
):
182 log
.msg("ping timeout")
183 # force the BuildSlave to disconnect, since this indicates that
184 # the bot is unreachable.
186 remote
.broker
.transport
.loseConnection()
187 # the forcibly-lost connection will now cause the ping to fail
189 def _stopTimer(self
):
198 def _pong(self
, res
):
199 log
.msg("ping finished: success")
201 self
.d
.callback(True)
203 def _ping_failed(self
, res
, remote
):
204 log
.msg("ping finished: failure")
206 # the slave has some sort of internal error, disconnect them. If we
207 # don't, we'll requeue a build and ping them again right away,
208 # creating a nasty loop.
209 remote
.broker
.transport
.loseConnection()
210 # TODO: except, if they actually did manage to get this far, they'll
211 # probably reconnect right away, and we'll do this game again. Maybe
212 # it would be better to leave them in the PINGING state.
213 self
.d
.callback(False)
216 class Builder(pb
.Referenceable
):
217 """I manage all Builds of a given type.
219 Each Builder is created by an entry in the config file (the c['builders']
220 list), with a number of parameters.
222 One of these parameters is the L{buildbot.process.factory.BuildFactory}
223 object that is associated with this Builder. The factory is responsible
224 for creating new L{Build<buildbot.process.base.Build>} objects. Each
225 Build object defines when and how the build is performed, so a new
226 Factory or Builder should be defined to control this behavior.
228 The Builder holds on to a number of L{base.BuildRequest} objects in a
229 list named C{.buildable}. Incoming BuildRequest objects will be added to
230 this list, or (if possible) merged into an existing request. When a slave
231 becomes available, I will use my C{BuildFactory} to turn the request into
232 a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
233 goes into C{.building} while it runs. Once the build finishes, I will
236 I maintain a list of available SlaveBuilders, one for each connected
237 slave that the C{slavenames} parameter says we can use. Some of these
238 will be idle, some of them will be busy running builds for me. If there
239 are multiple slaves, I can run multiple builds at once.
241 I also manage forced builds, progress expectation (ETA) management, and
242 some status delivery chores.
244 I am persisted in C{BASEDIR/BUILDERNAME/builder}, so I can remember how
245 long a build usually takes to run (in my C{expectations} attribute). This
246 pickle also includes the L{buildbot.status.builder.BuilderStatus} object,
247 which remembers the set of historic builds.
249 @type buildable: list of L{buildbot.process.base.BuildRequest}
250 @ivar buildable: BuildRequests that are ready to build, but which are
251 waiting for a buildslave to be available.
253 @type building: list of L{buildbot.process.base.Build}
254 @ivar building: Builds that are actively running
258 expectations
= None # this is created the first time we get a good build
259 START_BUILD_TIMEOUT
= 10
260 CHOOSE_SLAVES_RANDOMLY
= True # disabled for determinism during tests
262 def __init__(self
, setup
, builder_status
):
265 @param setup: builder setup data, as stored in
266 BuildmasterConfig['builders']. Contains name,
267 slavename(s), builddir, factory, locks.
268 @type builder_status: L{buildbot.status.builder.BuilderStatus}
270 self
.name
= setup
['name']
272 if setup
.has_key('slavename'):
273 self
.slavenames
.append(setup
['slavename'])
274 if setup
.has_key('slavenames'):
275 self
.slavenames
.extend(setup
['slavenames'])
276 self
.builddir
= setup
['builddir']
277 self
.buildFactory
= setup
['factory']
278 self
.locks
= setup
.get("locks", [])
279 if setup
.has_key('periodicBuildTime'):
280 raise ValueError("periodicBuildTime can no longer be defined as"
281 " part of the Builder: use scheduler.Periodic"
284 # build/wannabuild slots: Build objects move along this sequence
288 # buildslaves which have connected but which are not yet available.
289 # These are always in the ATTACHING state.
290 self
.attaching_slaves
= []
292 # buildslaves at our disposal. Each SlaveBuilder instance has a
293 # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
294 # Build is about to start, to make sure that they're still alive.
297 self
.builder_status
= builder_status
298 self
.builder_status
.setSlavenames(self
.slavenames
)
300 # for testing, to help synchronize tests
301 self
.watchers
= {'attach': [], 'detach': [], 'detach_all': [],
    def setBotmaster(self, botmaster):
        """Attach the BotMaster that coordinates all Builders.

        @param botmaster: the L{buildbot.master.BotMaster} instance; it is
            later used (e.g. via C{botmaster.maybeStartAllBuilds}) to kick
            the scheduling loop when slaves become free.
        """
        self.botmaster = botmaster
307 def compareToSetup(self
, setup
):
309 setup_slavenames
= []
310 if setup
.has_key('slavename'):
311 setup_slavenames
.append(setup
['slavename'])
312 setup_slavenames
.extend(setup
.get('slavenames', []))
313 if setup_slavenames
!= self
.slavenames
:
314 diffs
.append('slavenames changed from %s to %s' \
315 % (self
.slavenames
, setup_slavenames
))
316 if setup
['builddir'] != self
.builddir
:
317 diffs
.append('builddir changed from %s to %s' \
318 % (self
.builddir
, setup
['builddir']))
319 if setup
['factory'] != self
.buildFactory
: # compare objects
320 diffs
.append('factory changed')
321 oldlocks
= [(lock
.__class
__, lock
.name
)
322 for lock
in setup
.get('locks',[])]
323 newlocks
= [(lock
.__class
__, lock
.name
)
324 for lock
in self
.locks
]
325 if oldlocks
!= newlocks
:
326 diffs
.append('locks changed from %s to %s' % (oldlocks
, newlocks
))
330 return "<Builder '%s' at %d>" % (self
.name
, id(self
))
    def submitBuildRequest(self, req):
        """Accept a new BuildRequest and try to start building it.

        The request is timestamped, appended to the C{self.buildable}
        queue, notified that it has been submitted, and announced to our
        BuilderStatus; finally maybeStartBuild() looks for an idle slave.
        The ordering here is deliberate: observers watching the
        BuilderStatus must see the request before a build can start.

        @param req: the L{buildbot.process.base.BuildRequest} to queue
        """
        req.submittedAt = now()
        self.buildable.append(req)
        req.requestSubmitted(self)
        self.builder_status.addBuildRequest(req.status)
        self.maybeStartBuild()
340 def cancelBuildRequest(self
, req
):
341 if req
in self
.buildable
:
342 self
.buildable
.remove(req
)
343 self
.builder_status
.removeBuildRequest(req
.status
)
347 def __getstate__(self
):
348 d
= self
.__dict
__.copy()
349 # TODO: note that d['buildable'] can contain Deferreds
350 del d
['building'] # TODO: move these back to .buildable?
354 def __setstate__(self
, d
):
359 def consumeTheSoulOfYourPredecessor(self
, old
):
360 """Suck the brain out of an old Builder.
362 This takes all the runtime state from an existing Builder and moves
363 it into ourselves. This is used when a Builder is changed in the
364 master.cfg file: the new Builder has a different factory, but we want
365 all the builds that were queued for the old one to get processed by
366 the new one. Any builds which are already running will keep running.
367 The new Builder will get as many of the old SlaveBuilder objects as
370 log
.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
372 # we claim all the pending builds, removing them from the old
373 # Builder's queue. This insures that the old Builder will not start
375 log
.msg(" stealing %s buildrequests" % len(old
.buildable
))
376 self
.buildable
.extend(old
.buildable
)
379 # old.building is not migrated: it keeps track of builds which were
380 # in progress in the old Builder. When those builds finish, the old
381 # Builder will be notified, not us. However, since the old
382 # SlaveBuilder will point to us, it is our maybeStartBuild() that
385 self
.builder_status
.setBigState("building")
387 # Our set of slavenames may be different. Steal any of the old
388 # buildslaves that we want to keep using.
389 for sb
in old
.slaves
[:]:
390 if sb
.slave
.slavename
in self
.slavenames
:
391 log
.msg(" stealing buildslave %s" % sb
)
392 self
.slaves
.append(sb
)
393 old
.slaves
.remove(sb
)
396 # old.attaching_slaves:
397 # these SlaveBuilders are waiting on a sequence of calls:
398 # remote.setMaster and remote.print . When these two complete,
399 # old._attached will be fired, which will add a 'connect' event to
400 # the builder_status and try to start a build. However, we've pulled
401 # everything out of the old builder's queue, so it will have no work
402 # to do. The outstanding remote.setMaster/print call will be holding
403 # the last reference to the old builder, so it will disappear just
404 # after that response comes back.
406 # The BotMaster will ask the slave to re-set their list of Builders
407 # shortly after this function returns, which will cause our
408 # attached() method to be fired with a bunch of references to remote
409 # SlaveBuilders, some of which we already have (by stealing them
410 # from the old Builder), some of which will be new. The new ones
411 # will be re-attached.
413 # Therefore, we don't need to do anything about old.attaching_slaves
417 def fireTestEvent(self
, name
, fire_with
=None):
418 if fire_with
is None:
420 watchers
= self
.watchers
[name
]
421 self
.watchers
[name
] = []
423 reactor
.callLater(0, w
.callback
, fire_with
)
425 def attached(self
, slave
, remote
, commands
):
426 """This is invoked by the BuildSlave when the self.slavename bot
427 registers their builder.
429 @type slave: L{buildbot.master.BuildSlave}
430 @param slave: the BuildSlave that represents the buildslave as a whole
431 @type remote: L{twisted.spread.pb.RemoteReference}
432 @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
433 @type commands: dict: string -> string, or None
434 @param commands: provides the slave's version of each RemoteCommand
436 @rtype: L{twisted.internet.defer.Deferred}
437 @return: a Deferred that fires (with 'self') when the slave-side
438 builder is fully attached and ready to accept commands.
440 for s
in self
.attaching_slaves
+ self
.slaves
:
442 # already attached to them. This is fairly common, since
443 # attached() gets called each time we receive the builder
444 # list from the slave, and we ask for it each time we add or
445 # remove a builder. So if the slave is hosting builders
446 # A,B,C, and the config file changes A, we'll remove A and
447 # re-add it, triggering two builder-list requests, getting
448 # two redundant calls to attached() for B, and another two
451 # Therefore, when we see that we're already attached, we can
452 # just ignore it. TODO: build a diagram of the state
453 # transitions here, I'm concerned about sb.attached() failing
454 # and leaving sb.state stuck at 'ATTACHING', and about
455 # the detached() message arriving while there's some
456 # transition pending such that the response to the transition
458 return defer
.succeed(self
)
462 self
.attaching_slaves
.append(sb
)
463 d
= sb
.attached(slave
, remote
, commands
)
464 d
.addCallback(self
._attached
)
465 d
.addErrback(self
._not
_attached
, slave
)
468 def _attached(self
, sb
):
469 # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
470 self
.builder_status
.addPointEvent(['connect', sb
.slave
.slavename
])
471 self
.attaching_slaves
.remove(sb
)
472 self
.slaves
.append(sb
)
473 reactor
.callLater(0, self
.maybeStartBuild
)
475 self
.fireTestEvent('attach')
478 def _not_attached(self
, why
, slave
):
479 # already log.err'ed by SlaveBuilder._attachFailure
480 # TODO: make this .addSlaveEvent?
481 # TODO: remove from self.slaves (except that detached() should get
483 self
.builder_status
.addPointEvent(['failed', 'connect',
484 slave
.slave
.slavename
])
485 # TODO: add an HTMLLogFile of the exception
486 self
.fireTestEvent('attach', why
)
488 def detached(self
, slave
):
489 """This is called when the connection to the bot is lost."""
490 log
.msg("%s.detached" % self
, slave
.slavename
)
491 for sb
in self
.attaching_slaves
+ self
.slaves
:
492 if sb
.slave
== slave
:
495 log
.msg("WEIRD: Builder.detached(%s) (%s)"
496 " not in attaching_slaves(%s)"
497 " or slaves(%s)" % (slave
, slave
.slavename
,
498 self
.attaching_slaves
,
501 if sb
.state
== BUILDING
:
502 # the Build's .lostRemote method (invoked by a notifyOnDisconnect
503 # handler) will cause the Build to be stopped, probably right
504 # after the notifyOnDisconnect that invoked us finishes running.
506 # TODO: should failover to a new Build
507 #self.retryBuild(sb.build)
510 if sb
in self
.attaching_slaves
:
511 self
.attaching_slaves
.remove(sb
)
512 if sb
in self
.slaves
:
513 self
.slaves
.remove(sb
)
515 # TODO: make this .addSlaveEvent?
516 self
.builder_status
.addPointEvent(['disconnect', slave
.slavename
])
517 sb
.detached() # inform the SlaveBuilder that their slave went away
518 self
.updateBigStatus()
519 self
.fireTestEvent('detach')
521 self
.fireTestEvent('detach_all')
523 def updateBigStatus(self
):
525 self
.builder_status
.setBigState("offline")
527 self
.builder_status
.setBigState("building")
529 self
.builder_status
.setBigState("idle")
530 self
.fireTestEvent('idle')
532 def maybeStartBuild(self
):
533 log
.msg("maybeStartBuild %s: %s %s" %
534 (self
, self
.buildable
, self
.slaves
))
535 if not self
.buildable
:
536 self
.updateBigStatus()
537 return # nothing to do
540 available_slaves
= [sb
for sb
in self
.slaves
if sb
.isAvailable()]
541 if not available_slaves
:
542 log
.msg("%s: want to start build, but we don't have a remote"
544 self
.updateBigStatus()
546 if self
.CHOOSE_SLAVES_RANDOMLY
:
547 sb
= random
.choice(available_slaves
)
549 sb
= available_slaves
[0]
551 # there is something to build, and there is a slave on which to build
552 # it. Grab the oldest request, see if we can merge it with anything
554 req
= self
.buildable
.pop(0)
555 self
.builder_status
.removeBuildRequest(req
.status
)
557 for br
in self
.buildable
[:]:
558 if req
.canBeMergedWith(br
):
559 self
.buildable
.remove(br
)
560 self
.builder_status
.removeBuildRequest(br
.status
)
562 requests
= [req
] + mergers
564 # Create a new build from our build factory and set ourself as the
566 build
= self
.buildFactory
.newBuild(requests
)
567 build
.setBuilder(self
)
568 build
.setLocks(self
.locks
)
571 self
.startBuild(build
, sb
)
573 def startBuild(self
, build
, sb
):
574 """Start a build on the given slave.
575 @param build: the L{base.Build} to start
576 @param sb: the L{SlaveBuilder} which will host this build
578 @return: a Deferred which fires with a
579 L{buildbot.interfaces.IBuildControl} that can be used to stop the
580 Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
581 watch the Build as it runs. """
583 self
.building
.append(build
)
584 self
.updateBigStatus()
586 log
.msg("starting build %s.. pinging the slave %s" % (build
, sb
))
587 # ping the slave to make sure they're still there. If they're fallen
588 # off the map (due to a NAT timeout or something), this will fail in
589 # a couple of minutes, depending upon the TCP timeout. TODO: consider
590 # making this time out faster, or at least characterize the likely
592 d
= sb
.ping(self
.START_BUILD_TIMEOUT
)
593 d
.addCallback(self
._startBuild
_1, build
, sb
)
596 def _startBuild_1(self
, res
, build
, sb
):
598 return self
._startBuildFailed
("slave ping failed", build
, sb
)
599 # The buildslave is ready to go. sb.buildStarted() sets its state to
600 # BUILDING (so we won't try to use it for any other builds). This
601 # gets set back to IDLE by the Build itself when it finishes.
603 d
= sb
.remote
.callRemote("startBuild")
604 d
.addCallbacks(self
._startBuild
_2, self
._startBuildFailed
,
605 callbackArgs
=(build
,sb
), errbackArgs
=(build
,sb
))
608 def _startBuild_2(self
, res
, build
, sb
):
609 # create the BuildStatus object that goes with the Build
610 bs
= self
.builder_status
.newBuild()
612 # start the build. This will first set up the steps, then tell the
613 # BuildStatus that it has started, which will announce it to the
614 # world (through our BuilderStatus object, which is its parent).
615 # Finally it will start the actual build process.
616 d
= build
.startBuild(bs
, self
.expectations
, sb
)
617 d
.addCallback(self
.buildFinished
, sb
)
618 d
.addErrback(log
.err
) # this shouldn't happen. if it does, the slave
620 for req
in build
.requests
:
621 req
.buildStarted(build
, bs
)
622 return build
# this is the IBuildControl
624 def _startBuildFailed(self
, why
, build
, sb
):
625 # put the build back on the buildable list
626 log
.msg("I tried to tell the slave that the build %s started, but "
627 "remote_startBuild failed: %s" % (build
, why
))
628 # release the slave. This will queue a call to maybeStartBuild, which
629 # will fire after other notifyOnDisconnect handlers have marked the
630 # slave as disconnected (so we don't try to use it again).
633 log
.msg("re-queueing the BuildRequest")
634 self
.building
.remove(build
)
635 for req
in build
.requests
:
636 self
.buildable
.insert(0, req
) # the interrupted build gets first
638 self
.builder_status
.addBuildRequest(req
.status
)
641 def buildFinished(self
, build
, sb
):
642 """This is called when the Build has finished (either success or
643 failure). Any exceptions during the build are reported with
644 results=FAILURE, not with an errback."""
646 # by the time we get here, the Build has already released the slave
647 # (which queues a call to maybeStartBuild)
649 self
.building
.remove(build
)
650 for req
in build
.requests
:
651 req
.finished(build
.build_status
)
653 def setExpectations(self
, progress
):
654 """Mark the build as successful and update expectations for the next
655 build. Only call this when the build did not fail in any way that
656 would invalidate the time expectations generated by it. (if the
657 compile failed and thus terminated early, we can't use the last
658 build to predict how long the next one will take).
660 if self
.expectations
:
661 self
.expectations
.update(progress
)
663 # the first time we get a good build, create our Expectations
664 # based upon its results
665 self
.expectations
= Expectations(progress
)
666 log
.msg("new expectations: %s seconds" % \
667 self
.expectations
.expectedBuildTime())
669 def shutdownSlave(self
):
671 self
.remote
.callRemote("shutdown")
674 class BuilderControl(components
.Adapter
):
675 implements(interfaces
.IBuilderControl
)
    def requestBuild(self, req):
        """Submit a BuildRequest to this Builder."""
        # Delegate straight to the adapted Builder: queue the request and
        # let it start a build when a slave is available.
        self.original.submitBuildRequest(req)
681 def requestBuildSoon(self
, req
):
682 """Submit a BuildRequest like requestBuild, but raise a
683 L{buildbot.interfaces.NoSlaveError} if no slaves are currently
684 available, so it cannot be used to queue a BuildRequest in the hopes
685 that a slave will eventually connect. This method is appropriate for
686 use by things like the web-page 'Force Build' button."""
687 if not self
.original
.slaves
:
688 raise interfaces
.NoSlaveError
689 self
.requestBuild(req
)
691 def resubmitBuild(self
, bs
, reason
="<rebuild, no reason given>"):
692 if not bs
.isFinished():
695 ss
= bs
.getSourceStamp()
696 req
= base
.BuildRequest(reason
, ss
, self
.original
.name
)
697 self
.requestBuild(req
)
    def getPendingBuilds(self):
        """Return IBuildRequestControl objects for queued BuildRequests.

        Not yet implemented.
        """
        # return IBuildRequestControl objects
        raise NotImplementedError
703 def getBuild(self
, number
):
704 for b
in self
.original
.building
:
705 if b
.build_status
.number
== number
:
709 def ping(self
, timeout
=30):
710 if not self
.original
.slaves
:
711 self
.original
.builder_status
.addPointEvent(["ping", "no slave"],
713 return defer
.succeed(False) # interfaces.NoSlaveError
715 for s
in self
.original
.slaves
:
716 dl
.append(s
.ping(timeout
, self
.original
.builder_status
))
717 d
= defer
.DeferredList(dl
)
718 d
.addCallback(self
._gatherPingResults
)
721 def _gatherPingResults(self
, res
):
722 for ignored
,success
in res
:
# Register BuilderControl as the adapter returned by
# interfaces.IBuilderControl(builder) for Builder instances.
components.registerAdapter(BuilderControl, Builder, interfaces.IBuilderControl)