Merge branch 'web-improvements' of git://github.com/catlee/buildbot
[buildbot.git] / buildbot / process / builder.py
blobe06bd5f22c140d23457a7713baebde63848c9828
2 import random, weakref
3 from zope.interface import implements
4 from twisted.python import log, components
5 from twisted.python.failure import Failure
6 from twisted.spread import pb
7 from twisted.internet import reactor, defer
9 from buildbot import interfaces
10 from buildbot.status.progress import Expectations
11 from buildbot.util import now
12 from buildbot.process import base
# States for a master-side SlaveBuilder (see AbstractSlaveBuilder.state).
(ATTACHING, # slave attached, still checking hostinfo/etc
 IDLE, # idle, available for use
 PINGING, # build about to start, making sure it is still alive
 BUILDING, # build is running
 LATENT, # latent slave is not substantiated; similar to idle
 SUBSTANTIATING, # latent slave is being substantiated for a build
 ) = range(6)
class AbstractSlaveBuilder(pb.Referenceable):
    """I am the master-side representative for one of the
    L{buildbot.slave.bot.SlaveBuilder} objects that lives in a remote
    buildbot. When a remote builder connects, I query it for command versions
    and then make it available to any Builds that are ready to run. """

    def __init__(self):
        # Deferreds that fire when the currently-outstanding ping completes;
        # see ping() / _pong() below.
        self.ping_watchers = []
        self.state = None # set in subclass
        self.remote = None
        self.slave = None
        self.builder_name = None

    def __repr__(self):
        """Return e.g. '<SlaveBuilder builder=foo slave=bar>'."""
        r = ["<", self.__class__.__name__]
        if self.builder_name:
            r.extend([" builder=", self.builder_name])
        if self.slave:
            r.extend([" slave=", self.slave.slavename])
        r.append(">")
        return ''.join(r)

    def setBuilder(self, b):
        # Associate this SlaveBuilder with a (master-side) Builder.
        self.builder = b
        self.builder_name = b.name

    def getSlaveCommandVersion(self, command, oldversion=None):
        """Return the slave's version string for C{command}, or
        C{oldversion} when the slave predates command-version reporting."""
        if self.remoteCommands is None:
            # the slave is 0.5.0 or earlier
            return oldversion
        return self.remoteCommands.get(command)

    def isAvailable(self):
        """True if this SlaveBuilder can accept a new build right now."""
        # if this SlaveBuilder is busy, then it's definitely not available
        if self.isBusy():
            return False

        # otherwise, check in with the BuildSlave
        if self.slave:
            return self.slave.canStartBuild()

        # no slave? not very available.
        return False

    def isBusy(self):
        # Only IDLE and LATENT count as "not busy"; ATTACHING, PINGING,
        # BUILDING and SUBSTANTIATING all do.
        return self.state not in (IDLE, LATENT)

    def buildStarted(self):
        self.state = BUILDING

    def buildFinished(self):
        self.state = IDLE
        # poke the botmaster asynchronously so queued requests can claim us
        reactor.callLater(0, self.builder.botmaster.maybeStartAllBuilds)

    def attached(self, slave, remote, commands):
        """
        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a
                      whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand
        """
        self.state = ATTACHING
        self.remote = remote
        self.remoteCommands = commands # maps command name to version
        if self.slave is None:
            self.slave = slave
            self.slave.addSlaveBuilder(self)
        else:
            # re-attachment must come from the same slave we already know
            assert self.slave == slave
        log.msg("Buildslave %s attached to %s" % (slave.slavename,
                                                  self.builder_name))
        d = self.remote.callRemote("setMaster", self)
        d.addErrback(self._attachFailure, "Builder.setMaster")
        d.addCallback(self._attached2)
        return d

    def _attached2(self, res):
        # second step of the attach handshake: a round-trip 'print' to make
        # sure the remote side is responsive before we declare it usable
        d = self.remote.callRemote("print", "attached")
        d.addErrback(self._attachFailure, "Builder.print 'attached'")
        d.addCallback(self._attached3)
        return d

    def _attached3(self, res):
        # now we say they're really attached
        self.state = IDLE
        return self

    def _attachFailure(self, why, where):
        # Log the failure and re-raise it so the caller's errback also fires.
        assert isinstance(where, str)
        log.msg(where)
        log.err(why)
        return why

    def ping(self, timeout, status=None):
        """Ping the slave to make sure it is still there. Returns a Deferred
        that fires with True if it is.

        @param status: if you point this at a BuilderStatus, a 'pinging'
                       event will be pushed.
        """
        oldstate = self.state
        self.state = PINGING
        # only launch a new Ping if one is not already outstanding; later
        # callers just wait on the same result
        newping = not self.ping_watchers
        d = defer.Deferred()
        self.ping_watchers.append(d)
        if newping:
            if status:
                event = status.addEvent(["pinging"])
                d2 = defer.Deferred()
                d2.addCallback(self._pong_status, event)
                self.ping_watchers.insert(0, d2)
                # I think it will make the tests run smoother if the status
                # is updated before the ping completes
            Ping().ping(self.remote, timeout).addCallback(self._pong)

        def reset_state(res):
            # restore the previous state unless something else (e.g. a
            # build starting) has already changed it
            if self.state == PINGING:
                self.state = oldstate
            return res
        d.addCallback(reset_state)
        return d

    def _pong(self, res):
        # fan the single Ping result out to every waiting Deferred
        watchers, self.ping_watchers = self.ping_watchers, []
        for d in watchers:
            d.callback(res)

    def _pong_status(self, res, event):
        # close out the 'pinging' status event pushed by ping()
        if res:
            event.text = ["ping", "success"]
        else:
            event.text = ["ping", "failed"]
        event.finish()

    def detached(self):
        # NOTE(review): this log line assumes self.slave is still set when
        # we are called -- confirm callers never invoke detached() twice.
        log.msg("Buildslave %s detached from %s" % (self.slave.slavename,
                                                    self.builder_name))
        if self.slave:
            self.slave.removeSlaveBuilder(self)
        self.slave = None
        self.remote = None
        self.remoteCommands = None
class Ping:
    # Single-use helper: pings one remote SlaveBuilder and fires a Deferred
    # with True (pong received) or False (ping failed / connection lost).
    running = False
    timer = None

    def ping(self, remote, timeout):
        """Send the ping. May only be called once per Ping instance.
        Returns a Deferred that fires with True or False."""
        assert not self.running
        self.running = True
        log.msg("sending ping")
        self.d = defer.Deferred()
        # TODO: add a distinct 'ping' command on the slave.. using 'print'
        # for this purpose is kind of silly.
        remote.callRemote("print", "ping").addCallbacks(self._pong,
                                                        self._ping_failed,
                                                        errbackArgs=(remote,))

        # We use either our own timeout or the (long) TCP timeout to detect
        # silently-missing slaves. This might happen because of a NAT
        # timeout or a routing loop. If the slave just shuts down (and we
        # somehow missed the FIN), we should get a "connection refused"
        # message.
        self.timer = reactor.callLater(timeout, self._ping_timeout, remote)
        return self.d

    def _ping_timeout(self, remote):
        log.msg("ping timeout")
        # force the BuildSlave to disconnect, since this indicates that
        # the bot is unreachable.
        # After 'del', self.timer falls back to the class attribute (None),
        # so the later _stopTimer() will not try to cancel the timer that
        # has already fired.
        del self.timer
        remote.broker.transport.loseConnection()
        # the forcibly-lost connection will now cause the ping to fail

    def _stopTimer(self):
        # Idempotent: only the first of _pong/_ping_failed does the cleanup.
        if not self.running:
            return
        self.running = False

        if self.timer:
            self.timer.cancel()
            del self.timer

    def _pong(self, res):
        log.msg("ping finished: success")
        self._stopTimer()
        self.d.callback(True)

    def _ping_failed(self, res, remote):
        log.msg("ping finished: failure")
        self._stopTimer()
        # the slave has some sort of internal error, disconnect them. If we
        # don't, we'll requeue a build and ping them again right away,
        # creating a nasty loop.
        remote.broker.transport.loseConnection()
        # TODO: except, if they actually did manage to get this far, they'll
        # probably reconnect right away, and we'll do this game again. Maybe
        # it would be better to leave them in the PINGING state.
        self.d.callback(False)
class SlaveBuilder(AbstractSlaveBuilder):
    """A SlaveBuilder for an ordinary, persistently-connected buildslave."""

    def __init__(self):
        AbstractSlaveBuilder.__init__(self)
        # not usable until attached() completes
        self.state = ATTACHING

    def detached(self):
        AbstractSlaveBuilder.detached(self)
        # The base class has already called slave.removeSlaveBuilder() and
        # cleared self.slave, so the duplicate removal that used to live
        # here was dead code. Just return to ATTACHING until the slave
        # reconnects.
        self.state = ATTACHING

    def buildFinished(self):
        # Call the slave's buildFinished if we can; the slave may be waiting
        # to do a graceful shutdown and needs to know when it's idle.
        # After, we check to see if we can start other builds.
        self.state = IDLE
        if self.slave:
            d = self.slave.buildFinished(self)
            d.addCallback(lambda x: reactor.callLater(0, self.builder.botmaster.maybeStartAllBuilds))
        else:
            reactor.callLater(0, self.builder.botmaster.maybeStartAllBuilds)
class LatentSlaveBuilder(AbstractSlaveBuilder):
    """A SlaveBuilder for a latent buildslave: one that is not normally
    connected and must be 'substantiated' (brought up on demand) before a
    build can run on it."""

    def __init__(self, slave, builder):
        AbstractSlaveBuilder.__init__(self)
        self.slave = slave
        self.state = LATENT
        self.setBuilder(builder)
        self.slave.addSlaveBuilder(self)
        log.msg("Latent buildslave %s attached to %s" % (slave.slavename,
                                                         self.builder_name))

    def substantiate(self, build):
        """Ask the latent slave to substantiate itself; returns the slave's
        Deferred. A 'substantiating' status event is pushed while we wait."""
        self.state = SUBSTANTIATING
        d = self.slave.substantiate(self)
        if not self.slave.substantiated:
            event = self.builder.builder_status.addEvent(
                ["substantiating"])
            def substantiated(res):
                msg = ["substantiate", "success"]
                # the slave may return extra descriptive text with success
                if isinstance(res, basestring):
                    msg.append(res)
                elif isinstance(res, (tuple, list)):
                    msg.extend(res)
                event.text = msg
                event.finish()
                return res
            def substantiation_failed(res):
                event.text = ["substantiate", "failed"]
                # TODO add log of traceback to event
                event.finish()
                return res
            d.addCallbacks(substantiated, substantiation_failed)
        return d

    def detached(self):
        AbstractSlaveBuilder.detached(self)
        # a detached latent slave goes back to LATENT, not ATTACHING
        self.state = LATENT

    def buildStarted(self):
        AbstractSlaveBuilder.buildStarted(self)
        # let the latent slave track activity (e.g. for idle timeouts)
        self.slave.buildStarted(self)

    def buildFinished(self):
        AbstractSlaveBuilder.buildFinished(self)
        self.slave.buildFinished(self)

    def _attachFailure(self, why, where):
        # a failed attach leaves us LATENT so a later build can retry
        self.state = LATENT
        return AbstractSlaveBuilder._attachFailure(self, why, where)

    def ping(self, timeout, status=None):
        # an unsubstantiated latent slave is trivially "alive": there is
        # nothing to ping yet, so report success without a round-trip
        if not self.slave.substantiated:
            if status:
                status.addEvent(["ping", "latent"]).finish()
            return defer.succeed(True)
        return AbstractSlaveBuilder.ping(self, timeout, status)
class Builder(pb.Referenceable):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.base.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{base.BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    I am persisted in C{BASEDIR/BUILDERNAME/builder}, so I can remember how
    long a build usually takes to run (in my C{expectations} attribute). This
    pickle also includes the L{buildbot.status.builder.BuilderStatus} object,
    which remembers the set of historic builds.

    @type buildable: list of L{buildbot.process.base.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.base.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None # this is created the first time we get a good build
    START_BUILD_TIMEOUT = 10
    CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders']. Contains name,
                      slavename(s), builddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        self.name = setup['name']
        self.slavenames = []
        if 'slavename' in setup:
            self.slavenames.append(setup['slavename'])
        if 'slavenames' in setup:
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if 'periodicBuildTime' in setup:
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")

        # build/wannabuild slots: Build objects move along this sequence
        self.buildable = []
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)

        # for testing, to help synchronize tests
        self.watchers = {'attach': [], 'detach': [], 'detach_all': [],
                         'idle': []}

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster

    def compareToSetup(self, setup):
        """Return a list of strings describing how C{setup} differs from my
        current configuration; an empty list means no change."""
        diffs = []
        setup_slavenames = []
        if 'slavename' in setup:
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['factory'] != self.buildFactory: # compare objects
            diffs.append('factory changed')
        oldlocks = [(lock.__class__, lock.name)
                    for lock in self.locks]
        newlocks = [(lock.__class__, lock.name)
                    for lock in setup.get('locks',[])]
        if oldlocks != newlocks:
            diffs.append('locks changed from %s to %s' % (oldlocks, newlocks))
        # BUGFIX: use .get() in the messages as well -- 'nextSlave' and
        # 'nextBuild' may be absent from setup, and setup['nextSlave'] would
        # raise KeyError exactly when we try to report the difference.
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s'
                         % (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s'
                         % (self.nextBuild, setup.get('nextBuild')))
        return diffs

    def __repr__(self):
        return "<Builder '%s' at %d>" % (self.name, id(self))

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        if self.buildable:
            return self.buildable[0].getSubmitTime()
        else:
            return None

    def submitBuildRequest(self, req):
        """Queue a BuildRequest and poke the botmaster to maybe start it."""
        req.setSubmitTime(now())
        self.buildable.append(req)
        req.requestSubmitted(self)
        self.builder_status.addBuildRequest(req.status)
        self.botmaster.maybeStartAllBuilds()

    def cancelBuildRequest(self, req):
        """Remove a not-yet-started request; returns True if it was found."""
        if req in self.buildable:
            self.buildable.remove(req)
            self.builder_status.removeBuildRequest(req.status, cancelled=True)
            return True
        return False

    def __getstate__(self):
        # drop the unpicklable runtime state before pickling
        d = self.__dict__.copy()
        # TODO: note that d['buildable'] can contain Deferreds
        del d['building'] # TODO: move these back to .buildable?
        del d['slaves']
        return d

    def __setstate__(self, d):
        self.__dict__ = d
        self.building = []
        self.slaves = []

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # we claim all the pending builds, removing them from the old
        # Builder's queue. This insures that the old Builder will not start
        # any new work.
        log.msg(" stealing %s buildrequests" % len(old.buildable))
        self.buildable.extend(old.buildable)
        old.buildable = []

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return # all done

    def getBuild(self, number):
        """Return the active (or stolen) Build with this number, or None."""
        for b in self.building:
            if b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        # fire-and-clear the Deferreds that tests registered under 'name'
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            reactor.callLater(0, w.callback, fire_with)

    def addLatentSlave(self, slave):
        """Register a latent buildslave with this builder (idempotent)."""
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            reactor.callLater(0, self.botmaster.maybeStartAllBuilds)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about
                # the detached() message arriving while there's some
                # transition pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        # BUGFIX: 'slave' here is the BuildSlave itself (see attached()), so
        # the name lives at slave.slavename, not slave.slave.slavename.
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        log.msg("%s.detached" % self, slave.slavename)
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.

            # TODO: should failover to a new Build
            #self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached() # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        # summarize our state for the status display
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def maybeStartBuild(self):
        """Start a build if we have both a pending request and an available
        slave. Called whenever either might have become true."""
        log.msg("maybeStartBuild %s: %s %s" %
                (self, self.buildable, self.slaves))
        if not self.buildable:
            self.updateBigStatus()
            return # nothing to do

        # pick an idle slave
        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            log.msg("%s: want to start build, but we don't have a remote"
                    % self)
            self.updateBigStatus()
            return
        if self.nextSlave:
            sb = None
            try:
                sb = self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())

            if not sb:
                log.msg("%s: want to start build, but we don't have a remote"
                        % self)
                self.updateBigStatus()
                return
        elif self.CHOOSE_SLAVES_RANDOMLY:
            sb = random.choice(available_slaves)
        else:
            sb = available_slaves[0]

        # there is something to build, and there is a slave on which to build
        # it. Grab the oldest request, see if we can merge it with anything
        # else.
        if not self.nextBuild:
            req = self.buildable.pop(0)
        else:
            try:
                req = self.nextBuild(self, self.buildable)
                if not req:
                    # Nothing to do
                    self.updateBigStatus()
                    return
                self.buildable.remove(req)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
                self.updateBigStatus()
                return
        self.builder_status.removeBuildRequest(req.status)
        mergers = []
        botmaster = self.botmaster
        # iterate over a copy since we remove merged requests as we go
        for br in self.buildable[:]:
            if botmaster.shouldMergeRequests(self, req, br):
                self.buildable.remove(br)
                self.builder_status.removeBuildRequest(br.status)
                mergers.append(br)
        requests = [req] + mergers

        # Create a new build from our build factory and set ourself as the
        # builder.
        build = self.buildFactory.newBuild(requests)
        build.setBuilder(self)
        build.setLocks(self.locks)
        if len(self.env) > 0:
            build.setSlaveEnvironment(self.env)

        # start it
        self.startBuild(build, sb)

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()
        if isinstance(sb, LatentSlaveBuilder):
            log.msg("starting build %s.. substantiating the slave %s" %
                    (build, sb))
            d = sb.substantiate(build)
            def substantiated(res):
                return sb.ping(self.START_BUILD_TIMEOUT)
            def substantiation_failed(res):
                self.builder_status.addPointEvent(
                    ['removing', 'latent', sb.slave.slavename])
                sb.slave.disconnect()
                # TODO: should failover to a new Build
                #self.retryBuild(sb.build)
            d.addCallbacks(substantiated, substantiation_failed)
        else:
            log.msg("starting build %s.. pinging the slave %s" % (build, sb))
            d = sb.ping(self.START_BUILD_TIMEOUT)
        # ping the slave to make sure they're still there. If they've fallen
        # off the map (due to a NAT timeout or something), this will fail in
        # a couple of minutes, depending upon the TCP timeout. TODO: consider
        # making this time out faster, or at least characterize the likely
        # duration.
        d.addCallback(self._startBuild_1, build, sb)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2, self._startBuildFailed,
                       callbackArgs=(build,sb), errbackArgs=(build,sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb)
        d.addErrback(log.err) # this shouldn't happen. if it does, the slave
                              # will be wedged
        for req in build.requests:
            req.buildStarted(build, bs)
        return build # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        for req in build.requests:
            self.buildable.insert(0, req) # the interrupted build gets first
                                          # priority
            self.builder_status.addBuildRequest(req.status)

    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.building.remove(build)
        for req in build.requests:
            req.finished(build.build_status)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        # NOTE(review): Builder never assigns a .remote attribute (only
        # SlaveBuilder instances do), so calling this would raise
        # AttributeError -- confirm whether it should iterate self.slaves
        # instead.
        if self.remote:
            self.remote.callRemote("shutdown")
class BuilderControl(components.Adapter):
    """IBuilderControl adapter around a Builder; exposes the control-side
    operations (submitting, resubmitting, cancelling, pinging)."""
    implements(interfaces.IBuilderControl)

    def requestBuild(self, req):
        """Submit a BuildRequest to this Builder."""
        self.original.submitBuildRequest(req)

    def requestBuildSoon(self, req):
        """Submit a BuildRequest like requestBuild, but raise a
        L{buildbot.interfaces.NoSlaveError} if no slaves are currently
        available, so it cannot be used to queue a BuildRequest in the hopes
        that a slave will eventually connect. This method is appropriate for
        use by things like the web-page 'Force Build' button."""
        if not self.original.slaves:
            raise interfaces.NoSlaveError
        self.requestBuild(req)

    def resubmitBuild(self, bs, reason="<rebuild, no reason given>"):
        """Resubmit a finished build as a fresh request with the same
        (absolute) SourceStamp. Unfinished builds are ignored."""
        if not bs.isFinished():
            return

        stamp = bs.getSourceStamp(absolute=True)
        self.requestBuild(base.BuildRequest(reason, stamp, self.original.name))

    def getPendingBuilds(self):
        # return IBuildRequestControl objects
        return [BuildRequestControl(self.original, req)
                for req in self.original.buildable]

    def getBuild(self, number):
        """Delegate to the Builder's own build lookup."""
        return self.original.getBuild(number)

    def ping(self, timeout=30):
        """Ping every attached slave; the returned Deferred fires with True
        only when all of them answer."""
        builder = self.original
        if not builder.slaves:
            builder.builder_status.addPointEvent(["ping", "no slave"])
            return defer.succeed(False) # interfaces.NoSlaveError
        pings = [sb.ping(timeout, builder.builder_status)
                 for sb in builder.slaves]
        d = defer.DeferredList(pings)
        d.addCallback(self._gatherPingResults)
        return d

    def _gatherPingResults(self, res):
        # res is a DeferredList result: a list of (flag, success) pairs
        return all(success for ignored, success in res)
# Let interfaces.IBuilderControl(builder) produce a BuilderControl wrapper.
components.registerAdapter(BuilderControl, Builder, interfaces.IBuilderControl)
class BuildRequestControl:
    """Control handle for one pending BuildRequest on one Builder; only
    cancellation is implemented."""
    implements(interfaces.IBuildRequestControl)

    def __init__(self, builder, request):
        # keep references to both sides so cancel() can find the queue entry
        self.original_builder = builder
        self.original_request = request

    def subscribe(self, observer):
        """Not implemented for pending requests."""
        raise NotImplementedError

    def unsubscribe(self, observer):
        """Not implemented for pending requests."""
        raise NotImplementedError

    def cancel(self):
        """Withdraw the request from the builder's buildable queue."""
        builder = self.original_builder
        builder.cancelBuildRequest(self.original_request)