# buildbot/buildslave.py
# ISlaveStatus: provide an interface to read BuildSlave.lastMessageReceived
import time
from twisted.python import log
from twisted.internet import defer, reactor

from buildbot.pbutil import NewCredPerspective
from buildbot.status.builder import SlaveStatus
class BuildSlave(NewCredPerspective):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attached), they get a
    reference to this instance. The BotMaster object is stashed as the
    .service attribute.

    I represent a build slave -- a remote machine capable of
    running builds. I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""
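
    # Illustrative configuration sketch (not from this file; the slave name
    # and password below are invented): master.cfg lists one instance of
    # this class per slave in the c['slaves'] list mentioned above, e.g.
    #   from buildbot.buildslave import BuildSlave
    #   c['slaves'] = [BuildSlave("bot1", "bot1passwd", max_builds=2)]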

    def __init__(self, name, password, max_builds=None):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        """
        self.slavename = name
        self.password = password
        self.botmaster = None # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = []
        self.max_builds = max_builds
        self.lastMessageReceived = 0

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically. Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.__class__ == new.__class__
        self.max_builds = new.max_builds

    def __repr__(self):
        builders = self.botmaster.getBuildersForSlave(self.slavename)
        return "<BuildSlave '%s', current builders: %s>" % \
               (self.slavename, ','.join(map(lambda b: b.name, builders)))

    def setBotmaster(self, botmaster):
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        return defer.succeed(None)

    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires with a suitable pb.IPerspective to
        give to the slave (i.e. 'self')"""

        if self.slave:
            # uh-oh, we've got a duplicate slave. The most likely
            # explanation is that the slave is behind a slow link, thinks we
            # went away, and has attempted to reconnect, so we've got two
            # "connections" from the same slave, but the previous one is
            # stale. Give the new one precedence.
            log.msg("duplicate slave %s replacing old one" % self.slavename)

            # just in case we've got two identically-configured slaves,
            # report the IP addresses of both so someone can resolve the
            # squabble
            tport = self.slave.broker.transport
            log.msg("old slave was connected from", tport.getPeer())
            log.msg("new slave is from", bot.broker.transport.getPeer())
            d = self.disconnect()
        else:
            d = defer.succeed(None)
        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")
            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
            def _info_unavailable(why):
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")
            def _got_commands(commands):
                state["slave_commands"] = commands
            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave = bot
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            return self.updateSlave()
        d.addCallback(_accept_slave)

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda res: self)
        return d

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)
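
    # Sketch of how a status consumer might read this timestamp (per the
    # commit message at the top of this file, SlaveStatus exposes it; the
    # getSlave() accessor and the accessor name lastMessageReceived() are
    # assumptions, not confirmed by this file):
    #   slave_status = status.getSlave("bot1")
    #   idle_seconds = time.time() - slave_status.lastMessageReceived()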

    def detached(self, mind):
        self.slave = None
        self.slave_status.setConnected(False)
        self.botmaster.slaveLost(self)
        log.msg("BuildSlave.detached(%s)" % self.slavename)

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """
        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)

        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        self.slave.notifyOnDisconnect(_disconnected)
        tport = self.slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        # When this Deferred fires, we'll be ready to accept the new slave
        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.builddir) for b in our_builders]
        d = self.slave.callRemote("setBuilderList", blist)
        def _sent(slist):
            dl = []
            for name, remote in slist.items():
                # use get() since we might have changed our mind since then
                b = self.botmaster.builders.get(name)
                if b:
                    d1 = b.attached(self, remote, self.slave_commands)
                    dl.append(d1)
            return defer.DeferredList(dl)
        def _set_failed(why):
            log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them? Without setBuilderList we can't use
            # them.
        d.addCallbacks(_sent, _set_failed)
        return d

    def perspective_keepalive(self):
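        # The slave pings this method periodically just to generate traffic
        # on the PB connection; the ping itself is the point, so there is
        # nothing for the master to do in response.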
        pass

    def addSlaveBuilder(self, sb):
        log.msg("%s adding %s" % (self, sb))
        self.slavebuilders.append(sb)

    def removeSlaveBuilder(self, sb):
        log.msg("%s removing %s" % (self, sb))
        if sb in self.slavebuilders:
            self.slavebuilders.remove(sb)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build. This function can be used to limit overall
        concurrency on the buildslave.
        """
        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False
        return True
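
# Illustrative sketch (not part of the original module): the class docstring
# says BuildSlave "can be subclassed to add extra functionality", so a
# site-specific policy could be layered onto canStartBuild(). The subclass
# name and hour window below are hypothetical.
#
#   class OffHoursBuildSlave(BuildSlave):
#       def canStartBuild(self):
#           # refuse to start new builds during business hours (9:00-17:00)
#           if 9 <= time.localtime().tm_hour < 17:
#               return False
#           return BuildSlave.canStartBuild(self)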