waterfall: fix DST calculation. Closes #137.
[buildbot.git] / buildbot / buildslave.py
blob4632697dfe11db558bcf96a21b4c0bff91c81544
2 import time
3 from email.Message import Message
4 from email.Utils import formatdate
5 from zope.interface import implements
6 from twisted.python import log
7 from twisted.internet import defer, reactor
8 from twisted.application import service
10 from buildbot.pbutil import NewCredPerspective
11 from buildbot.status.builder import SlaveStatus
12 from buildbot.status.mail import MailNotifier
13 from buildbot.interfaces import IBuildSlave
class BuildSlave(NewCredPerspective, service.MultiService):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of
    running builds. I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=None, missing_timeout=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param notify_on_missing: an email address (or list of addresses)
                                  to notify when this slave goes missing
                                  (default: notify nobody)
        @param missing_timeout: number of seconds after a disconnect before
                                the slave is declared missing and the
                                notification mail is sent (default 3600)
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.botmaster = None # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = []
        self.max_builds = max_builds
        self.lastMessageReceived = 0
        # BUGFIX: the default used to be a shared mutable list ([]), which
        # all instances would alias. Use a None sentinel and build a fresh
        # list per instance instead.
        if notify_on_missing is None:
            notify_on_missing = []
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            assert isinstance(i, str)
        self.missing_timeout = missing_timeout
        self.missing_timer = None

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically. Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.__class__ == new.__class__
        self.max_builds = new.max_builds

    def __repr__(self):
        builders = self.botmaster.getBuildersForSlave(self.slavename)
        return "<BuildSlave '%s', current builders: %s>" % \
            (self.slavename, ','.join(b.name for b in builders))

    def setBotmaster(self, botmaster):
        # may only be called once; reconfigs reuse the same BotMaster
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        return defer.succeed(None)

    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires with a suitable pb.IPerspective to
        give to the slave (i.e. 'self')"""

        # a (re-)connection cancels any pending slave-missing alarm
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

        if self.slave:
            # uh-oh, we've got a duplicate slave. The most likely
            # explanation is that the slave is behind a slow link, thinks we
            # went away, and has attempted to reconnect, so we've got two
            # "connections" from the same slave, but the previous one is
            # stale. Give the new one precedence.
            log.msg("duplicate slave %s replacing old one" % self.slavename)

            # just in case we've got two identically-configured slaves,
            # report the IP addresses of both so someone can resolve the
            # squabble
            tport = self.slave.broker.transport
            log.msg("old slave was connected from", tport.getPeer())
            log.msg("new slave is from", bot.broker.transport.getPeer())
            d = self.disconnect()
        else:
            d = defer.succeed(None)
        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")
            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
            def _info_unavailable(why):
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")
            def _got_commands(commands):
                state["slave_commands"] = commands
            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave = bot
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            return self.updateSlave()
        d.addCallback(_accept_slave)

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda res: self)
        return d

    def messageReceivedFromSlave(self):
        """Record the time of the most recent contact from the slave."""
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        """Called when the slave's connection is lost. Starts the
        slave-missing notification timer if notifications are configured."""
        self.slave = None
        self.slave_status.setConnected(False)
        self.botmaster.slaveLost(self)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        if self.notify_on_missing and self.parent:
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def _missing_timer_fired(self):
        """Email self.notify_on_missing that this slave has gone missing.

        @return: a Deferred that fires when the message has been handed to
        the mailer (returned for testing purposes), or None if we have been
        removed from the config in the meantime."""
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.parent
        status = buildmaster.getStatus()
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail
        text = "The Buildbot working for '%s'\n" % status.getProjectName()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout)) # close enough
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getProjectURL()

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = "Buildbot: buildslave %s was lost" % self.slavename
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """
        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)

        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        self.slave.notifyOnDisconnect(_disconnected)
        tport = self.slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except Exception:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable. (BUGFIX: was a bare 'except:',
            # which would also swallow KeyboardInterrupt/SystemExit.)
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        # When this Deferred fires, we'll be ready to accept the new slave
        return d

    def sendBuilderList(self):
        """Send the current builder list to the attached slave.

        @return: a Deferred that fires once every relevant Builder has
        attached to its remote SlaveBuilder counterpart."""
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.builddir) for b in our_builders]
        d = self.slave.callRemote("setBuilderList", blist)
        def _sent(slist):
            dl = []
            for name, remote in slist.items():
                # use get() since we might have changed our mind since then
                b = self.botmaster.builders.get(name)
                if b:
                    d1 = b.attached(self, remote, self.slave_commands)
                    dl.append(d1)
            return defer.DeferredList(dl)
        def _set_failed(why):
            log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them?, without setBuilderList we can't use
            # them
        d.addCallbacks(_sent, _set_failed)
        return d

    def perspective_keepalive(self):
        # the slave pings us to keep the connection alive; receiving the
        # call is all that is needed, so there is nothing to do here
        pass

    def addSlaveBuilder(self, sb):
        log.msg("%s adding %s" % (self, sb))
        self.slavebuilders.append(sb)

    def removeSlaveBuilder(self, sb):
        log.msg("%s removing %s" % (self, sb))
        if sb in self.slavebuilders:
            self.slavebuilders.remove(sb)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build. This function can be used to limit overall
        concurrency on the buildslave.
        """
        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False
        return True