import time

from email.Message import Message
from email.Utils import formatdate

from zope.interface import implements

from twisted.python import log
from twisted.internet import defer, reactor
from twisted.application import service

from buildbot.pbutil import NewCredPerspective
from buildbot.status.builder import SlaveStatus
from buildbot.status.mail import MailNotifier
from buildbot.interfaces import IBuildSlave
15 class BuildSlave(NewCredPerspective
, service
.MultiService
):
16 """This is the master-side representative for a remote buildbot slave.
17 There is exactly one for each slave described in the config file (the
18 c['slaves'] list). When buildbots connect in (.attach), they get a
19 reference to this instance. The BotMaster object is stashed as the
20 .botmaster attribute. The BotMaster is also our '.parent' Service.
22 I represent a build slave -- a remote machine capable of
23 running builds. I am instantiated by the configuration file, and can be
24 subclassed to add extra functionality."""
26 implements(IBuildSlave
)
28 def __init__(self
, name
, password
, max_builds
=None,
29 notify_on_missing
=[], missing_timeout
=3600):
31 @param name: botname this machine will supply when it connects
32 @param password: password this machine will supply when
34 @param max_builds: maximum number of simultaneous builds that will
35 be run concurrently on this buildslave (the
36 default is None for no limit)
38 service
.MultiService
.__init
__(self
)
40 self
.password
= password
41 self
.botmaster
= None # no buildmaster yet
42 self
.slave_status
= SlaveStatus(name
)
43 self
.slave
= None # a RemoteReference to the Bot, when connected
44 self
.slave_commands
= None
45 self
.slavebuilders
= []
46 self
.max_builds
= max_builds
47 self
.lastMessageReceived
= 0
48 if isinstance(notify_on_missing
, str):
49 notify_on_missing
= [notify_on_missing
]
50 self
.notify_on_missing
= notify_on_missing
51 for i
in notify_on_missing
:
52 assert isinstance(i
, str)
53 self
.missing_timeout
= missing_timeout
54 self
.missing_timer
= None
56 def update(self
, new
):
58 Given a new BuildSlave, configure this one identically. Because
59 BuildSlave objects are remotely referenced, we can't replace them
60 without disconnecting the slave, yet there's no reason to do that.
62 # the reconfiguration logic should guarantee this:
63 assert self
.slavename
== new
.slavename
64 assert self
.password
== new
.password
65 assert self
.__class
__ == new
.__class
__
66 self
.max_builds
= new
.max_builds
69 builders
= self
.botmaster
.getBuildersForSlave(self
.slavename
)
70 return "<BuildSlave '%s', current builders: %s>" % \
71 (self
.slavename
, ','.join(map(lambda b
: b
.name
, builders
)))
73 def setBotmaster(self
, botmaster
):
74 assert not self
.botmaster
, "BuildSlave already has a botmaster"
75 self
.botmaster
= botmaster
77 def updateSlave(self
):
78 """Called to add or remove builders after the slave has connected.
80 @return: a Deferred that indicates when an attached slave has
81 accepted the new builders and/or released the old ones."""
83 return self
.sendBuilderList()
84 return defer
.succeed(None)
86 def attached(self
, bot
):
87 """This is called when the slave connects.
89 @return: a Deferred that fires with a suitable pb.IPerspective to
90 give to the slave (i.e. 'self')"""
92 if self
.missing_timer
:
93 self
.missing_timer
.cancel()
94 self
.missing_timer
= None
97 # uh-oh, we've got a duplicate slave. The most likely
98 # explanation is that the slave is behind a slow link, thinks we
99 # went away, and has attempted to reconnect, so we've got two
100 # "connections" from the same slave, but the previous one is
101 # stale. Give the new one precedence.
102 log
.msg("duplicate slave %s replacing old one" % self
.slavename
)
104 # just in case we've got two identically-configured slaves,
105 # report the IP addresses of both so someone can resolve the
107 tport
= self
.slave
.broker
.transport
108 log
.msg("old slave was connected from", tport
.getPeer())
109 log
.msg("new slave is from", bot
.broker
.transport
.getPeer())
110 d
= self
.disconnect()
112 d
= defer
.succeed(None)
113 # now we go through a sequence of calls, gathering information, then
114 # tell the Botmaster that it can finally give this slave to all the
115 # Builders that care about it.
117 # we accumulate slave information in this 'state' dictionary, then
118 # set it atomically if we make it far enough through the process
121 def _log_attachment_on_slave(res
):
122 d1
= bot
.callRemote("print", "attached")
123 d1
.addErrback(lambda why
: None)
125 d
.addCallback(_log_attachment_on_slave
)
128 d1
= bot
.callRemote("getSlaveInfo")
130 log
.msg("Got slaveinfo from '%s'" % self
.slavename
)
131 # TODO: info{} might have other keys
132 state
["admin"] = info
.get("admin")
133 state
["host"] = info
.get("host")
134 def _info_unavailable(why
):
135 # maybe an old slave, doesn't implement remote_getSlaveInfo
136 log
.msg("BuildSlave.info_unavailable")
138 d1
.addCallbacks(_got_info
, _info_unavailable
)
140 d
.addCallback(_get_info
)
142 def _get_commands(res
):
143 d1
= bot
.callRemote("getCommands")
144 def _got_commands(commands
):
145 state
["slave_commands"] = commands
146 def _commands_unavailable(why
):
147 # probably an old slave
148 log
.msg("BuildSlave._commands_unavailable")
149 if why
.check(AttributeError):
152 d1
.addCallbacks(_got_commands
, _commands_unavailable
)
154 d
.addCallback(_get_commands
)
156 def _accept_slave(res
):
157 self
.slave_status
.setAdmin(state
.get("admin"))
158 self
.slave_status
.setHost(state
.get("host"))
159 self
.slave_status
.setConnected(True)
160 self
.slave_commands
= state
.get("slave_commands")
162 log
.msg("bot attached")
163 self
.messageReceivedFromSlave()
164 return self
.updateSlave()
165 d
.addCallback(_accept_slave
)
167 # Finally, the slave gets a reference to this BuildSlave. They
168 # receive this later, after we've started using them.
169 d
.addCallback(lambda res
: self
)
172 def messageReceivedFromSlave(self
):
174 self
.lastMessageReceived
= now
175 self
.slave_status
.setLastMessageReceived(now
)
177 def detached(self
, mind
):
179 self
.slave_status
.setConnected(False)
180 self
.botmaster
.slaveLost(self
)
181 log
.msg("BuildSlave.detached(%s)" % self
.slavename
)
182 if self
.notify_on_missing
and self
.parent
:
183 self
.missing_timer
= reactor
.callLater(self
.missing_timeout
,
184 self
._missing
_timer
_fired
)
186 def _missing_timer_fired(self
):
187 self
.missing_timer
= None
188 # notify people, but only if we're still in the config
192 # first, see if we have a MailNotifier we can use. This gives us a
193 # fromaddr and a relayhost.
194 buildmaster
= self
.botmaster
.parent
195 status
= buildmaster
.getStatus()
196 for st
in buildmaster
.statusTargets
:
197 if isinstance(st
, MailNotifier
):
200 # if not, they get a default MailNotifier, which always uses SMTP
201 # to localhost and uses a dummy fromaddr of "buildbot".
202 log
.msg("buildslave-missing msg using default MailNotifier")
203 st
= MailNotifier("buildbot")
204 # now construct the mail
205 text
= "The Buildbot working for '%s'\n" % status
.getProjectName()
206 text
+= ("has noticed that the buildslave named %s went away\n" %
209 text
+= ("It last disconnected at %s (buildmaster-local time)\n" %
210 time
.ctime(time
.time() - self
.missing_timeout
)) # close enough
212 text
+= "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
213 text
+= "was '%s'.\n" % self
.slave_status
.getAdmin()
215 text
+= "Sincerely,\n"
216 text
+= " The Buildbot\n"
217 text
+= " %s\n" % status
.getProjectURL()
221 m
['Date'] = formatdate(localtime
=True)
222 m
['Subject'] = "Buildbot: buildslave %s was lost" % self
.slavename
223 m
['From'] = st
.fromaddr
224 recipients
= self
.notify_on_missing
225 d
= st
.sendMessage(m
, recipients
)
226 # return the Deferred for testing purposes
229 def disconnect(self
):
230 """Forcibly disconnect the slave.
232 This severs the TCP connection and returns a Deferred that will fire
233 (with None) when the connection is probably gone.
235 If the slave is still alive, they will probably try to reconnect
238 This is called in two circumstances. The first is when a slave is
239 removed from the config file. In this case, when they try to
240 reconnect, they will be rejected as an unknown slave. The second is
241 when we wind up with two connections for the same slave, in which
242 case we disconnect the older connection.
246 return defer
.succeed(None)
247 log
.msg("disconnecting old slave %s now" % self
.slavename
)
249 # all kinds of teardown will happen as a result of
250 # loseConnection(), but it happens after a reactor iteration or
251 # two. Hook the actual disconnect so we can know when it is safe
252 # to connect the new slave. We have to wait one additional
253 # iteration (with callLater(0)) to make sure the *other*
254 # notifyOnDisconnect handlers have had a chance to run.
257 # notifyOnDisconnect runs the callback with one argument, the
258 # RemoteReference being disconnected.
259 def _disconnected(rref
):
260 reactor
.callLater(0, d
.callback
, None)
261 self
.slave
.notifyOnDisconnect(_disconnected
)
262 tport
= self
.slave
.broker
.transport
263 # this is the polite way to request that a socket be closed
264 tport
.loseConnection()
266 # but really we don't want to wait for the transmit queue to
267 # drain. The remote end is unlikely to ACK the data, so we'd
268 # probably have to wait for a (20-minute) TCP timeout.
269 #tport._closeSocket()
270 # however, doing _closeSocket (whether before or after
271 # loseConnection) somehow prevents the notifyOnDisconnect
272 # handlers from being run. Bummer.
274 tport
.dataBuffer
= ""
277 # however, these hacks are pretty internal, so don't blow up if
278 # they fail or are unavailable
279 log
.msg("failed to accelerate the shutdown process")
281 log
.msg("waiting for slave to finish disconnecting")
283 # When this Deferred fires, we'll be ready to accept the new slave
286 def sendBuilderList(self
):
287 our_builders
= self
.botmaster
.getBuildersForSlave(self
.slavename
)
288 blist
= [(b
.name
, b
.builddir
) for b
in our_builders
]
289 d
= self
.slave
.callRemote("setBuilderList", blist
)
292 for name
, remote
in slist
.items():
293 # use get() since we might have changed our mind since then
294 b
= self
.botmaster
.builders
.get(name
)
296 d1
= b
.attached(self
, remote
, self
.slave_commands
)
298 return defer
.DeferredList(dl
)
299 def _set_failed(why
):
300 log
.msg("BuildSlave.sendBuilderList (%s) failed" % self
)
302 # TODO: hang up on them?, without setBuilderList we can't use
304 d
.addCallbacks(_sent
, _set_failed
)
307 def perspective_keepalive(self
):
310 def addSlaveBuilder(self
, sb
):
311 log
.msg("%s adding %s" % (self
, sb
))
312 self
.slavebuilders
.append(sb
)
314 def removeSlaveBuilder(self
, sb
):
315 log
.msg("%s removing %s" % (self
, sb
))
316 if sb
in self
.slavebuilders
:
317 self
.slavebuilders
.remove(sb
)
319 def canStartBuild(self
):
321 I am called when a build is requested to see if this buildslave
322 can start a build. This function can be used to limit overall
323 concurrency on the buildslave.
326 active_builders
= [sb
for sb
in self
.slavebuilders
if sb
.isBusy()]
327 if len(active_builders
) >= self
.max_builds
: