3 from email
.Message
import Message
4 from email
.Utils
import formatdate
5 from zope
.interface
import implements
6 from twisted
.python
import log
7 from twisted
.internet
import defer
, reactor
8 from twisted
.application
import service
10 from buildbot
.pbutil
import NewCredPerspective
11 from buildbot
.status
.builder
import SlaveStatus
12 from buildbot
.status
.mail
import MailNotifier
13 from buildbot
.interfaces
import IBuildSlave
14 from buildbot
.process
.properties
import Properties
16 class BuildSlave(NewCredPerspective
, service
.MultiService
):
17 """This is the master-side representative for a remote buildbot slave.
18 There is exactly one for each slave described in the config file (the
19 c['slaves'] list). When buildbots connect in (.attach), they get a
20 reference to this instance. The BotMaster object is stashed as the
21 .botmaster attribute. The BotMaster is also our '.parent' Service.
23 I represent a build slave -- a remote machine capable of
24 running builds. I am instantiated by the configuration file, and can be
25 subclassed to add extra functionality."""
27 implements(IBuildSlave
)
def __init__(self, name, password, max_builds=None,
             notify_on_missing=None, missing_timeout=3600,
             properties=None):
    """
    @param name: botname this machine will supply when it connects
    @param password: password this machine will supply when
                     it connects
    @param max_builds: maximum number of simultaneous builds that will
                       be run concurrently on this buildslave (the
                       default is None for no limit)
    @param notify_on_missing: an email address, or a list of addresses,
                              stored as the recipients of the
                              "buildslave was lost" mail. None (the
                              default) or an empty list means nobody
                              is notified.
    @type notify_on_missing: str or list of str
    @param missing_timeout: delay handed to reactor.callLater when the
                            slave detaches, before the "lost" mail is
                            sent (default 3600)
    @param properties: properties that will be applied to builds run on
                       this buildslave
    @type properties: dictionary
    """
    # NOTE(review): the end of the signature was not visible in the
    # reviewed text. 'properties' is assumed to be the last parameter and
    # to default to "no properties" -- confirm against the real file.
    service.MultiService.__init__(self)
    # NOTE(review): this assignment was not visible in the reviewed text,
    # but every other method reads self.slavename -- confirm.
    self.slavename = name
    self.password = password
    self.botmaster = None # no buildmaster yet
    self.slave_status = SlaveStatus(name)
    self.slave = None # a RemoteReference to the Bot, when connected
    self.slave_commands = None
    self.slavebuilders = []
    self.max_builds = max_builds

    # None is used as the default instead of a literal {} / [] so that no
    # mutable object is shared between instances through the signature.
    if properties is None:
        properties = {}
    self.properties = Properties()
    self.properties.update(properties, "BuildSlave")
    self.properties.setProperty("slavename", name, "BuildSlave")

    self.lastMessageReceived = 0
    if notify_on_missing is None:
        notify_on_missing = []
    elif isinstance(notify_on_missing, str):
        # a single address is accepted as shorthand for a one-item list
        notify_on_missing = [notify_on_missing]
    self.notify_on_missing = notify_on_missing
    for i in notify_on_missing:
        assert isinstance(i, str)
    self.missing_timeout = missing_timeout
    self.missing_timer = None
def update(self, new):
    """
    Adopt the configuration of another BuildSlave in place.

    The live object is kept (rather than swapped for C{new}) because
    BuildSlave objects are remotely referenced: replacing one would mean
    disconnecting the slave for no good reason.

    @param new: the freshly-configured BuildSlave to copy from. It must
                have the same slavename, password and class as this one.
    """
    # the reconfiguration logic should guarantee this:
    for attr in ("slavename", "password", "__class__"):
        assert getattr(self, attr) == getattr(new, attr)
    # max_builds is the only setting carried over
    self.max_builds = new.max_builds
80 builders
= self
.botmaster
.getBuildersForSlave(self
.slavename
)
81 return "<BuildSlave '%s', current builders: %s>" % \
82 (self
.slavename
, ','.join(map(lambda b
: b
.name
, builders
)))
84 return "<BuildSlave '%s', (no builders yet)>" % self
.slavename
def setBotmaster(self, botmaster):
    """Remember the BotMaster this slave belongs to.

    This may only happen once: the assertion fails if a botmaster has
    already been recorded.

    @param botmaster: the object to store as self.botmaster
    """
    assert not self.botmaster, "BuildSlave already has a botmaster"
    self.botmaster = botmaster
def updateSlave(self):
    """Called to add or remove builders after the slave has connected.

    When no slave is attached there is nothing to send, so an
    already-fired Deferred is returned instead.

    @return: a Deferred that indicates when an attached slave has
    accepted the new builders and/or released the old ones."""
    # NOTE(review): the guard line was not visible in the reviewed text;
    # "slave attached" is taken to mean a truthy self.slave -- confirm.
    if not self.slave:
        return defer.succeed(None)
    return self.sendBuilderList()
def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
    """Forward build start/finish events to this slave's status object.

    @param buildStarted: if given, passed to self.slave_status.buildStarted
    @param buildFinished: if given, passed to
                          self.slave_status.buildFinished
    """
    # NOTE(review): the two guard lines were not visible in the reviewed
    # text; plain truthiness tests are assumed -- confirm.
    if buildStarted:
        self.slave_status.buildStarted(buildStarted)
    if buildFinished:
        self.slave_status.buildFinished(buildFinished)
105 def attached(self
, bot
):
106 """This is called when the slave connects.
108 @return: a Deferred that fires with a suitable pb.IPerspective to
109 give to the slave (i.e. 'self')"""
112 # uh-oh, we've got a duplicate slave. The most likely
113 # explanation is that the slave is behind a slow link, thinks we
114 # went away, and has attempted to reconnect, so we've got two
115 # "connections" from the same slave, but the previous one is
116 # stale. Give the new one precedence.
117 log
.msg("duplicate slave %s replacing old one" % self
.slavename
)
119 # just in case we've got two identically-configured slaves,
120 # report the IP addresses of both so someone can resolve the
122 tport
= self
.slave
.broker
.transport
123 log
.msg("old slave was connected from", tport
.getPeer())
124 log
.msg("new slave is from", bot
.broker
.transport
.getPeer())
125 d
= self
.disconnect()
127 d
= defer
.succeed(None)
128 # now we go through a sequence of calls, gathering information, then
129 # tell the Botmaster that it can finally give this slave to all the
130 # Builders that care about it.
132 # we accumulate slave information in this 'state' dictionary, then
133 # set it atomically if we make it far enough through the process
136 def _log_attachment_on_slave(res
):
137 d1
= bot
.callRemote("print", "attached")
138 d1
.addErrback(lambda why
: None)
140 d
.addCallback(_log_attachment_on_slave
)
143 d1
= bot
.callRemote("getSlaveInfo")
145 log
.msg("Got slaveinfo from '%s'" % self
.slavename
)
146 # TODO: info{} might have other keys
147 state
["admin"] = info
.get("admin")
148 state
["host"] = info
.get("host")
149 def _info_unavailable(why
):
150 # maybe an old slave, doesn't implement remote_getSlaveInfo
151 log
.msg("BuildSlave.info_unavailable")
153 d1
.addCallbacks(_got_info
, _info_unavailable
)
155 d
.addCallback(_get_info
)
157 def _get_commands(res
):
158 d1
= bot
.callRemote("getCommands")
159 def _got_commands(commands
):
160 state
["slave_commands"] = commands
161 def _commands_unavailable(why
):
162 # probably an old slave
163 log
.msg("BuildSlave._commands_unavailable")
164 if why
.check(AttributeError):
167 d1
.addCallbacks(_got_commands
, _commands_unavailable
)
169 d
.addCallback(_get_commands
)
171 def _accept_slave(res
):
172 self
.slave_status
.setAdmin(state
.get("admin"))
173 self
.slave_status
.setHost(state
.get("host"))
174 self
.slave_status
.setConnected(True)
175 self
.slave_commands
= state
.get("slave_commands")
177 log
.msg("bot attached")
178 self
.messageReceivedFromSlave()
179 if self
.missing_timer
:
180 self
.missing_timer
.cancel()
181 self
.missing_timer
= None
183 return self
.updateSlave()
184 d
.addCallback(_accept_slave
)
186 # Finally, the slave gets a reference to this BuildSlave. They
187 # receive this later, after we've started using them.
188 d
.addCallback(lambda res
: self
)
191 def messageReceivedFromSlave(self
):
193 self
.lastMessageReceived
= now
194 self
.slave_status
.setLastMessageReceived(now
)
196 def detached(self
, mind
):
198 self
.slave_status
.setConnected(False)
199 self
.botmaster
.slaveLost(self
)
200 log
.msg("BuildSlave.detached(%s)" % self
.slavename
)
201 if self
.notify_on_missing
and self
.parent
and not self
.missing_timer
:
202 self
.missing_timer
= reactor
.callLater(self
.missing_timeout
,
203 self
._missing
_timer
_fired
)
205 def _missing_timer_fired(self
):
206 self
.missing_timer
= None
207 # notify people, but only if we're still in the config
211 # first, see if we have a MailNotifier we can use. This gives us a
212 # fromaddr and a relayhost.
213 buildmaster
= self
.botmaster
.parent
214 status
= buildmaster
.getStatus()
215 for st
in buildmaster
.statusTargets
:
216 if isinstance(st
, MailNotifier
):
219 # if not, they get a default MailNotifier, which always uses SMTP
220 # to localhost and uses a dummy fromaddr of "buildbot".
221 log
.msg("buildslave-missing msg using default MailNotifier")
222 st
= MailNotifier("buildbot")
223 # now construct the mail
224 text
= "The Buildbot working for '%s'\n" % status
.getProjectName()
225 text
+= ("has noticed that the buildslave named %s went away\n" %
228 text
+= ("It last disconnected at %s (buildmaster-local time)\n" %
229 time
.ctime(time
.time() - self
.missing_timeout
)) # close enough
231 text
+= "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
232 text
+= "was '%s'.\n" % self
.slave_status
.getAdmin()
234 text
+= "Sincerely,\n"
235 text
+= " The Buildbot\n"
236 text
+= " %s\n" % status
.getProjectURL()
240 m
['Date'] = formatdate(localtime
=True)
241 m
['Subject'] = "Buildbot: buildslave %s was lost" % self
.slavename
242 m
['From'] = st
.fromaddr
243 recipients
= self
.notify_on_missing
244 m
['To'] = ", ".join(recipients
)
245 d
= st
.sendMessage(m
, recipients
)
246 # return the Deferred for testing purposes
249 def disconnect(self
):
250 """Forcibly disconnect the slave.
252 This severs the TCP connection and returns a Deferred that will fire
253 (with None) when the connection is probably gone.
255 If the slave is still alive, they will probably try to reconnect
258 This is called in two circumstances. The first is when a slave is
259 removed from the config file. In this case, when they try to
260 reconnect, they will be rejected as an unknown slave. The second is
261 when we wind up with two connections for the same slave, in which
262 case we disconnect the older connection.
266 return defer
.succeed(None)
267 log
.msg("disconnecting old slave %s now" % self
.slavename
)
269 # all kinds of teardown will happen as a result of
270 # loseConnection(), but it happens after a reactor iteration or
271 # two. Hook the actual disconnect so we can know when it is safe
272 # to connect the new slave. We have to wait one additional
273 # iteration (with callLater(0)) to make sure the *other*
274 # notifyOnDisconnect handlers have had a chance to run.
277 # notifyOnDisconnect runs the callback with one argument, the
278 # RemoteReference being disconnected.
279 def _disconnected(rref
):
280 reactor
.callLater(0, d
.callback
, None)
281 self
.slave
.notifyOnDisconnect(_disconnected
)
282 tport
= self
.slave
.broker
.transport
283 # this is the polite way to request that a socket be closed
284 tport
.loseConnection()
286 # but really we don't want to wait for the transmit queue to
287 # drain. The remote end is unlikely to ACK the data, so we'd
288 # probably have to wait for a (20-minute) TCP timeout.
289 #tport._closeSocket()
290 # however, doing _closeSocket (whether before or after
291 # loseConnection) somehow prevents the notifyOnDisconnect
292 # handlers from being run. Bummer.
294 tport
.dataBuffer
= ""
297 # however, these hacks are pretty internal, so don't blow up if
298 # they fail or are unavailable
299 log
.msg("failed to accelerate the shutdown process")
301 log
.msg("waiting for slave to finish disconnecting")
303 # When this Deferred fires, we'll be ready to accept the new slave
306 def sendBuilderList(self
):
307 our_builders
= self
.botmaster
.getBuildersForSlave(self
.slavename
)
308 blist
= [(b
.name
, b
.builddir
) for b
in our_builders
]
309 d
= self
.slave
.callRemote("setBuilderList", blist
)
312 for name
, remote
in slist
.items():
313 # use get() since we might have changed our mind since then
314 b
= self
.botmaster
.builders
.get(name
)
316 d1
= b
.attached(self
, remote
, self
.slave_commands
)
318 return defer
.DeferredList(dl
)
319 def _set_failed(why
):
320 log
.msg("BuildSlave.sendBuilderList (%s) failed" % self
)
322 # TODO: hang up on them?, without setBuilderList we can't use
324 d
.addCallbacks(_sent
, _set_failed
)
327 def perspective_keepalive(self
):
def addSlaveBuilder(self, sb):
    """Log the addition and append C{sb} to self.slavebuilders.

    @param sb: the slave builder to start tracking
    """
    message = "%s adding %s" % (self, sb)
    log.msg(message)
    self.slavebuilders.append(sb)
def removeSlaveBuilder(self, sb):
    """Log the removal and drop C{sb} from self.slavebuilders.

    Removing a slave builder that is not in the list is not an error;
    the request is still logged and the list is left untouched.

    @param sb: the slave builder to stop tracking
    """
    message = "%s removing %s" % (self, sb)
    log.msg(message)
    if sb not in self.slavebuilders:
        return
    self.slavebuilders.remove(sb)
339 def canStartBuild(self
):
341 I am called when a build is requested to see if this buildslave
342 can start a build. This function can be used to limit overall
343 concurrency on the buildslave.
346 active_builders
= [sb
for sb
in self
.slavebuilders
if sb
.isBusy()]
347 if len(active_builders
) >= self
.max_builds
: