add arg to send email when buildslaves go missing. Closes #64.
[buildbot.git] / buildbot / test / test_slaves.py
blob379be9f8e58385c918b451a6cc79d09ea6c55cbf
1 # -*- test-case-name: buildbot.test.test_slaves -*-
3 from twisted.trial import unittest
4 from twisted.internet import defer, reactor
5 from twisted.python import log
7 from buildbot.test.runutils import RunMixin
8 from buildbot.sourcestamp import SourceStamp
9 from buildbot.process.base import BuildRequest
10 from buildbot.status.builder import SUCCESS
11 from buildbot.status import mail
12 from buildbot.slave import bot
# Master configuration shared by most tests in this module: three known
# slaves (bot1..bot3), no schedulers, and a single builder 'b1' that may run
# on any of the three slaves using factory f1 (a RemoteDummy step with
# timeout=1).
config_1 = """
from buildbot.process import factory
from buildbot.steps import dummy
from buildbot.buildslave import BuildSlave
s = factory.s

BuildmasterConfig = c = {}
c['slaves'] = [BuildSlave('bot1', 'sekrit'), BuildSlave('bot2', 'sekrit'),
               BuildSlave('bot3', 'sekrit')]
c['schedulers'] = []
c['slavePortnum'] = 0
c['schedulers'] = []

f1 = factory.BuildFactory([s(dummy.RemoteDummy, timeout=1)])
f2 = factory.BuildFactory([s(dummy.RemoteDummy, timeout=2)])

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b1', 'factory': f1},
    ]
"""

# Same as config_1, except that builder 'b1' is redefined to use factory f2
# (RemoteDummy with timeout=2).
config_2 = config_1 + """

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b1', 'factory': f2},
    ]

"""
class Slave(RunMixin, unittest.TestCase):
    """Check which slave a builder picks when more than one is attached.

    Every test starts from config_1 with two slaves attached to builder
    'b1': the one connectSlave() attaches by default (the assertions below
    expect it to be 'bot1') and 'bot2'.
    """

    def setUp(self):
        """Start the master with config_1 and attach two slaves in order."""
        RunMixin.setUp(self)
        self.master.loadConfig(config_1)
        self.master.startService()
        d = self.connectSlave(["b1"])
        d.addCallback(lambda res: self.connectSlave(["b1"], "bot2"))
        return d

    def _attachedSlaves(self):
        """Return the botmaster's slave entries whose `slave` attribute is
        set (i.e. the ones the tests treat as currently attached)."""
        return [entry for entry in self.master.botmaster.slaves.values()
                if entry.slave]

    def doBuild(self, buildername):
        """Submit a forced BuildRequest to `buildername`.

        Returns the Deferred obtained from the request's
        waitUntilFinished(); the callbacks in this class query its result
        with getResults() and getSlavename().
        """
        br = BuildRequest("forced", SourceStamp())
        # grab the Deferred before handing the request to the builder
        d = br.waitUntilFinished()
        self.control.getBuilder(buildername).requestBuild(br)
        return d

    def testSequence(self):
        """Two builds run one after the other should both use bot1."""
        # make sure both slaves appear in the list.
        self.failUnlessEqual(len(self._attachedSlaves()), 2)
        b = self.master.botmaster.builders["b1"]
        self.failUnlessEqual(len(b.slaves), 2)

        # since the current scheduling algorithm is simple and does not
        # rotate or attempt any sort of load-balancing, two builds in
        # sequence should both use the first slave. This may change later if
        # we move to a more sophisticated scheme.
        b.CHOOSE_SLAVES_RANDOMLY = False

        d = self.doBuild("b1")
        d.addCallback(self._testSequence_1)
        return d

    def _testSequence_1(self, res):
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot1")

        d = self.doBuild("b1")
        d.addCallback(self._testSequence_2)
        return d

    def _testSequence_2(self, res):
        self.failUnlessEqual(res.getSlavename(), "bot1")

    def testSimultaneous(self):
        """Two builds requested together should run on different slaves."""
        # make sure we can actually run two builds at the same time
        d1 = self.doBuild("b1")
        d2 = self.doBuild("b1")
        d1.addCallback(self._testSimultaneous_1, d2)
        return d1

    def _testSimultaneous_1(self, res, d2):
        self.failUnlessEqual(res.getResults(), SUCCESS)
        b1_slavename = res.getSlavename()
        d2.addCallback(self._testSimultaneous_2, b1_slavename)
        return d2

    def _testSimultaneous_2(self, res, b1_slavename):
        self.failUnlessEqual(res.getResults(), SUCCESS)
        b2_slavename = res.getSlavename()
        # make sure the two builds were run by different slaves
        slavenames = [b1_slavename, b2_slavename]
        slavenames.sort()
        self.failUnlessEqual(slavenames, ["bot1", "bot2"])

    def testFallback1(self):
        """After bot1 is shut down, a build should run on bot2."""
        # detach the first slave, verify that a build is run using the second
        # slave instead
        d = self.shutdownSlave("bot1", "b1")
        d.addCallback(self._testFallback1_1)
        return d

    def _testFallback1_1(self, res):
        # only one slave should remain, both master-wide and on the builder
        self.failUnlessEqual(len(self._attachedSlaves()), 1)
        self.failUnlessEqual(len(self.master.botmaster.builders["b1"].slaves),
                             1)
        d = self.doBuild("b1")
        d.addCallback(self._testFallback1_2)
        return d

    def _testFallback1_2(self, res):
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot2")

    def testFallback2(self):
        """A slave that stops answering pings should be skipped and dropped."""
        # Disable the first slave, so that a slaveping will timeout. Then
        # start a build, and verify that the non-failing (second) one is
        # claimed for the build, and that the failing one is removed from the
        # list.

        b1 = self.master.botmaster.builders["b1"]
        # reduce the ping time so we'll failover faster
        b1.START_BUILD_TIMEOUT = 1
        # Random selection is expected to be on before we turn it off. Use a
        # trial assertion rather than a bare `assert`, which python -O
        # silently removes.
        self.failUnless(b1.CHOOSE_SLAVES_RANDOMLY)
        b1.CHOOSE_SLAVES_RANDOMLY = False
        self.disappearSlave("bot1", "b1", allowReconnect=False)
        d = self.doBuild("b1")
        d.addCallback(self._testFallback2_1)
        return d

    def _testFallback2_1(self, res):
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot2")
        b1slaves = self.master.botmaster.builders["b1"].slaves
        self.failUnlessEqual(len(b1slaves), 1, "whoops: %s" % (b1slaves,))
        self.failUnlessEqual(b1slaves[0].slave.slavename, "bot2")

    def notFinished(self, brs):
        """Utility: fail if the request status `brs` reports more than one
        build, or if its (only) build has already finished."""
        builds = brs.getBuilds()
        self.failIf(len(builds) > 1)
        if builds:
            self.failIf(builds[0].isFinished())

    def testDontClaimPingingSlave(self):
        """A slave that is busy answering a ping must not be claimed by a
        second build."""
        # have two slaves connect for the same builder. Do something to the
        # first one so that slavepings are delayed (but do not fail
        # outright).
        timers = []
        self.slaves['bot1'].debugOpts["stallPings"] = (10, timers)
        br = BuildRequest("forced", SourceStamp())
        d1 = br.waitUntilFinished()
        self.master.botmaster.builders["b1"].CHOOSE_SLAVES_RANDOMLY = False
        self.control.getBuilder("b1").requestBuild(br)
        s1 = br.status # this is a BuildRequestStatus
        # give it a chance to start pinging
        d2 = defer.Deferred()
        d2.addCallback(self._testDontClaimPingingSlave_1, d1, s1, timers)
        reactor.callLater(1, d2.callback, None)
        return d2

    def _testDontClaimPingingSlave_1(self, res, d1, s1, timers):
        # now the first build is running (waiting on the ping), so start the
        # second build. This should claim the second slave, not the first,
        # because the first is busy doing the ping.
        self.notFinished(s1)
        d3 = self.doBuild("b1")
        d3.addCallback(self._testDontClaimPingingSlave_2, d1, s1, timers)
        return d3

    def _testDontClaimPingingSlave_2(self, res, d1, s1, timers):
        self.failUnlessEqual(res.getSlavename(), "bot2")
        self.notFinished(s1)
        # now let the ping complete
        self.failUnlessEqual(len(timers), 1)
        timers[0].reset(0)
        d1.addCallback(self._testDontClaimPingingSlave_3)
        return d1

    def _testDontClaimPingingSlave_3(self, res):
        self.failUnlessEqual(res.getSlavename(), "bot1")

# Single-slave configuration used by the reconfiguration tests. Builder 'b1'
# runs on bot1 only; the three factories differ only in the handle passed to
# their dummy.Wait step ('one', 'two', 'three').
config_3 = """
from buildbot.process import factory
from buildbot.steps import dummy
from buildbot.buildslave import BuildSlave
s = factory.s

BuildmasterConfig = c = {}
c['slaves'] = [BuildSlave('bot1', 'sekrit')]
c['schedulers'] = []
c['slavePortnum'] = 0
c['schedulers'] = []

f1 = factory.BuildFactory([s(dummy.Wait, handle='one')])
f2 = factory.BuildFactory([s(dummy.Wait, handle='two')])
f3 = factory.BuildFactory([s(dummy.Wait, handle='three')])

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f1},
    ]
"""

# config_3 with builder 'b1' switched to factory f2
config_4 = config_3 + """
c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f2},
    ]
"""

# config_3 with builder 'b1' switched to factory f3
config_5 = config_3 + """
c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f3},
    ]
"""

228 from buildbot.slave.commands import waitCommandRegistry
class Reconfig(RunMixin, unittest.TestCase):
    """Reload the master configuration while builds are running or queued.

    The builds use dummy.Wait steps; each step's handle/reason pair is
    registered in waitCommandRegistry so the test is called back when the
    step starts and decides (through a Deferred) when it may finish.
    """

    def setUp(self):
        """Start the master with config_3 and attach the default slave."""
        RunMixin.setUp(self)
        self.master.loadConfig(config_3)
        self.master.startService()
        return self.connectSlave(["b1"])

    # Callbacks registered in waitCommandRegistry. Each one records that its
    # build started, fires the "started" Deferred, and returns the Deferred
    # that the test fires later to let the step proceed.

    def _one_started(self):
        log.msg("testReconfig._one_started")
        self.build1_started = True
        self.d1.callback(None)
        return self.d2

    def _two_started(self):
        log.msg("testReconfig._two_started")
        self.build2_started = True
        self.d3.callback(None)
        return self.d4

    def _three_started(self):
        log.msg("testReconfig._three_started")
        self.build3_started = True
        self.d5.callback(None)
        return self.d6

    def testReconfig(self):
        """Queue three builds, then reconfigure while they work through."""
        # reconfiguring a Builder should not interrupt any running Builds. No
        # queued BuildRequests should be lost. The next Build started should
        # use the new process.
        slave_builder = self.slaves['bot1'].getServiceNamed('bot').builders['b1']
        self.failUnless(isinstance(slave_builder, bot.SlaveBuilder))
        self.failUnless(slave_builder.running)
        builder = self.master.botmaster.builders['b1']
        # remembered so a later step can verify the Builder was replaced
        self.orig_b1 = builder

        self.d1, self.d2 = defer.Deferred(), defer.Deferred()
        self.d3, self.d4 = defer.Deferred(), defer.Deferred()
        self.d5, self.d6 = defer.Deferred(), defer.Deferred()
        self.build1_started = False
        self.build2_started = False
        self.build3_started = False
        hooks = [
            (("one", "build1"), self._one_started),
            (("two", "build2"), self._two_started),
            (("three", "build3"), self._three_started),
        ]
        for key, started in hooks:
            waitCommandRegistry[key] = started

        # use different branches to make sure these cannot be merged
        queued = []
        for n in (1, 2, 3):
            request = BuildRequest("build%d" % n, SourceStamp(branch=str(n)))
            builder.submitBuildRequest(request)
            queued.append(request)
        self.requests = tuple(queued)
        # all three are now in the queue

        # wait until the first one has started
        self.d1.addCallback(self._testReconfig_2)
        return self.d1

    def _testReconfig_2(self, res):
        log.msg("_testReconfig_2")
        # confirm that it is building
        running = self.requests[0].status.getBuilds()
        self.failUnlessEqual(len(running), 1)
        self.build1 = running[0]
        self.failUnlessEqual(self.build1.getCurrentStep().getName(), "wait")
        # br1 is building, br2 and br3 are in the queue (in that order). Now
        # we reconfigure the Builder.
        self.failUnless(self.build1_started)
        d = self.master.loadConfig(config_4)
        d.addCallback(self._testReconfig_3)
        return d

    def _testReconfig_3(self, res):
        log.msg("_testReconfig_3")
        # now check to see that br1 is still building, and that br2 and br3
        # are in the queue of the new builder
        builder = self.master.botmaster.builders['b1']
        self.failIfIdentical(builder, self.orig_b1)
        self.failIf(self.build1.isFinished())
        self.failUnlessEqual(self.build1.getCurrentStep().getName(), "wait")
        self.failUnlessEqual(len(builder.buildable), 2)
        for pending in self.requests[1:]:
            self.failUnless(pending in builder.buildable)

        # allow br1 to finish, and make sure its status is delivered normally
        d = self.requests[0].waitUntilFinished()
        d.addCallback(self._testReconfig_4)
        self.d2.callback(None)
        return d

    def _testReconfig_4(self, bs):
        log.msg("_testReconfig_4")
        self.failUnlessEqual(bs.getReason(), "build1")
        self.failUnless(bs.isFinished())
        self.failUnlessEqual(bs.getResults(), SUCCESS)

        # at this point, the first build has finished, and there is a pending
        # call to start the second build. Once that pending call fires, there
        # is a network roundtrip before the 'wait' RemoteCommand is delivered
        # to the slave. We need to wait for both events to happen before we
        # can check to make sure it is using the correct process. Just wait a
        # full second.
        pause = defer.Deferred()
        pause.addCallback(self._testReconfig_5)
        reactor.callLater(1, pause.callback, None)
        return pause

    def _testReconfig_5(self, res):
        log.msg("_testReconfig_5")
        # at this point the next build ought to be running
        builder = self.master.botmaster.builders['b1']
        self.failUnlessEqual(len(builder.buildable), 1)
        self.failUnless(self.requests[2] in builder.buildable)
        self.failUnlessEqual(len(builder.building), 1)
        # and it ought to be using the new process
        self.failUnless(self.build2_started)

        # now, while the second build is running, change the config multiple
        # times.
        d = self.master.loadConfig(config_3)
        for later_config in (config_4, config_5):
            # bind the config as a default so each callback loads its own
            d.addCallback(lambda res, cfg=later_config:
                          self.master.loadConfig(cfg))

        def _release_second(res):
            # then once that's done, allow the second build to finish and
            # wait for it to complete
            finished = self.requests[1].waitUntilFinished()
            self.d4.callback(None)
            return finished
        d.addCallback(_release_second)

        def _pause(res):
            # and once *that*'s done, wait another second to let the third
            # build start
            waiter = defer.Deferred()
            reactor.callLater(1, waiter.callback, None)
            return waiter
        d.addCallback(_pause)
        d.addCallback(self._testReconfig_6)
        return d

    def _testReconfig_6(self, res):
        log.msg("_testReconfig_6")
        # now check to see that the third build is running
        self.failUnless(self.build3_started)

        # we're done

class Slave2(RunMixin, unittest.TestCase):
    """Queue-ordering test: builds are requested before any slave attaches."""

    # counter used by doBuild to give each request a distinct revision
    revision = 0

    def setUp(self):
        """Start the master with config_1; no slave is connected here."""
        RunMixin.setUp(self)
        self.master.loadConfig(config_1)
        self.master.startService()

    def doBuild(self, buildername, reason="forced"):
        """Request a build on `buildername` with the given `reason`.

        Returns the Deferred obtained from the request's
        waitUntilFinished().
        """
        # we need to prevent these builds from being merged, so we create
        # each of them with a different revision specifier. The revision is
        # ignored because our build process does not have a source checkout
        # step.
        self.revision += 1
        stamp = SourceStamp(revision=self.revision)
        request = BuildRequest(reason, stamp)
        finished = request.waitUntilFinished()
        self.control.getBuilder(buildername).requestBuild(request)
        return finished

    def testFirstComeFirstServed(self):
        """A build re-queued after a failed ping keeps its place in line."""
        # submit three builds, then connect a slave which fails the
        # slaveping. The first build will claim the slave, do the slaveping,
        # give up, and re-queue the build. Verify that the build gets
        # re-queued in front of all other builds. This may be tricky, because
        # the other builds may attempt to claim the just-failed slave.

        first = self.doBuild("b1", "first")
        second = self.doBuild("b1", "second")

        # specifically, I want the poor build to get precedence over any
        # others that were waiting. To test this, we need more builds than
        # slaves.

        # now connect a broken slave. The first build started as soon as it
        # connects, so by the time we get to our _1 method, the ill-fated
        # build has already started.
        d = self.connectSlave(["b1"], opts={"failPingOnce": True})
        d.addCallback(self._testFirstComeFirstServed_1, first, second)
        return d

    def _testFirstComeFirstServed_1(self, res, d1, d2):
        # the master has sent the slaveping. When this is received, it will
        # fail, causing the master to hang up on the slave. When it
        # reconnects, it should find the first build at the front of the
        # queue. If we simply wait for both builds to complete, then look at
        # the status logs, we should see that the builds ran in the correct
        # order.

        both = defer.DeferredList([d1, d2])
        both.addCallback(self._testFirstComeFirstServed_2)
        return both

    def _testFirstComeFirstServed_2(self, res):
        builder_status = self.status.getBuilder("b1")
        builds = [builder_status.getBuild(number) for number in (0, 1)]
        reasons = [build.getReason() for build in builds]
        self.failUnlessEqual(reasons, ["first", "second"])

# config_1 with three builders ('dummy', 'dummy2', 'dummy3'), each allowed on
# all three slaves and each using factory f2.
config_multi_builders = config_1 + """
c['builders'] = [
    {'name': 'dummy', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b1', 'factory': f2},
    {'name': 'dummy2', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b2', 'factory': f2},
    {'name': 'dummy3', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b3', 'factory': f2},
    ]

"""

# config_1 reduced to a single slave that is created with
# notify_on_missing='admin' and missing_timeout=1, plus the project name and
# URL that the notification-mail test looks for in the message body.
config_mail_missing = config_1 + """
c['slaves'] = [BuildSlave('bot1', 'sekrit', notify_on_missing='admin',
                          missing_timeout=1)]
c['builders'] = [
    {'name': 'dummy', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f1},
    ]
c['projectName'] = 'myproject'
c['projectURL'] = 'myURL'
"""

class FakeMailer(mail.MailNotifier):
    """MailNotifier whose sendMessage records messages instead of sending.

    The `messages` list is not created here; whoever instantiates the
    mailer must assign it before sendMessage is called.
    """

    def sendMessage(self, m, recipients):
        """Append (m, recipients) to self.messages and return an
        already-fired Deferred."""
        captured = (m, recipients)
        self.messages.append(captured)
        return defer.succeed(None)

class BuildSlave(RunMixin, unittest.TestCase):
    """Tests for per-slave bookkeeping: builder tracking and the mail sent
    when a slave stays missing."""

    def test_track_builders(self):
        """The attached slave should know about all three builders."""
        self.master.loadConfig(config_multi_builders)
        self.master.readConfig = True
        self.master.startService()
        d = self.connectSlave()

        def _verify(res):
            builder = self.master.botmaster.builders['dummy']
            self.failUnless(len(builder.slaves) == 1) # just bot1

            buildslave = builder.slaves[0].slave
            self.failUnless(len(buildslave.slavebuilders) == 3)
            known_builders = [sb.builder for sb in buildslave.slavebuilders]
            self.failUnless(builder in known_builders)

        d.addCallback(_verify)
        return d

    def test_mail_on_missing(self):
        """Exactly one notification is mailed, and only after the slave has
        stayed disconnected for a while."""
        self.master.loadConfig(config_mail_missing)
        self.master.readConfig = True
        self.master.startService()
        mailer = FakeMailer("buildbot@example.org")
        mailer.messages = []
        mailer.setServiceParent(self.master)
        self.master.statusTargets.append(mailer)

        def _no_mail_yet(res):
            self.failIf(mailer.messages)

        def _disconnect(res):
            return self.shutdownSlave("bot1", "dummy")

        d = self.connectSlave()
        d.addCallback(self.stall, 1)
        d.addCallback(_disconnect)
        d.addCallback(_no_mail_yet)
        # we reconnect right away, so the timer shouldn't fire
        d.addCallback(lambda res: self.connectSlave())
        d.addCallback(self.stall, 3)
        d.addCallback(_no_mail_yet)
        d.addCallback(_disconnect)
        d.addCallback(_no_mail_yet)
        # now we let it sit disconnected for long enough for the timer to
        # fire
        d.addCallback(self.stall, 3)

        def _verify_mail(res):
            self.failUnlessEqual(len(mailer.messages), 1)
            message, recipients = mailer.messages[0]
            self.failUnlessEqual(recipients, ["admin"])
            body = message.as_string()
            expected_fragments = (
                "Subject: Buildbot: buildslave bot1 was lost",
                "From: buildbot@example.org",
                "working for 'myproject'",
                "has noticed that the buildslave named bot1 went away",
                "was 'one'",
                "myURL",
            )
            for fragment in expected_fragments:
                self.failUnlessIn(fragment, body)

        d.addCallback(_verify_mail)
        return d

    def stall(self, result, delay=1):
        """Return a Deferred that fires with `result` after `delay`
        seconds, so it can be used as a pass-through callback."""
        later = defer.Deferred()
        reactor.callLater(delay, later.callback, result)
        return later