choose slaves randomly rather than always picking the first one, closes #36
[buildbot.git] / buildbot / test / test_slaves.py
blob978a788491001040af3ade0c41e6c6bdb31d8310
1 # -*- test-case-name: buildbot.test.test_slaves -*-
3 from twisted.trial import unittest
4 from twisted.internet import defer, reactor
5 from twisted.python import log
7 from buildbot.test.runutils import RunMixin
8 from buildbot.sourcestamp import SourceStamp
9 from buildbot.process.base import BuildRequest
10 from buildbot.status.builder import SUCCESS
11 from buildbot.slave import bot
# Master configuration loaded by the Slave and Slave2 test cases: three known
# bots and a single builder 'b1' that lists all three as candidate slaves.
# The text is handed to master.loadConfig(), so it must be valid Python.
config_1 = """
from buildbot.process import factory
from buildbot.steps import dummy
s = factory.s

BuildmasterConfig = c = {}
c['bots'] = [('bot1', 'sekrit'), ('bot2', 'sekrit'), ('bot3', 'sekrit')]
c['sources'] = []
c['schedulers'] = []
c['slavePortnum'] = 0
c['schedulers'] = []

f1 = factory.BuildFactory([s(dummy.RemoteDummy, timeout=1)])
f2 = factory.BuildFactory([s(dummy.RemoteDummy, timeout=2)])

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b1', 'factory': f1},
    ]
"""

# config_1 with the 'builders' list re-assigned so that 'b1' builds with
# factory f2 instead of f1.
config_2 = config_1 + """

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1','bot2','bot3'],
     'builddir': 'b1', 'factory': f2},
    ]

"""
class Slave(RunMixin, unittest.TestCase):
    """Tests of how builder 'b1' hands builds to its attached slaves.

    Every test starts from config_1 with two slaves attached to 'b1': one
    connected under connectSlave()'s default name (the assertions below
    expect that to be 'bot1') and one connected explicitly as 'bot2'.
    """

    def setUp(self):
        """Load config_1, start the master and attach two slaves to 'b1'.

        Returns the Deferred from the slave connections so the test does not
        begin until both connectSlave() calls have completed.
        """
        RunMixin.setUp(self)
        self.master.loadConfig(config_1)
        self.master.startService()
        d = self.connectSlave(["b1"])
        # the second slave is only connected once the first call is done
        d.addCallback(lambda res: self.connectSlave(["b1"], "bot2"))
        return d

    def doBuild(self, buildername):
        """Submit a forced BuildRequest to the named builder.

        Returns the request's waitUntilFinished() Deferred. The callbacks in
        this class query its result with getResults() and getSlavename().
        """
        br = BuildRequest("forced", SourceStamp())
        d = br.waitUntilFinished()
        self.control.getBuilder(buildername).requestBuild(br)
        return d

    def testSequence(self):
        """Two builds run one after the other should both use 'bot1'."""
        # make sure both slaves appear in the list.
        attached_slaves = [c for c in self.master.botmaster.slaves.values()
                           if c.slave]
        self.failUnlessEqual(len(attached_slaves), 2)
        b = self.master.botmaster.builders["b1"]
        self.failUnlessEqual(len(b.slaves), 2)

        # Random slave selection is switched off for this test. Without it
        # the builder is expected not to rotate or load-balance, so two
        # builds in sequence should both use the first slave. This may
        # change later if we move to a more sophisticated scheme.
        b.CHOOSE_SLAVES_RANDOMLY = False

        d = self.doBuild("b1")
        d.addCallback(self._testSequence_1)
        return d

    def _testSequence_1(self, res):
        """Check the first build ran on 'bot1', then start the second."""
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot1")

        d = self.doBuild("b1")
        d.addCallback(self._testSequence_2)
        return d

    def _testSequence_2(self, res):
        """Check the second build also ran on 'bot1'."""
        self.failUnlessEqual(res.getSlavename(), "bot1")

    def testSimultaneous(self):
        """Two builds requested together should run on different slaves."""
        # make sure we can actually run two builds at the same time
        d1 = self.doBuild("b1")
        d2 = self.doBuild("b1")
        d1.addCallback(self._testSimultaneous_1, d2)
        return d1

    def _testSimultaneous_1(self, res, d2):
        """Record which slave ran the first build, then wait on the second."""
        self.failUnlessEqual(res.getResults(), SUCCESS)
        b1_slavename = res.getSlavename()
        d2.addCallback(self._testSimultaneous_2, b1_slavename)
        return d2

    def _testSimultaneous_2(self, res, b1_slavename):
        """Check that, between them, the builds used 'bot1' and 'bot2'."""
        self.failUnlessEqual(res.getResults(), SUCCESS)
        b2_slavename = res.getSlavename()
        # make sure the two builds were run by different slaves; the order
        # in which they were claimed does not matter
        slavenames = sorted([b1_slavename, b2_slavename])
        self.failUnlessEqual(slavenames, ["bot1", "bot2"])

    def testFallback1(self):
        """With 'bot1' shut down, a build should run on 'bot2'."""
        # detach the first slave, verify that a build is run using the second
        # slave instead
        d = self.shutdownSlave("bot1", "b1")
        d.addCallback(self._testFallback1_1)
        return d

    def _testFallback1_1(self, res):
        """Check only one slave remains attached, then start a build."""
        attached_slaves = [c for c in self.master.botmaster.slaves.values()
                           if c.slave]
        self.failUnlessEqual(len(attached_slaves), 1)
        # the builder's own list must agree with the botmaster's view
        self.failUnlessEqual(len(self.master.botmaster.builders["b1"].slaves),
                             1)

        d = self.doBuild("b1")
        d.addCallback(self._testFallback1_2)
        return d

    def _testFallback1_2(self, res):
        """Check the build succeeded on the remaining slave, 'bot2'."""
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot2")

    def testFallback2(self):
        """With 'bot1' unresponsive, a build should fall over to 'bot2'."""
        # Disable the first slave, so that a slaveping will timeout. Then
        # start a build, and verify that the non-failing (second) one is
        # claimed for the build, and that the failing one is removed from the
        # list.
        builder = self.master.botmaster.builders["b1"]

        # reduce the ping time so we'll failover faster
        builder.START_BUILD_TIMEOUT = 1
        # Random selection is expected to be on by default. It is reported
        # through the test framework (rather than a bare assert) so the check
        # still runs under "python -O". It is then turned off so that the
        # disabled slave is the one tried first.
        self.failUnless(builder.CHOOSE_SLAVES_RANDOMLY)
        builder.CHOOSE_SLAVES_RANDOMLY = False
        self.disappearSlave("bot1", "b1")
        d = self.doBuild("b1")
        d.addCallback(self._testFallback2_1)
        return d

    def _testFallback2_1(self, res):
        """Check 'bot2' ran the build and is the only slave left on 'b1'."""
        self.failUnlessEqual(res.getResults(), SUCCESS)
        self.failUnlessEqual(res.getSlavename(), "bot2")
        b1slaves = self.master.botmaster.builders["b1"].slaves
        # TODO: this check fails sometimes, sometimes len(b1slaves)==2,
        # sometimes it is empty
        self.failUnlessEqual(len(b1slaves), 1, b1slaves)
        self.failUnlessEqual(b1slaves[0].slave.slavename, "bot2")

    def notFinished(self, brs):
        """Assert that the request status 'brs' has no finished build.

        Fails if the request has more than one build, or if its single build
        reports isFinished().
        """
        # utility method
        builds = brs.getBuilds()
        self.failIf(len(builds) > 1)
        if builds:
            self.failIf(builds[0].isFinished())

    def testDontClaimPingingSlave(self):
        """A slave that is still being pinged must not be claimed again."""
        # have two slaves connect for the same builder. Do something to the
        # first one so that slavepings are delayed (but do not fail
        # outright).
        timers = []
        # NOTE(review): presumably (stall seconds, list that collects the
        # pending timers); _testDontClaimPingingSlave_2 expects exactly one
        # entry with a reset() method -- confirm against the slave code.
        self.slaves['bot1'].debugOpts["stallPings"] = (10, timers)
        br = BuildRequest("forced", SourceStamp())
        d1 = br.waitUntilFinished()
        self.master.botmaster.builders["b1"].CHOOSE_SLAVES_RANDOMLY = False
        self.control.getBuilder("b1").requestBuild(br)
        s1 = br.status # this is a BuildRequestStatus
        # give it a chance to start pinging
        d2 = defer.Deferred()
        d2.addCallback(self._testDontClaimPingingSlave_1, d1, s1, timers)
        reactor.callLater(1, d2.callback, None)
        return d2

    def _testDontClaimPingingSlave_1(self, res, d1, s1, timers):
        """Start a second build while the first is still waiting on its ping."""
        # now the first build is running (waiting on the ping), so start the
        # second build. This should claim the second slave, not the first,
        # because the first is busy doing the ping.
        self.notFinished(s1)
        d3 = self.doBuild("b1")
        d3.addCallback(self._testDontClaimPingingSlave_2, d1, s1, timers)
        return d3

    def _testDontClaimPingingSlave_2(self, res, d1, s1, timers):
        """Check the second build used 'bot2', then release the stalled ping."""
        self.failUnlessEqual(res.getSlavename(), "bot2")
        self.notFinished(s1)
        # now let the ping complete
        self.failUnlessEqual(len(timers), 1)
        timers[0].reset(0)
        d1.addCallback(self._testDontClaimPingingSlave_3)
        return d1

    def _testDontClaimPingingSlave_3(self, res):
        """Check the first (stalled) build eventually ran on 'bot1'."""
        self.failUnlessEqual(res.getSlavename(), "bot1")
# Master configuration loaded by the Reconfig test case: a single bot and a
# single builder 'b1'. Three factories are defined, each with one dummy.Wait
# step carrying its own handle ('one', 'two', 'three'); config_3 itself
# selects f1. The text is handed to master.loadConfig(), so it must be valid
# Python.
config_3 = """
from buildbot.process import factory
from buildbot.steps import dummy
s = factory.s

BuildmasterConfig = c = {}
c['bots'] = [('bot1', 'sekrit')]
c['sources'] = []
c['schedulers'] = []
c['slavePortnum'] = 0
c['schedulers'] = []

f1 = factory.BuildFactory([s(dummy.Wait, handle='one')])
f2 = factory.BuildFactory([s(dummy.Wait, handle='two')])
f3 = factory.BuildFactory([s(dummy.Wait, handle='three')])

c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f1},
    ]
"""

# config_3 with builder 'b1' switched to factory f2.
config_4 = config_3 + """
c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f2},
    ]
"""

# config_3 with builder 'b1' switched to factory f3.
config_5 = config_3 + """
c['builders'] = [
    {'name': 'b1', 'slavenames': ['bot1'],
     'builddir': 'b1', 'factory': f3},
    ]
"""
227 from buildbot.slave.commands import waitCommandRegistry
class Reconfig(RunMixin, unittest.TestCase):
    """Reload the master configuration while builds are running and queued.

    The builds consist of a single dummy.Wait step. Each step is tied,
    through waitCommandRegistry, to one of the _*_started hooks below, which
    lets the test observe that a build has started and decide when it may
    finish.
    """

    def setUp(self):
        """Load config_3, start the master and attach one slave to 'b1'."""
        RunMixin.setUp(self)
        self.master.loadConfig(config_3)
        self.master.startService()
        return self.connectSlave(["b1"])

    def _one_started(self):
        """Hook registered for ("one", "build1").

        Marks build 1 as started, fires d1 and returns d2.
        """
        log.msg("testReconfig._one_started")
        # the flag must be set before d1 fires: _testReconfig_2 checks it
        self.build1_started = True
        self.d1.callback(None)
        return self.d2

    def _two_started(self):
        """Hook registered for ("two", "build2").

        Marks build 2 as started, fires d3 and returns d4.
        """
        log.msg("testReconfig._two_started")
        self.build2_started = True
        self.d3.callback(None)
        return self.d4

    def _three_started(self):
        """Hook registered for ("three", "build3").

        Marks build 3 as started, fires d5 and returns d6.
        """
        log.msg("testReconfig._three_started")
        self.build3_started = True
        self.d5.callback(None)
        return self.d6

    def testReconfig(self):
        """Queue three builds on 'b1' and reconfigure while they run."""
        # reconfiguring a Builder should not interrupt any running Builds. No
        # queued BuildRequests should be lost. The next Build started should
        # use the new process.
        bot_service = self.slaves['bot1'].getServiceNamed('bot')
        slave_builder = bot_service.builders['b1']
        self.failUnless(isinstance(slave_builder, bot.SlaveBuilder))
        self.failUnless(slave_builder.running)
        builder = self.master.botmaster.builders['b1']
        # remembered so _testReconfig_3 can tell the Builder was replaced
        self.orig_b1 = builder

        # one pair of Deferreds per build; see the _*_started hooks
        self.d1, self.d2 = defer.Deferred(), defer.Deferred()
        self.d3, self.d4 = defer.Deferred(), defer.Deferred()
        self.d5, self.d6 = defer.Deferred(), defer.Deferred()
        self.build1_started = self.build2_started = self.build3_started = False
        waitCommandRegistry[("one","build1")] = self._one_started
        waitCommandRegistry[("two","build2")] = self._two_started
        waitCommandRegistry[("three","build3")] = self._three_started

        # use different branches to make sure these cannot be merged
        submitted = []
        for reason, branch in (("build1", "1"), ("build2", "2"),
                               ("build3", "3")):
            request = BuildRequest(reason, SourceStamp(branch=branch))
            builder.submitBuildRequest(request)
            submitted.append(request)
        self.requests = tuple(submitted)
        # all three are now in the queue

        # wait until the first one has started
        first_started = self.d1
        first_started.addCallback(self._testReconfig_2)
        return first_started

    def _testReconfig_2(self, res):
        """Build 1 has started: confirm it, then load config_4."""
        log.msg("_testReconfig_2")
        # confirm that it is building
        running = self.requests[0].status.getBuilds()
        self.failUnlessEqual(len(running), 1)
        self.build1 = running[0]
        self.failUnlessEqual(self.build1.getCurrentStep().getName(), "wait")
        # br1 is building, br2 and br3 are in the queue (in that order). Now
        # we reconfigure the Builder.
        self.failUnless(self.build1_started)
        reloaded = self.master.loadConfig(config_4)
        return reloaded.addCallback(self._testReconfig_3)

    def _testReconfig_3(self, res):
        """After the reload: build 1 still runs, the queue moved over."""
        log.msg("_testReconfig_3")
        # now check to see that br1 is still building, and that br2 and br3
        # are in the queue of the new builder
        new_builder = self.master.botmaster.builders['b1']
        self.failIfIdentical(new_builder, self.orig_b1)
        self.failIf(self.build1.isFinished())
        self.failUnlessEqual(self.build1.getCurrentStep().getName(), "wait")
        self.failUnlessEqual(len(new_builder.buildable), 2)
        for queued in (self.requests[1], self.requests[2]):
            self.failUnless(queued in new_builder.buildable)

        # allow br1 to finish, and make sure its status is delivered normally
        finished = self.requests[0].waitUntilFinished()
        finished.addCallback(self._testReconfig_4)
        # firing d2 releases the Deferred returned by _one_started
        self.d2.callback(None)
        return finished

    def _testReconfig_4(self, bs):
        """Build 1 has finished: check its status, then pause one second."""
        log.msg("_testReconfig_4")
        self.failUnlessEqual(bs.getReason(), "build1")
        self.failUnless(bs.isFinished())
        self.failUnlessEqual(bs.getResults(), SUCCESS)

        # at this point, the first build has finished, and there is a pending
        # call to start the second build. Once that pending call fires, there
        # is a network roundtrip before the 'wait' RemoteCommand is delivered
        # to the slave. We need to wait for both events to happen before we
        # can check to make sure it is using the correct process. Just wait a
        # full second.
        pause = defer.Deferred()
        pause.addCallback(self._testReconfig_5)
        reactor.callLater(1, pause.callback, None)
        return pause

    def _testReconfig_5(self, res):
        """Build 2 should be running; reconfigure three times more."""
        log.msg("_testReconfig_5")
        # at this point the next build ought to be running
        current = self.master.botmaster.builders['b1']
        self.failUnlessEqual(len(current.buildable), 1)
        self.failUnless(self.requests[2] in current.buildable)
        self.failUnlessEqual(len(current.building), 1)
        # and it ought to be using the new process
        self.failUnless(self.build2_started)

        # now, while the second build is running, change the config multiple
        # times.
        def _release_second(ignored):
            # then once that's done, allow the second build to finish and
            # wait for it to complete
            second_finished = self.requests[1].waitUntilFinished()
            self.d4.callback(None)
            return second_finished

        def _pause(ignored):
            # and once *that*'s done, wait another second to let the third
            # build start
            timer = defer.Deferred()
            reactor.callLater(1, timer.callback, None)
            return timer

        chain = self.master.loadConfig(config_3)
        for next_config in (config_4, config_5):
            # bind the config as a default argument so each callback loads
            # its own config rather than the last one in the loop
            chain.addCallback(
                lambda ignored, cfg=next_config: self.master.loadConfig(cfg))
        chain.addCallback(_release_second)
        chain.addCallback(_pause)
        chain.addCallback(self._testReconfig_6)
        return chain

    def _testReconfig_6(self, res):
        """Final step: the third build must have started."""
        log.msg("_testReconfig_6")
        # now check to see that the third build is running
        self.failUnless(self.build3_started)

        # we're done
class Slave2(RunMixin, unittest.TestCase):
    """Check the ordering of queued builds when a slave fails its first ping.

    Unlike Slave, setUp does not connect any slave; the test connects one
    itself after the builds have been requested.
    """

    # counter used by doBuild to give every request its own revision
    revision = 0

    def setUp(self):
        """Load config_1 and start the master, with no slaves attached."""
        RunMixin.setUp(self)
        self.master.loadConfig(config_1)
        self.master.startService()

    def doBuild(self, buildername, reason="forced"):
        """Request a build with the given reason on the named builder.

        Returns the request's waitUntilFinished() Deferred.
        """
        # we need to prevent these builds from being merged, so we create
        # each of them with a different revision specifier. The revision is
        # ignored because our build process does not have a source checkout
        # step.
        self.revision += 1
        stamp = SourceStamp(revision=self.revision)
        request = BuildRequest(reason, stamp)
        finished = request.waitUntilFinished()
        self.control.getBuilder(buildername).requestBuild(request)
        return finished

    def testFirstComeFirstServed(self):
        """A build re-queued after a failed ping keeps its place in line."""
        # submit the builds, then connect a slave which fails the
        # slaveping. The first build will claim the slave, do the slaveping,
        # give up, and re-queue the build. Verify that the build gets
        # re-queued in front of all other builds. This may be tricky, because
        # the other builds may attempt to claim the just-failed slave.
        first = self.doBuild("b1", "first")
        second = self.doBuild("b1", "second")

        # specifically, I want the poor build to get precedence over any
        # others that were waiting. To test this, we need more builds than
        # slaves.

        # now connect a broken slave. The first build started as soon as it
        # connects, so by the time we get to our _1 method, the ill-fated
        # build has already started.
        connected = self.connectSlave(["b1"], opts={"failPingOnce": True})
        connected.addCallback(self._testFirstComeFirstServed_1, first, second)
        return connected

    def _testFirstComeFirstServed_1(self, res, d1, d2):
        """Wait for both builds to complete before inspecting their order."""
        # the master has sent the slaveping. When this is received, it will
        # fail, causing the master to hang up on the slave. When it
        # reconnects, it should find the first build at the front of the
        # queue. If we simply wait for both builds to complete, then look at
        # the status logs, we should see that the builds ran in the correct
        # order.
        both = defer.DeferredList([d1,d2])
        both.addCallback(self._testFirstComeFirstServed_2)
        return both

    def _testFirstComeFirstServed_2(self, res):
        """Check that builds 0 and 1 of 'b1' are "first" then "second"."""
        builder_status = self.status.getBuilder("b1")
        build0 = builder_status.getBuild(0)
        build1 = builder_status.getBuild(1)
        observed = [build0.getReason(), build1.getReason()]
        self.failUnlessEqual(observed, ["first", "second"])