Merge branch 'web-improvements' of git://github.com/catlee/buildbot
[buildbot.git] / buildbot / ec2buildslave.py
blob 5adb34571ff012a21f36e4ca18af2b2765471a0c
1 """A LatentSlave that uses EC2 to instantiate the slaves on demand.
3 Tested with Python boto 1.5c
4 """
6 # Portions copyright Canonical Ltd. 2009
8 import cStringIO
9 import os
10 import re
11 import time
12 import urllib
14 import boto
15 import boto.exception
16 from twisted.internet import defer, threads
17 from twisted.python import log
19 from buildbot.buildslave import AbstractLatentBuildSlave
20 from buildbot import interfaces
# Instance state strings that EC2LatentBuildSlave compares against
# ``instance.state`` while polling an instance.
PENDING = 'pending'              # polled until the state changes
RUNNING = 'running'              # the only state treated as a good start
SHUTTINGDOWN = 'shutting-down'   # accepted as "stopped" for fast stops
TERMINATED = 'terminated'        # final state awaited by normal stops
27 class EC2LatentBuildSlave(AbstractLatentBuildSlave):
29 instance = image = None
30 _poll_resolution = 5 # hook point for tests
32 def __init__(self, name, password, instance_type, ami=None,
33 valid_ami_owners=None, valid_ami_location_regex=None,
34 elastic_ip=None, identifier=None, secret_identifier=None,
35 aws_id_file_path=None, user_data=None,
36 keypair_name='latent_buildbot_slave',
37 security_name='latent_buildbot_slave',
38 max_builds=None, notify_on_missing=[], missing_timeout=60*20,
39 build_wait_timeout=60*10, properties={}):
40 AbstractLatentBuildSlave.__init__(
41 self, name, password, max_builds, notify_on_missing,
42 missing_timeout, build_wait_timeout, properties)
43 if not ((ami is not None) ^
44 (valid_ami_owners is not None or
45 valid_ami_location_regex is not None)):
46 raise ValueError(
47 'You must provide either a specific ami, or one or both of '
48 'valid_ami_location_regex and valid_ami_owners')
49 self.ami = ami
50 if valid_ami_owners is not None:
51 if isinstance(valid_ami_owners, (int, long)):
52 valid_ami_owners = (valid_ami_owners,)
53 else:
54 for element in valid_ami_owners:
55 if not isinstance(element, (int, long)):
56 raise ValueError(
57 'valid_ami_owners should be int or iterable '
58 'of ints', element)
59 if valid_ami_location_regex is not None:
60 if not isinstance(valid_ami_location_regex, basestring):
61 raise ValueError(
62 'valid_ami_location_regex should be a string')
63 else:
64 # verify that regex will compile
65 re.compile(valid_ami_location_regex)
66 self.valid_ami_owners = valid_ami_owners
67 self.valid_ami_location_regex = valid_ami_location_regex
68 self.instance_type = instance_type
69 self.keypair_name = keypair_name
70 self.security_name = security_name
71 self.user_data = user_data
72 if identifier is None:
73 assert secret_identifier is None, (
74 'supply both or neither of identifier, secret_identifier')
75 if aws_id_file_path is None:
76 home = os.environ['HOME']
77 aws_id_file_path = os.path.join(home, '.ec2', 'aws_id')
78 if not os.path.exists(aws_id_file_path):
79 raise ValueError(
80 "Please supply your AWS access key identifier and secret "
81 "access key identifier either when instantiating this %s "
82 "or in the %s file (on two lines).\n" %
83 (self.__class__.__name__, aws_id_file_path))
84 aws_file = open(aws_id_file_path, 'r')
85 try:
86 identifier = aws_file.readline().strip()
87 secret_identifier = aws_file.readline().strip()
88 finally:
89 aws_file.close()
90 else:
91 assert (aws_id_file_path is None,
92 'if you supply the identifier and secret_identifier, '
93 'do not specify the aws_id_file_path')
94 assert (secret_identifier is not None,
95 'supply both or neither of identifier, secret_identifier')
96 # Make the EC2 connection.
97 self.conn = boto.connect_ec2(identifier, secret_identifier)
99 # Make a keypair
101 # We currently discard the keypair data because we don't need it.
102 # If we do need it in the future, we will always recreate the keypairs
103 # because there is no way to
104 # programmatically retrieve the private key component, unless we
105 # generate it and store it on the filesystem, which is an unnecessary
106 # usage requirement.
107 try:
108 key_pair = self.conn.get_all_key_pairs(keypair_name)[0]
109 # key_pair.delete() # would be used to recreate
110 except boto.exception.EC2ResponseError, e:
111 if e.code != 'InvalidKeyPair.NotFound':
112 if e.code == 'AuthFailure':
113 print ('POSSIBLE CAUSES OF ERROR:\n'
114 ' Did you sign up for EC2?\n'
115 ' Did you put a credit card number in your AWS '
116 'account?\n'
117 'Please doublecheck before reporting a problem.\n')
118 raise
119 # make one; we would always do this, and stash the result, if we
120 # needed the key (for instance, to SSH to the box). We'd then
121 # use paramiko to use the key to connect.
122 self.conn.create_key_pair(keypair_name)
124 # create security group
125 try:
126 group = self.conn.get_all_security_groups(security_name)[0]
127 except boto.exception.EC2ResponseError, e:
128 if e.code == 'InvalidGroup.NotFound':
129 self.security_group = self.conn.create_security_group(
130 security_name,
131 'Authorization to access the buildbot instance.')
132 # Authorize the master as necessary
133 # TODO this is where we'd open the hole to do the reverse pb
134 # connect to the buildbot
135 # ip = urllib.urlopen(
136 # 'http://checkip.amazonaws.com').read().strip()
137 # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip)
138 # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip)
139 else:
140 raise
142 # get the image
143 if self.ami is not None:
144 self.image = self.conn.get_image(self.ami)
145 else:
146 # verify we have access to at least one acceptable image
147 discard = self.get_image()
149 # get the specified elastic IP, if any
150 if elastic_ip is not None:
151 elastic_ip = self.conn.get_all_addresses([elastic_ip])[0]
152 self.elastic_ip = elastic_ip
154 def get_image(self):
155 if self.image is not None:
156 return self.image
157 if self.valid_ami_location_regex:
158 level = 0
159 options = []
160 get_match = re.compile(self.valid_ami_location_regex).match
161 for image in self.conn.get_all_images(
162 owners=self.valid_ami_owners):
163 # gather sorting data
164 match = get_match(image.location)
165 if match:
166 alpha_sort = int_sort = None
167 if level < 2:
168 try:
169 alpha_sort = match.group(1)
170 except IndexError:
171 level = 2
172 else:
173 if level == 0:
174 try:
175 int_sort = int(alpha_sort)
176 except ValueError:
177 level = 1
178 options.append([int_sort, alpha_sort,
179 image.location, image.id, image])
180 if level:
181 log.msg('sorting images at level %d' % level)
182 options = [candidate[level:] for candidate in options]
183 else:
184 options = [(image.location, image.id, image) for image
185 in self.conn.get_all_images(
186 owners=self.valid_ami_owners)]
187 options.sort()
188 log.msg('sorted images (last is chosen): %s' %
189 (', '.join(
190 '%s (%s)' % (candidate[-1].id, candidate[-1].location)
191 for candidate in options)))
192 if not options:
193 raise ValueError('no available images match constraints')
194 return options[-1][-1]
196 def dns(self):
197 if self.instance is None:
198 return None
199 return self.instance.public_dns_name
200 dns = property(dns)
202 def start_instance(self):
203 if self.instance is not None:
204 raise ValueError('instance active')
205 return threads.deferToThread(self._start_instance)
207 def _start_instance(self):
208 image = self.get_image()
209 reservation = image.run(
210 key_name=self.keypair_name, security_groups=[self.security_name],
211 instance_type=self.instance_type, user_data=self.user_data)
212 self.instance = reservation.instances[0]
213 log.msg('%s %s starting instance %s' %
214 (self.__class__.__name__, self.slavename, self.instance.id))
215 duration = 0
216 interval = self._poll_resolution
217 while self.instance.state == PENDING:
218 time.sleep(interval)
219 duration += interval
220 if duration % 60 == 0:
221 log.msg('%s %s has waited %d minutes for instance %s' %
222 (self.__class__.__name__, self.slavename, duration//60,
223 self.instance.id))
224 self.instance.update()
225 if self.instance.state == RUNNING:
226 self.output = self.instance.get_console_output()
227 minutes = duration//60
228 seconds = duration%60
229 log.msg('%s %s instance %s started on %s '
230 'in about %d minutes %d seconds (%s)' %
231 (self.__class__.__name__, self.slavename,
232 self.instance.id, self.dns, minutes, seconds,
233 self.output.output))
234 if self.elastic_ip is not None:
235 self.instance.use_ip(self.elastic_ip)
236 return [self.instance.id,
237 image.id,
238 '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)]
239 else:
240 log.msg('%s %s failed to start instance %s (%s)' %
241 (self.__class__.__name__, self.slavename,
242 self.instance.id, self.instance.state))
243 raise interfaces.LatentBuildSlaveFailedToSubstantiate(
244 self.instance.id, self.instance.state)
246 def stop_instance(self, fast=False):
247 if self.instance is None:
248 # be gentle. Something may just be trying to alert us that an
249 # instance never attached, and it's because, somehow, we never
250 # started.
251 return defer.succeed(None)
252 instance = self.instance
253 self.output = self.instance = None
254 return threads.deferToThread(
255 self._stop_instance, instance, fast)
257 def _stop_instance(self, instance, fast):
258 if self.elastic_ip is not None:
259 self.conn.disassociate_address(self.elastic_ip.public_ip)
260 instance.update()
261 if instance.state not in (SHUTTINGDOWN, TERMINATED):
262 instance.stop()
263 log.msg('%s %s terminating instance %s' %
264 (self.__class__.__name__, self.slavename, instance.id))
265 duration = 0
266 interval = self._poll_resolution
267 if fast:
268 goal = (SHUTTINGDOWN, TERMINATED)
269 instance.update()
270 else:
271 goal = (TERMINATED,)
272 while instance.state not in goal:
273 time.sleep(interval)
274 duration += interval
275 if duration % 60 == 0:
276 log.msg(
277 '%s %s has waited %d minutes for instance %s to end' %
278 (self.__class__.__name__, self.slavename, duration//60,
279 instance.id))
280 instance.update()
281 log.msg('%s %s instance %s %s '
282 'after about %d minutes %d seconds' %
283 (self.__class__.__name__, self.slavename,
284 instance.id, goal, duration//60, duration%60))