(refs #306) fix trigger documentation
[buildbot.git] / buildbot / ec2buildslave.py
blob240ddebec319b1f99630ceb35a9b29f088c48099
1 """A LatentSlave that uses EC2 to instantiate the slaves on demand.
3 Tested with Python boto 1.5c
4 """
6 import cStringIO
7 import os
8 import re
9 import time
10 import urllib
12 import boto
13 import boto.exception
14 from twisted.internet import defer, threads
15 from twisted.python import log
17 from buildbot.buildslave import AbstractLatentBuildSlave
18 from buildbot import interfaces
# EC2 instance state names, compared against ``instance.state`` while
# polling for start-up and shutdown.
PENDING, RUNNING = 'pending', 'running'
SHUTTINGDOWN, TERMINATED = 'shutting-down', 'terminated'
25 class EC2LatentBuildSlave(AbstractLatentBuildSlave):
27 instance = image = None
28 _poll_resolution = 5 # hook point for tests
30 def __init__(self, name, password, instance_type, ami=None,
31 valid_ami_owners=None, valid_ami_location_regex=None,
32 elastic_ip=None, identifier=None, secret_identifier=None,
33 aws_id_file_path=None,
34 keypair_name='latent_buildbot_slave',
35 security_name='latent_buildbot_slave',
36 max_builds=None, notify_on_missing=[], missing_timeout=60*20,
37 build_wait_timeout=60*10, properties={}):
38 AbstractLatentBuildSlave.__init__(
39 self, name, password, max_builds, notify_on_missing,
40 missing_timeout, build_wait_timeout, properties)
41 if not ((ami is not None) ^
42 (valid_ami_owners is not None or
43 valid_ami_location_regex is not None)):
44 raise ValueError(
45 'You must provide either a specific ami, or one or both of '
46 'valid_ami_location_regex and valid_ami_owners')
47 self.ami = ami
48 if valid_ami_owners is not None:
49 if isinstance(valid_ami_owners, (int, long)):
50 valid_ami_owners = (valid_ami_owners,)
51 else:
52 for element in valid_ami_owners:
53 if not isinstance(element, (int, long)):
54 raise ValueError(
55 'valid_ami_owners should be int or iterable '
56 'of ints', element)
57 if valid_ami_location_regex is not None:
58 if not isinstance(valid_ami_location_regex, basestring):
59 raise ValueError(
60 'valid_ami_location_regex should be a string')
61 else:
62 # verify that regex will compile
63 re.compile(valid_ami_location_regex)
64 self.valid_ami_owners = valid_ami_owners
65 self.valid_ami_location_regex = valid_ami_location_regex
66 self.instance_type = instance_type
67 self.keypair_name = keypair_name
68 self.security_name = security_name
69 if identifier is None:
70 assert secret_identifier is None, (
71 'supply both or neither of identifier, secret_identifier')
72 if aws_id_file_path is None:
73 home = os.environ['HOME']
74 aws_id_file_path = os.path.join(home, '.ec2', 'aws_id')
75 if not os.path.exists(aws_id_file_path):
76 raise ValueError(
77 "Please supply your AWS access key identifier and secret "
78 "access key identifier either when instantiating this %s "
79 "or in the %s file (on two lines).\n" %
80 (self.__class__.__name__, aws_id_file_path))
81 aws_file = open(aws_id_file_path, 'r')
82 try:
83 identifier = aws_file.readline().strip()
84 secret_identifier = aws_file.readline().strip()
85 finally:
86 aws_file.close()
87 else:
88 assert (aws_id_file_path is None,
89 'if you supply the identifier and secret_identifier, '
90 'do not specify the aws_id_file_path')
91 assert (secret_identifier is not None,
92 'supply both or neither of identifier, secret_identifier')
93 # Make the EC2 connection.
94 self.conn = boto.connect_ec2(identifier, secret_identifier)
96 # Make a keypair
98 # We currently discard the keypair data because we don't need it.
99 # If we do need it in the future, we will always recreate the keypairs
100 # because there is no way to
101 # programmatically retrieve the private key component, unless we
102 # generate it and store it on the filesystem, which is an unnecessary
103 # usage requirement.
104 try:
105 key_pair = self.conn.get_all_key_pairs(keypair_name)[0]
106 # key_pair.delete() # would be used to recreate
107 except boto.exception.EC2ResponseError, e:
108 if e.code != 'InvalidKeyPair.NotFound':
109 if e.code == 'AuthFailure':
110 print ('POSSIBLE CAUSES OF ERROR:\n'
111 ' Did you sign up for EC2?\n'
112 ' Did you put a credit card number in your AWS '
113 'account?\n'
114 'Please doublecheck before reporting a problem.\n')
115 raise
116 # make one; we would always do this, and stash the result, if we
117 # needed the key (for instance, to SSH to the box). We'd then
118 # use paramiko to use the key to connect.
119 self.conn.create_key_pair(keypair_name)
121 # create security group
122 try:
123 group = self.conn.get_all_security_groups(security_name)[0]
124 except boto.exception.EC2ResponseError, e:
125 if e.code == 'InvalidGroup.NotFound':
126 self.security_group = self.conn.create_security_group(
127 security_name,
128 'Authorization to access the buildbot instance.')
129 # Authorize the master as necessary
130 # TODO this is where we'd open the hole to do the reverse pb
131 # connect to the buildbot
132 # ip = urllib.urlopen(
133 # 'http://checkip.amazonaws.com').read().strip()
134 # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip)
135 # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip)
136 else:
137 raise
139 # get the image
140 if self.ami is not None:
141 self.image = self.conn.get_image(self.ami)
142 else:
143 # verify we have access to at least one acceptable image
144 discard = self.get_image()
146 # get the specified elastic IP, if any
147 if elastic_ip is not None:
148 elastic_ip = self.conn.get_all_addresses([elastic_ip])[0]
149 self.elastic_ip = elastic_ip
151 def get_image(self):
152 if self.image is not None:
153 return self.image
154 if self.valid_ami_location_regex:
155 level = 0
156 options = []
157 get_match = re.compile(self.valid_ami_location_regex).match
158 for image in self.conn.get_all_images(
159 owners=self.valid_ami_owners):
160 # gather sorting data
161 match = get_match(image.location)
162 if match:
163 alpha_sort = int_sort = None
164 if level < 2:
165 try:
166 alpha_sort = match.group(1)
167 except IndexError:
168 level = 2
169 else:
170 if level == 0:
171 try:
172 int_sort = int(alpha_sort)
173 except ValueError:
174 level = 1
175 options.append([int_sort, alpha_sort,
176 image.location, image.id, image])
177 if level:
178 log.msg('sorting images at level %d' % level)
179 options = [candidate[level:] for candidate in options]
180 else:
181 options = [(image.location, image.id, image) for image
182 in self.conn.get_all_images(
183 owners=self.valid_ami_owners)]
184 options.sort()
185 log.msg('sorted images (last is chosen): %s' %
186 (', '.join(
187 '%s (%s)' % (candidate[-1].id, candidate[-1].location)
188 for candidate in options)))
189 if not options:
190 raise ValueError('no available images match constraints')
191 return options[-1][-1]
193 @property
194 def dns(self):
195 if self.instance is None:
196 return None
197 return self.instance.public_dns_name
199 def start_instance(self):
200 if self.instance is not None:
201 raise ValueError('instance active')
202 return threads.deferToThread(self._start_instance)
204 def _start_instance(self):
205 image = self.get_image()
206 reservation = image.run(
207 key_name=self.keypair_name, security_groups=[self.security_name],
208 instance_type=self.instance_type)
209 self.instance = reservation.instances[0]
210 log.msg('%s %s starting instance %s' %
211 (self.__class__.__name__, self.slavename, self.instance.id))
212 duration = 0
213 interval = self._poll_resolution
214 while self.instance.state == PENDING:
215 time.sleep(interval)
216 duration += interval
217 if duration % 60 == 0:
218 log.msg('%s %s has waited %d minutes for instance %s' %
219 (self.__class__.__name__, self.slavename, duration//60,
220 self.instance.id))
221 self.instance.update()
222 if self.instance.state == RUNNING:
223 self.output = self.instance.get_console_output()
224 minutes = duration//60
225 seconds = duration%60
226 log.msg('%s %s instance %s started on %s '
227 'in about %d minutes %d seconds (%s)' %
228 (self.__class__.__name__, self.slavename,
229 self.instance.id, self.dns, minutes, seconds,
230 self.output.output))
231 if self.elastic_ip is not None:
232 self.instance.use_ip(self.elastic_ip)
233 return [self.instance.id,
234 image.id,
235 '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)]
236 else:
237 log.msg('%s %s failed to start instance %s (%s)' %
238 (self.__class__.__name__, self.slavename,
239 self.instance.id, self.instance.state))
240 raise interfaces.LatentBuildSlaveFailedToSubstantiate(
241 self.instance.id, self.instance.state)
243 def stop_instance(self, fast=False):
244 if self.instance is None:
245 # be gentle. Something may just be trying to alert us that an
246 # instance never attached, and it's because, somehow, we never
247 # started.
248 return defer.succeed(None)
249 instance = self.instance
250 self.output = self.instance = None
251 return threads.deferToThread(
252 self._stop_instance, instance, fast)
254 def _stop_instance(self, instance, fast):
255 if self.elastic_ip is not None:
256 self.conn.disassociate_address(self.elastic_ip.public_ip)
257 instance.update()
258 if instance.state not in (SHUTTINGDOWN, TERMINATED):
259 instance.stop()
260 log.msg('%s %s terminating instance %s' %
261 (self.__class__.__name__, self.slavename, instance.id))
262 duration = 0
263 interval = self._poll_resolution
264 if fast:
265 goal = (SHUTTINGDOWN, TERMINATED)
266 instance.update()
267 else:
268 goal = (TERMINATED,)
269 while instance.state not in goal:
270 time.sleep(interval)
271 duration += interval
272 if duration % 60 == 0:
273 log.msg(
274 '%s %s has waited %d minutes for instance %s to end' %
275 (self.__class__.__name__, self.slavename, duration//60,
276 instance.id))
277 instance.update()
278 log.msg('%s %s instance %s %s '
279 'after about %d minutes %d seconds' %
280 (self.__class__.__name__, self.slavename,
281 instance.id, goal, duration//60, duration%60))