1 """A LatentSlave that uses EC2 to instantiate the slaves on demand.
3 Tested with Python boto 1.5c
14 from twisted
.internet
import defer
, threads
15 from twisted
.python
import log
17 from buildbot
.buildslave
import AbstractLatentBuildSlave
18 from buildbot
import interfaces
# Instance state strings that this module compares against ``instance.state``
# when deciding whether an instance is already on its way down.
SHUTTINGDOWN = 'shutting-down'
TERMINATED = 'terminated'
25 class EC2LatentBuildSlave(AbstractLatentBuildSlave
):
27 instance
= image
= None
28 _poll_resolution
= 5 # hook point for tests
30 def __init__(self
, name
, password
, instance_type
, ami
=None,
31 valid_ami_owners
=None, valid_ami_location_regex
=None,
32 elastic_ip
=None, identifier
=None, secret_identifier
=None,
33 aws_id_file_path
=None,
34 keypair_name
='latent_buildbot_slave',
35 security_name
='latent_buildbot_slave',
36 max_builds
=None, notify_on_missing
=[], missing_timeout
=60*20,
37 build_wait_timeout
=60*10, properties
={}):
38 AbstractLatentBuildSlave
.__init
__(
39 self
, name
, password
, max_builds
, notify_on_missing
,
40 missing_timeout
, build_wait_timeout
, properties
)
41 if not ((ami
is not None) ^
42 (valid_ami_owners
is not None or
43 valid_ami_location_regex
is not None)):
45 'You must provide either a specific ami, or one or both of '
46 'valid_ami_location_regex and valid_ami_owners')
48 if valid_ami_owners
is not None:
49 if isinstance(valid_ami_owners
, (int, long)):
50 valid_ami_owners
= (valid_ami_owners
,)
52 for element
in valid_ami_owners
:
53 if not isinstance(element
, (int, long)):
55 'valid_ami_owners should be int or iterable '
57 if valid_ami_location_regex
is not None:
58 if not isinstance(valid_ami_location_regex
, basestring
):
60 'valid_ami_location_regex should be a string')
62 # verify that regex will compile
63 re
.compile(valid_ami_location_regex
)
64 self
.valid_ami_owners
= valid_ami_owners
65 self
.valid_ami_location_regex
= valid_ami_location_regex
66 self
.instance_type
= instance_type
67 self
.keypair_name
= keypair_name
68 self
.security_name
= security_name
69 if identifier
is None:
70 assert secret_identifier
is None, (
71 'supply both or neither of identifier, secret_identifier')
72 if aws_id_file_path
is None:
73 home
= os
.environ
['HOME']
74 aws_id_file_path
= os
.path
.join(home
, '.ec2', 'aws_id')
75 if not os
.path
.exists(aws_id_file_path
):
77 "Please supply your AWS access key identifier and secret "
78 "access key identifier either when instantiating this %s "
79 "or in the %s file (on two lines).\n" %
80 (self
.__class
__.__name
__, aws_id_file_path
))
81 aws_file
= open(aws_id_file_path
, 'r')
83 identifier
= aws_file
.readline().strip()
84 secret_identifier
= aws_file
.readline().strip()
88 assert (aws_id_file_path
is None,
89 'if you supply the identifier and secret_identifier, '
90 'do not specify the aws_id_file_path')
91 assert (secret_identifier
is not None,
92 'supply both or neither of identifier, secret_identifier')
93 # Make the EC2 connection.
94 self
.conn
= boto
.connect_ec2(identifier
, secret_identifier
)
98 # We currently discard the keypair data because we don't need it.
99 # If we do need it in the future, we will always recreate the keypairs
100 # because there is no way to
101 # programmatically retrieve the private key component, unless we
102 # generate it and store it on the filesystem, which is an unnecessary
105 key_pair
= self
.conn
.get_all_key_pairs(keypair_name
)[0]
106 # key_pair.delete() # would be used to recreate
107 except boto
.exception
.EC2ResponseError
, e
:
108 if e
.code
!= 'InvalidKeyPair.NotFound':
109 if e
.code
== 'AuthFailure':
110 print ('POSSIBLE CAUSES OF ERROR:\n'
111 ' Did you sign up for EC2?\n'
112 ' Did you put a credit card number in your AWS '
114 'Please doublecheck before reporting a problem.\n')
116 # make one; we would always do this, and stash the result, if we
117 # needed the key (for instance, to SSH to the box). We'd then
118 # use paramiko to use the key to connect.
119 self
.conn
.create_key_pair(keypair_name
)
121 # create security group
123 group
= self
.conn
.get_all_security_groups(security_name
)[0]
124 except boto
.exception
.EC2ResponseError
, e
:
125 if e
.code
== 'InvalidGroup.NotFound':
126 self
.security_group
= self
.conn
.create_security_group(
128 'Authorization to access the buildbot instance.')
129 # Authorize the master as necessary
130 # TODO this is where we'd open the hole to do the reverse pb
131 # connect to the buildbot
132 # ip = urllib.urlopen(
133 # 'http://checkip.amazonaws.com').read().strip()
134 # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip)
135 # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip)
140 if self
.ami
is not None:
141 self
.image
= self
.conn
.get_image(self
.ami
)
143 # verify we have access to at least one acceptable image
144 discard
= self
.get_image()
146 # get the specified elastic IP, if any
147 if elastic_ip
is not None:
148 elastic_ip
= self
.conn
.get_all_addresses([elastic_ip
])[0]
149 self
.elastic_ip
= elastic_ip
152 if self
.image
is not None:
154 if self
.valid_ami_location_regex
:
157 get_match
= re
.compile(self
.valid_ami_location_regex
).match
158 for image
in self
.conn
.get_all_images(
159 owners
=self
.valid_ami_owners
):
160 # gather sorting data
161 match
= get_match(image
.location
)
163 alpha_sort
= int_sort
= None
166 alpha_sort
= match
.group(1)
172 int_sort
= int(alpha_sort
)
175 options
.append([int_sort
, alpha_sort
,
176 image
.location
, image
.id, image
])
178 log
.msg('sorting images at level %d' % level
)
179 options
= [candidate
[level
:] for candidate
in options
]
181 options
= [(image
.location
, image
.id, image
) for image
182 in self
.conn
.get_all_images(
183 owners
=self
.valid_ami_owners
)]
185 log
.msg('sorted images (last is chosen): %s' %
187 '%s (%s)' % (candidate
[-1].id, candidate
[-1].location
)
188 for candidate
in options
)))
190 raise ValueError('no available images match constraints')
191 return options
[-1][-1]
195 if self
.instance
is None:
197 return self
.instance
.public_dns_name
def start_instance(self):
    """Begin starting an instance without blocking the caller.

    When no instance is currently recorded on this slave, the blocking
    ``_start_instance`` method is handed to ``threads.deferToThread`` and
    whatever that call returns is passed straight back.

    Raises:
        ValueError: if ``self.instance`` is already set.
    """
    if self.instance is None:
        # The actual start-up work polls and blocks, so it runs in a thread.
        return threads.deferToThread(self._start_instance)
    raise ValueError('instance active')
204 def _start_instance(self
):
205 image
= self
.get_image()
206 reservation
= image
.run(
207 key_name
=self
.keypair_name
, security_groups
=[self
.security_name
],
208 instance_type
=self
.instance_type
)
209 self
.instance
= reservation
.instances
[0]
210 log
.msg('%s %s starting instance %s' %
211 (self
.__class
__.__name
__, self
.slavename
, self
.instance
.id))
213 interval
= self
._poll
_resolution
214 while self
.instance
.state
== PENDING
:
217 if duration
% 60 == 0:
218 log
.msg('%s %s has waited %d minutes for instance %s' %
219 (self
.__class
__.__name
__, self
.slavename
, duration
//60,
221 self
.instance
.update()
222 if self
.instance
.state
== RUNNING
:
223 self
.output
= self
.instance
.get_console_output()
224 minutes
= duration
//60
225 seconds
= duration
%60
226 log
.msg('%s %s instance %s started on %s '
227 'in about %d minutes %d seconds (%s)' %
228 (self
.__class
__.__name
__, self
.slavename
,
229 self
.instance
.id, self
.dns
, minutes
, seconds
,
231 if self
.elastic_ip
is not None:
232 self
.instance
.use_ip(self
.elastic_ip
)
233 return [self
.instance
.id,
235 '%02d:%02d:%02d' % (minutes
//60, minutes
%60, seconds
)]
237 log
.msg('%s %s failed to start instance %s (%s)' %
238 (self
.__class
__.__name
__, self
.slavename
,
239 self
.instance
.id, self
.instance
.state
))
240 raise interfaces
.LatentBuildSlaveFailedToSubstantiate(
241 self
.instance
.id, self
.instance
.state
)
def stop_instance(self, fast=False):
    """Begin stopping the current instance without blocking the caller.

    If no instance is recorded, the result of ``defer.succeed(None)`` is
    returned immediately.  Otherwise ``self.output`` and ``self.instance``
    are reset to ``None`` and the previously recorded instance, together
    with ``fast``, is handed to ``_stop_instance`` through
    ``threads.deferToThread``.

    Args:
        fast: forwarded unchanged to ``_stop_instance``.
    """
    current = self.instance
    if current is None:
        # Be gentle: the caller may only be signalling that an instance
        # never attached, in which case there is nothing to tear down.
        return defer.succeed(None)
    # Forget the instance before the (blocking) shutdown work is queued.
    self.output = None
    self.instance = None
    return threads.deferToThread(self._stop_instance, current, fast)
254 def _stop_instance(self
, instance
, fast
):
255 if self
.elastic_ip
is not None:
256 self
.conn
.disassociate_address(self
.elastic_ip
.public_ip
)
258 if instance
.state
not in (SHUTTINGDOWN
, TERMINATED
):
260 log
.msg('%s %s terminating instance %s' %
261 (self
.__class
__.__name
__, self
.slavename
, instance
.id))
263 interval
= self
._poll
_resolution
265 goal
= (SHUTTINGDOWN
, TERMINATED
)
269 while instance
.state
not in goal
:
272 if duration
% 60 == 0:
274 '%s %s has waited %d minutes for instance %s to end' %
275 (self
.__class
__.__name
__, self
.slavename
, duration
//60,
278 log
.msg('%s %s instance %s %s '
279 'after about %d minutes %d seconds' %
280 (self
.__class
__.__name
__, self
.slavename
,
281 instance
.id, goal
, duration
//60, duration
%60))