App Engine Python SDK version 1.8.2
[gae.git] / python / google / appengine / ext / cloudstorage / cloudstorage_stub.py
blobfc5414b81fd36be6eca437a220973fa9093c38b8
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Stub for Google storage."""
22 import calendar
23 import datetime
24 import hashlib
25 import StringIO
27 from google.appengine.api import datastore
28 from google.appengine.api import namespace_manager
29 from google.appengine.api.blobstore import blobstore_stub
30 from google.appengine.ext import db
31 from google.appengine.ext.cloudstorage import common
class _AE_GCSFileInfo_(db.Model):
  """Store GCS specific info.

  GCS allows user to define arbitrary metadata via header x-goog-meta-foo: bar.
  These headers are returned when user does a GET or HEAD on the object.

  Key name is blobkey.
  """

  filename = db.StringProperty(required=True)
  finalized = db.BooleanProperty(required=True)

  # Raw request headers, each stored as a single 'name:value' string with
  # the header name lowercased (HTTP header names are case insensitive).
  raw_options = db.StringListProperty()

  # The following fields are populated when the file is finalized or copied.
  size = db.IntegerProperty()
  creation = db.DateTimeProperty()
  content_type = db.StringProperty()
  etag = db.ByteStringProperty()

  def get_options(self):
    """Return user specified options as a dict of header name to value."""
    return dict(o.split(':', 1) for o in self.raw_options)

  def set_options(self, options_dict):
    """Store user specified request headers.

    Args:
      options_dict: a dict containing all user specified request headers.
        e.g. {'content-type': 'foo', 'x-goog-meta-bar': 'bar'}.
    """
    # Normalize header names once so that storage and the content-type
    # lookup below agree. Previously the lookup checked the un-normalized
    # dict, so a 'Content-Type' header was stored lowercased in
    # raw_options but self.content_type was never populated.
    normalized = dict(
        (k.lower(), v) for k, v in options_dict.iteritems())
    self.raw_options = [
        '%s:%s' % (k, v) for k, v in normalized.iteritems()]
    if 'content-type' in normalized:
      self.content_type = normalized['content-type']

  options = property(get_options, set_options)

  @classmethod
  def kind(cls):
    # Use the blobstore info kind so these entities double as blobstore
    # metadata for the stored blobs.
    return blobstore_stub._GS_INFO_KIND
class _AE_GCSPartialFile_(db.Model):
  """Store partial content for uploading files."""

  # End offset of this chunk within the file; stored exclusive (the stub
  # writes end + 1 of the inclusive XML-API range).
  end = db.IntegerProperty(required=True)

  # Blobkey under which this chunk's bytes are kept in blob storage.
  # NOTE(review): the chunk's start offset is encoded as the zero-padded
  # entity key name (and also set as a plain attribute by the stub).
  partial_content = db.TextProperty(required=True)
class CloudStorageStub(object):
  """Google Cloud Storage stub implementation.

  We use blobstore stub to store files. All metadata are stored
  in _AE_GCSFileInfo_.

  Note: this Google Cloud Storage stub is designed to work with
  apphosting.ext.cloudstorage.storage_api.py.
  It only implements the part of GCS storage_api.py uses, and its interface
  maps to GCS XML APIs.
  """

  def __init__(self, blob_storage):
    """Initialize.

    Args:
      blob_storage:
        apphosting.api.blobstore.blobstore_stub.BlobStorage instance
    """
    # Backing store for the actual file bytes; metadata lives in datastore.
    self.blob_storage = blob_storage
111 def _filename_to_blobkey(self, filename):
112 """Get blobkey for filename.
114 Args:
115 filename: gcs filename of form /bucket/filename.
117 Returns:
118 blobinfo's datastore's key name, aka, blobkey.
120 common.validate_file_path(filename)
122 return blobstore_stub.BlobstoreServiceStub.CreateEncodedGoogleStorageKey(
123 filename[1:])
125 @db.non_transactional
126 def post_start_creation(self, filename, options):
127 """Start object creation with a POST.
129 This implements the resumable upload XML API.
131 Args:
132 filename: gcs filename of form /bucket/filename.
133 options: a dict containing all user specified request headers.
134 e.g. {'content-type': 'foo', 'x-goog-meta-bar': 'bar'}.
136 Returns:
137 a token (blobkey) used for continuing upload.
139 ns = namespace_manager.get_namespace()
140 try:
141 namespace_manager.set_namespace('')
142 common.validate_file_path(filename)
143 token = self._filename_to_blobkey(filename)
144 gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)
146 self._cleanup_old_file(gcs_file)
147 new_file = _AE_GCSFileInfo_(key_name=token,
148 filename=filename,
149 finalized=False)
150 new_file.options = options
151 new_file.put()
152 return token
153 finally:
154 namespace_manager.set_namespace(ns)
157 @db.non_transactional
158 def _cleanup_old_file(self, gcs_file):
159 """Clean up the old version of a file.
161 The old version may or may not be finalized yet. Either way,
162 when user tries to create a file that already exists, we delete the
163 old version first.
165 Args:
166 gcs_file: an instance of _AE_GCSFileInfo_.
168 if gcs_file:
169 if gcs_file.finalized:
170 blobkey = gcs_file.key().name()
171 self.blob_storage.DeleteBlob(blobkey)
172 else:
173 db.delete(_AE_GCSPartialFile_.all().ancestor(gcs_file))
174 gcs_file.delete()
176 @db.non_transactional
177 def put_continue_creation(self, token, content, content_range,
178 last=False,
179 _upload_filename=None):
180 """Continue object upload with PUTs.
182 This implements the resumable upload XML API.
184 Args:
185 token: upload token returned by post_start_creation.
186 content: object content.
187 content_range: a (start, end) tuple specifying the content range of this
188 chunk. Both are inclusive according to XML API.
189 last: True if this is the last chunk of file content.
190 _upload_filename: internal use. Might be removed any time! This is
191 used by blobstore to pass in the upload filename from user.
193 Returns:
194 _AE_GCSFileInfo entity for this file if the file is finalized.
196 Raises:
197 ValueError: if token is invalid.
199 ns = namespace_manager.get_namespace()
200 try:
201 namespace_manager.set_namespace('')
202 gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)
203 if not gcs_file:
204 raise ValueError('Invalid token')
205 if content:
206 start, end = content_range
207 if len(content) != (end - start + 1):
208 raise ValueError('Invalid content range %d-%d' % content_range)
209 blobkey = '%s-%d-%d' % (token, content_range[0], content_range[1])
210 self.blob_storage.StoreBlob(blobkey, StringIO.StringIO(content))
211 new_content = _AE_GCSPartialFile_(parent=gcs_file,
213 key_name='%020d' % start,
214 partial_content=blobkey,
215 start=start,
216 end=end + 1)
217 new_content.put()
218 if last:
219 return self._end_creation(token, _upload_filename)
220 finally:
221 namespace_manager.set_namespace(ns)
223 @db.non_transactional
224 def put_copy(self, src, dst):
225 """Copy file from src to dst.
227 Metadata is copied.
229 Args:
230 src: /bucket/filename. This file must exist.
231 dst: /bucket/filename
233 common.validate_file_path(src)
234 common.validate_file_path(dst)
237 src_blobkey = self._filename_to_blobkey(src)
238 source = _AE_GCSFileInfo_.get_by_key_name(src_blobkey)
239 ns = namespace_manager.get_namespace()
240 try:
241 namespace_manager.set_namespace('')
242 token = self._filename_to_blobkey(dst)
243 new_file = _AE_GCSFileInfo_(key_name=token,
244 filename=dst,
245 finalized=True)
246 new_file.options = source.options
247 new_file.etag = source.etag
248 new_file.size = source.size
249 new_file.creation = datetime.datetime.utcnow()
250 new_file.put()
251 finally:
252 namespace_manager.set_namespace(ns)
255 local_file = self.blob_storage.OpenBlob(src_blobkey)
256 self.blob_storage.StoreBlob(token, local_file)
258 @db.non_transactional
259 def _end_creation(self, token, _upload_filename):
260 """End object upload.
262 Args:
263 token: upload token returned by post_start_creation.
265 Returns:
266 _AE_GCSFileInfo Entity for this file.
268 Raises:
269 ValueError: if token is invalid. Or file is corrupted during upload.
271 Save file content to blobstore. Save blobinfo and _AE_GCSFileInfo.
273 gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)
274 if not gcs_file:
275 raise ValueError('Invalid token')
277 error_msg, content = self._get_content(gcs_file)
278 if error_msg:
279 raise ValueError(error_msg)
281 gcs_file.etag = hashlib.md5(content).hexdigest()
282 gcs_file.creation = datetime.datetime.utcnow()
283 gcs_file.size = len(content)
287 blob_info = datastore.Entity('__BlobInfo__', name=str(token), namespace='')
288 blob_info['content_type'] = gcs_file.content_type
289 blob_info['creation'] = gcs_file.creation
290 blob_info['filename'] = _upload_filename
291 blob_info['md5_hash'] = gcs_file.etag
292 blob_info['size'] = gcs_file.size
293 datastore.Put(blob_info)
295 self.blob_storage.StoreBlob(token, StringIO.StringIO(content))
297 gcs_file.finalized = True
298 gcs_file.put()
299 return gcs_file
301 @db.transactional(propagation=db.INDEPENDENT)
302 def _get_content(self, gcs_file):
303 """Aggregate all partial content of the gcs_file.
305 Args:
306 gcs_file: an instance of _AE_GCSFileInfo_.
308 Returns:
309 (error_msg, content) tuple. error_msg is set if the file is
310 corrupted during upload. Otherwise content is set to the
311 aggregation of all partial contents.
313 content = ''
314 previous_end = 0
315 error_msg = ''
316 for partial in (_AE_GCSPartialFile_.all(namespace='').ancestor(gcs_file).
317 order('__key__')):
318 start = int(partial.key().name())
319 if not error_msg:
320 if start < previous_end:
321 error_msg = 'File is corrupted due to missing chunks.'
322 elif start > previous_end:
323 error_msg = 'File is corrupted due to overlapping chunks'
324 previous_end = partial.end
325 content += self.blob_storage.OpenBlob(partial.partial_content).read()
326 self.blob_storage.DeleteBlob(partial.partial_content)
327 partial.delete()
328 if error_msg:
329 gcs_file.delete()
330 content = ''
331 return error_msg, content
333 @db.non_transactional
334 def get_bucket(self,
335 bucketpath,
336 prefix,
337 marker,
338 max_keys):
339 """Get bucket listing with a GET.
341 Args:
342 bucketpath: gcs bucket path of form '/bucket'
343 prefix: prefix to limit listing.
344 marker: a str after which to start listing.
345 max_keys: max size of listing.
347 See https://developers.google.com/storage/docs/reference-methods#getbucket
348 for details.
350 Returns:
351 A list of GCSFileStat sorted by filename.
353 common.validate_bucket_path(bucketpath)
354 q = _AE_GCSFileInfo_.all(namespace='')
355 fully_qualified_prefix = '/'.join([bucketpath, prefix])
356 if marker:
357 q.filter('filename >', '/'.join([bucketpath, marker]))
358 else:
359 q.filter('filename >=', fully_qualified_prefix)
360 result = []
361 for info in q.run(limit=max_keys):
362 if not info.filename.startswith(fully_qualified_prefix):
363 break
365 info = db.get(info.key())
366 if info:
367 result.append(common.GCSFileStat(
368 filename=info.filename,
369 st_size=info.size,
370 st_ctime=calendar.timegm(info.creation.utctimetuple()),
371 etag=info.etag))
372 return result
374 @db.non_transactional
375 def get_object(self, filename, start=0, end=None):
376 """Get file content with a GET.
378 Args:
379 filename: gcs filename of form '/bucket/filename'.
380 start: start offset to request. Inclusive.
381 end: end offset to request. Inclusive.
383 Returns:
384 The segment of file content requested.
386 Raises:
387 ValueError: if file doesn't exist.
389 common.validate_file_path(filename)
390 blobkey = self._filename_to_blobkey(filename)
391 key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
392 gcsfileinfo = db.get(key)
393 if not gcsfileinfo or not gcsfileinfo.finalized:
394 raise ValueError('File does not exist.')
395 local_file = self.blob_storage.OpenBlob(blobkey)
396 local_file.seek(start)
397 if end:
398 return local_file.read(end - start + 1)
399 else:
400 return local_file.read()
402 @db.non_transactional
403 def head_object(self, filename):
404 """Get file stat with a HEAD.
406 Args:
407 filename: gcs filename of form '/bucket/filename'
409 Returns:
410 A GCSFileStat object containing file stat. None if file doesn't exist.
412 common.validate_file_path(filename)
413 blobkey = self._filename_to_blobkey(filename)
414 key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
415 info = db.get(key)
416 if info and info.finalized:
417 metadata = common.get_metadata(info.options)
418 filestat = common.GCSFileStat(
419 filename=info.filename,
420 st_size=info.size,
421 etag=info.etag,
422 st_ctime=calendar.timegm(info.creation.utctimetuple()),
423 content_type=info.content_type,
424 metadata=metadata)
425 return filestat
426 return None
428 @db.non_transactional
429 def delete_object(self, filename):
430 """Delete file with a DELETE.
432 Args:
433 filename: gcs filename of form '/bucket/filename'
435 Returns:
436 True if file is deleted. False if file doesn't exist.
438 common.validate_file_path(filename)
439 blobkey = self._filename_to_blobkey(filename)
440 key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
441 gcsfileinfo = db.get(key)
442 if not gcsfileinfo:
443 return False
445 blobstore_stub.BlobstoreServiceStub.DeleteBlob(blobkey, self.blob_storage)
446 return True