App Engine Python SDK version 1.8.1
python/google/appengine/ext/cloudstorage/cloudstorage_stub.py
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 """Stub for Google storage."""
22 import calendar
23 import datetime
24 import hashlib
25 import StringIO
27 from google.appengine.api import datastore
28 from google.appengine.api import namespace_manager
29 from google.appengine.api.blobstore import blobstore_stub
30 from google.appengine.ext import db
31 from google.appengine.ext.cloudstorage import common


class _AE_GCSFileInfo_(db.Model):
  """Store GCS specific info.

  GCS allows a user to define arbitrary metadata via headers of the form
  x-goog-meta-foo: bar. These headers are returned when the user does a GET
  or HEAD on the object.

  Key name is blobkey.
  """

  filename = db.StringProperty(required=True)
  finalized = db.BooleanProperty(required=True)

  # User request headers, stored as 'name:value' strings with the header
  # names lowercased.
  raw_options = db.StringListProperty()

  # Object size in bytes; set when the file is finalized.
  size = db.IntegerProperty()

  creation = db.DateTimeProperty()
  content_type = db.StringProperty()
  # MD5 hex digest of the content, computed at finalization.
  etag = db.ByteStringProperty()

  def get_options(self):
    return dict(o.split(':', 1) for o in self.raw_options)

  def set_options(self, options_dict):
    self.raw_options = [
        '%s:%s' % (k.lower(), v) for k, v in options_dict.iteritems()]
    if 'content-type' in options_dict:
      self.content_type = options_dict['content-type']

  options = property(get_options, set_options)

  @classmethod
  def kind(cls):
    # Store these entities under the blobstore stub's Google Storage info
    # kind so that blobstore APIs can see GCS objects.
    return blobstore_stub._GS_INFO_KIND
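
# A round-trip sketch of the options encoding above (illustrative only; the
# key name and values are hypothetical, and list order may vary since it
# follows dict iteration order):
#
#   info = _AE_GCSFileInfo_(key_name='k', filename='/bucket/f', finalized=False)
#   info.options = {'Content-Type': 'text/plain', 'x-goog-meta-tag': 'a:b'}
#   info.raw_options  # ['content-type:text/plain', 'x-goog-meta-tag:a:b']
#   info.options      # {'content-type': 'text/plain', 'x-goog-meta-tag': 'a:b'}
#
# Note split(':', 1) in get_options keeps values containing ':' intact.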


class _AE_GCSPartialFile_(db.Model):
  """Store partial content for uploading files."""

  # Byte range of this chunk; end is exclusive (one past the last byte).
  start = db.IntegerProperty(required=True)
  end = db.IntegerProperty(required=True)

  # Blobkey under which this chunk's bytes are stored in blob storage.
  partial_content = db.TextProperty(required=True)


class CloudStorageStub(object):
  """Google Cloud Storage stub implementation.

  We use the blobstore stub to store files. All metadata is stored
  in _AE_GCSFileInfo_.

  Note: this Google Cloud Storage stub is designed to work with
  apphosting.ext.cloudstorage.storage_api.py.
  It implements only the part of GCS that storage_api.py uses, and its
  interface maps to the GCS XML APIs.
  """

  def __init__(self, blob_storage):
    """Initialize.

    Args:
      blob_storage:
          apphosting.api.blobstore.blobstore_stub.BlobStorage instance
    """
    self.blob_storage = blob_storage

  def _filename_to_blobkey(self, filename):
    """Get blobkey for filename.

    Args:
      filename: gcs filename of form /bucket/filename.

    Returns:
      blobinfo's datastore key name, aka the blobkey.
    """
    common.validate_file_path(filename)
    return blobstore_stub.BlobstoreServiceStub.CreateEncodedGoogleStorageKey(
        filename[1:])

  def post_start_creation(self, filename, options):
    """Start object creation with a POST.

    This implements the resumable upload XML API.

    Args:
      filename: gcs filename of form /bucket/filename.
      options: a dict containing all user specified request headers,
        e.g. {'content-type': 'foo', 'x-goog-meta-bar': 'bar'}.

    Returns:
      a token (blobkey) used for continuing upload.
    """
    ns = namespace_manager.get_namespace()
    try:
      # All stub entities live in the empty namespace.
      namespace_manager.set_namespace('')
      common.validate_file_path(filename)
      token = self._filename_to_blobkey(filename)
      gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)

      self._cleanup_old_file(gcs_file)
      new_file = _AE_GCSFileInfo_(key_name=token,
                                  filename=filename,
                                  finalized=False)
      new_file.options = options
      new_file.put()
      return token
    finally:
      namespace_manager.set_namespace(ns)
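
  # Usage sketch (hypothetical names; 'stub' is a CloudStorageStub wired to a
  # blobstore stub's BlobStorage). Starting a resumable upload returns the
  # token that every subsequent PUT must carry:
  #
  #   token = stub.post_start_creation(
  #       '/some_bucket/some_file',
  #       {'content-type': 'text/plain', 'x-goog-meta-owner': 'me'})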

  def _cleanup_old_file(self, gcs_file):
    """Clean up the old version of a file.

    The old version may or may not be finalized yet. Either way, when the
    user tries to create a file that already exists, we delete the old
    version first.

    Args:
      gcs_file: an instance of _AE_GCSFileInfo_.
    """
    if gcs_file:
      if gcs_file.finalized:
        blobkey = gcs_file.key().name()
        self.blob_storage.DeleteBlob(blobkey)
      else:
        # Not finalized yet: only partial chunks exist; delete them.
        db.delete(_AE_GCSPartialFile_.all().ancestor(gcs_file))
      gcs_file.delete()

  def put_continue_creation(self, token, content, content_range,
                            last=False,
                            _upload_filename=None):
    """Continue object upload with PUTs.

    This implements the resumable upload XML API.

    Args:
      token: upload token returned by post_start_creation.
      content: object content.
      content_range: a (start, end) tuple specifying the content range of this
        chunk. Both are inclusive according to XML API.
      last: True if this is the last chunk of file content.
      _upload_filename: internal use. Might be removed any time! This is
        used by blobstore to pass in the upload filename from the user.

    Returns:
      _AE_GCSFileInfo entity for this file if the file is finalized.

    Raises:
      ValueError: if token is invalid.
    """
    ns = namespace_manager.get_namespace()
    try:
      namespace_manager.set_namespace('')
      gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)
      if not gcs_file:
        raise ValueError('Invalid token')
      if content:
        start, end = content_range
        if len(content) != (end - start + 1):
          raise ValueError('Invalid content range %d-%d' % content_range)
        blobkey = '%s-%d-%d' % (token, content_range[0], content_range[1])
        self.blob_storage.StoreBlob(blobkey, StringIO.StringIO(content))
        # Zero-pad the key name so that __key__ string order matches the
        # numeric order of chunk start offsets (see _get_content).
        new_content = _AE_GCSPartialFile_(parent=gcs_file,
                                          key_name='{:020}'.format(start),
                                          partial_content=blobkey,
                                          start=start,
                                          end=end + 1)
        new_content.put()
      if last:
        return self._end_creation(token, _upload_filename)
    finally:
      namespace_manager.set_namespace(ns)
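
  # Continuing the sketch above: ranges are inclusive byte offsets, so
  # uploading 'hello world' in two chunks looks like (hypothetical values):
  #
  #   stub.put_continue_creation(token, 'hello ', (0, 5))
  #   gcs_file = stub.put_continue_creation(token, 'world', (6, 10), last=True)
  #   # gcs_file.size == 11 and gcs_file.finalized is True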

  def _end_creation(self, token, _upload_filename):
    """End object upload.

    Args:
      token: upload token returned by post_start_creation.

    Returns:
      _AE_GCSFileInfo Entity for this file.

    Raises:
      ValueError: if token is invalid, or the file was corrupted during
        upload.

    Save file content to blobstore. Save blobinfo and _AE_GCSFileInfo.
    """
    gcs_file = _AE_GCSFileInfo_.get_by_key_name(token)
    if not gcs_file:
      raise ValueError('Invalid token')

    error_msg, content = self._get_content(gcs_file)
    if error_msg:
      raise ValueError(error_msg)

    gcs_file.etag = hashlib.md5(content).hexdigest()
    gcs_file.creation = datetime.datetime.utcnow()
    gcs_file.size = len(content)

    # Create a __BlobInfo__ entity in the empty namespace so the blobstore
    # API can also read this object.
    blob_info = datastore.Entity('__BlobInfo__', name=str(token), namespace='')
    blob_info['content_type'] = gcs_file.content_type
    blob_info['creation'] = gcs_file.creation
    blob_info['filename'] = _upload_filename
    blob_info['md5_hash'] = gcs_file.etag
    blob_info['size'] = gcs_file.size
    datastore.Put(blob_info)

    self.blob_storage.StoreBlob(token, StringIO.StringIO(content))

    gcs_file.finalized = True
    gcs_file.put()
    return gcs_file

  @db.transactional
  def _get_content(self, gcs_file):
    """Aggregate all partial content of the gcs_file.

    Args:
      gcs_file: an instance of _AE_GCSFileInfo_.

    Returns:
      (error_msg, content) tuple. error_msg is set if the file is
      corrupted during upload. Otherwise content is set to the
      aggregation of all partial contents.
    """
    content = ''
    previous_end = 0
    error_msg = ''
    for partial in (_AE_GCSPartialFile_.all(namespace='').ancestor(gcs_file).
                    order('__key__')):
      start = int(partial.key().name())
      if not error_msg:
        if start < previous_end:
          error_msg = 'File is corrupted due to overlapping chunks.'
        elif start > previous_end:
          error_msg = 'File is corrupted due to missing chunks.'
        previous_end = partial.end
        content += self.blob_storage.OpenBlob(partial.partial_content).read()
      # Chunks are deleted regardless of errors; they are no longer needed.
      self.blob_storage.DeleteBlob(partial.partial_content)
      partial.delete()
    if error_msg:
      gcs_file.delete()
      content = ''
    return error_msg, content
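
  # Why the 20-digit zero-padded key names matter: the ancestor query above
  # returns partials in __key__ (string) order, and padding makes string
  # order agree with the numeric order of start offsets. For example:
  #
  #   '100' < '20'                                # True: plain strings missort
  #   '{:020}'.format(100) < '{:020}'.format(20)  # False: padded order is numeric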

  def get_bucket(self,
                 bucketpath,
                 prefix,
                 marker,
                 max_keys):
    """Get bucket listing with a GET.

    Args:
      bucketpath: gcs bucket path of form '/bucket'.
      prefix: prefix to limit listing.
      marker: a str after which to start listing.
      max_keys: max size of listing.

    See https://developers.google.com/storage/docs/reference-methods#getbucket
    for details.

    Returns:
      A list of GCSFileStat sorted by filename.
    """
    common.validate_bucket_path(bucketpath)
    q = _AE_GCSFileInfo_.all(namespace='')
    fully_qualified_prefix = '/'.join([bucketpath, prefix])
    if marker:
      q.filter('filename >', '/'.join([bucketpath, marker]))
    else:
      q.filter('filename >=', fully_qualified_prefix)
    result = []
    for info in q.run(limit=max_keys):
      if not info.filename.startswith(fully_qualified_prefix):
        break

      # Re-fetch the entity; it may have been deleted since the query ran.
      info = db.get(info.key())
      if info:
        result.append(common.GCSFileStat(
            filename=info.filename,
            st_size=info.size,
            st_ctime=calendar.timegm(info.creation.utctimetuple()),
            etag=info.etag))
    return result
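
  # Listing sketch (hypothetical bucket holding /b/a, /b/b1 and /b/b2):
  #
  #   stub.get_bucket('/b', 'b', None, 10)   # stats for /b/b1 and /b/b2
  #   stub.get_bucket('/b', 'b', 'b1', 10)   # only /b/b2; listing starts
  #                                          # strictly after the marker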

  def get_object(self, filename, start=0, end=None):
    """Get file content with a GET.

    Args:
      filename: gcs filename of form '/bucket/filename'.
      start: start offset to request. Inclusive.
      end: end offset to request. Inclusive.

    Returns:
      The segment of file content requested.

    Raises:
      ValueError: if file doesn't exist.
    """
    common.validate_file_path(filename)
    blobkey = self._filename_to_blobkey(filename)
    key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
    gcsfileinfo = db.get(key)
    if not gcsfileinfo or not gcsfileinfo.finalized:
      raise ValueError('File does not exist.')
    local_file = self.blob_storage.OpenBlob(blobkey)
    local_file.seek(start)
    if end is not None:
      # Both offsets are inclusive, hence the + 1.
      return local_file.read(end - start + 1)
    else:
      return local_file.read()
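
  # Ranged read sketch, mirroring the XML API's inclusive Range semantics
  # (hypothetical file '/b/f' containing 'hello world'):
  #
  #   stub.get_object('/b/f')         # 'hello world'
  #   stub.get_object('/b/f', 0, 4)   # 'hello' (end - start + 1 == 5 bytes)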

  def head_object(self, filename):
    """Get file stat with a HEAD.

    Args:
      filename: gcs filename of form '/bucket/filename'.

    Returns:
      A GCSFileStat object containing file stat. None if file doesn't exist.
    """
    common.validate_file_path(filename)
    blobkey = self._filename_to_blobkey(filename)
    key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
    info = db.get(key)
    if info and info.finalized:
      metadata = common.get_metadata(info.options)
      filestat = common.GCSFileStat(
          filename=info.filename,
          st_size=info.size,
          etag=info.etag,
          st_ctime=calendar.timegm(info.creation.utctimetuple()),
          content_type=info.content_type,
          metadata=metadata)
      return filestat
    return None

  def delete_object(self, filename):
    """Delete file with a DELETE.

    Args:
      filename: gcs filename of form '/bucket/filename'.

    Returns:
      True if file is deleted. False if file doesn't exist.
    """
    common.validate_file_path(filename)
    blobkey = self._filename_to_blobkey(filename)
    key = blobstore_stub.BlobstoreServiceStub.ToDatastoreBlobKey(blobkey)
    gcsfileinfo = db.get(key)
    if not gcsfileinfo:
      return False

    blobstore_stub.BlobstoreServiceStub.DeleteBlob(blobkey, self.blob_storage)
    return True
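
  # End-to-end sketch of the read-side calls (hypothetical file '/b/f'):
  #
  #   stat = stub.head_object('/b/f')    # GCSFileStat with size, etag, metadata
  #   stub.delete_object('/b/f')         # True: blob and blobinfo removed
  #   stub.head_object('/b/f') is None   # True: file no longer exists
  #   stub.delete_object('/b/f')         # False: nothing left to delete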