3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Stub for Google storage."""
import calendar
import datetime
import hashlib
import StringIO

from google.appengine.api import datastore
from google.appengine.api import namespace_manager
from google.appengine.api.blobstore import blobstore_stub
from google.appengine.ext import db
from google.appengine.ext.cloudstorage import common
class _AE_GCSFileInfo_(db.Model):
  """Store GCS specific info.

  GCS allows user to define arbitrary metadata via header x-goog-meta-foo: bar.
  These headers are returned when user does a GET or HEAD on the object.

  The entity key name is the file's blobkey (see CloudStorageStub).
  """

  # Full GCS path of the object, e.g. '/bucket/filename'.
  filename = db.StringProperty(required=True)
  # True once the resumable upload has been finalized.
  finalized = db.BooleanProperty(required=True)

  # User supplied request headers, stored as 'lowercased-name:value' strings.
  raw_options = db.StringListProperty()

  # Object size in bytes; set at finalization.
  size = db.IntegerProperty()

  # Creation time (UTC) and content metadata; set at finalization.
  creation = db.DateTimeProperty()
  content_type = db.StringProperty()
  # MD5 hex digest of the full content; doubles as the GCS etag.
  etag = db.ByteStringProperty()

  def get_options(self):
    """Return the user options as a dict mapping header name to value."""
    return dict(o.split(':', 1) for o in self.raw_options)

  def set_options(self, options_dict):
    """Store user options; mirrors 'content-type' into content_type."""
    self.raw_options = [
        '%s:%s' % (k.lower(), v) for k, v in options_dict.iteritems()]
    if 'content-type' in options_dict:
      self.content_type = options_dict['content-type']

  # Expose get/set_options as a single 'options' attribute.
  options = property(get_options, set_options)

  @classmethod
  def kind(cls):
    # Share the datastore kind used by the blobstore stub for Google
    # Storage file info, so both stubs see the same entities.
    return blobstore_stub._GS_INFO_KIND
class _AE_GCSPartialFile_(db.Model):
  """Store partial content for uploading files."""

  # End offset of this chunk within the whole file.  _get_content compares
  # the next chunk's start against this value to detect gaps/overlaps, so it
  # is stored exclusive (content_range end + 1).  The chunk's start offset is
  # encoded in the entity key name as a zero padded number.
  end = db.IntegerProperty(required=True)

  # Blobkey under which this chunk's bytes are kept in blob storage.
  partial_content = db.TextProperty(required=True)
class CloudStorageStub(object):
  """Google Cloud Storage stub implementation.

  We use blobstore stub to store files. All metadata are stored
  in _AE_GCSFileInfo_ entities.

  Note: this Google Cloud Storage stub is designed to work with
  apphosting.ext.cloudstorage.storage_api.py.
  It only implements the part of GCS storage_api.py uses, and its interface
  maps to the GCS XML API.
  """

  def __init__(self, blob_storage):
    """Initialize.

    Args:
      blob_storage:
          apphosting.api.blobstore.blobstore_stub.BlobStorage instance
    """
    self.blob_storage = blob_storage
109 def _filename_to_blobkey(self
, filename
):
110 """Get blobkey for filename.
113 filename: gcs filename of form /bucket/filename.
116 blobinfo's datastore's key name, aka, blobkey.
118 common
.validate_file_path(filename
)
120 return blobstore_stub
.BlobstoreServiceStub
.CreateEncodedGoogleStorageKey(
123 def post_start_creation(self
, filename
, options
):
124 """Start object creation with a POST.
126 This implements the resumable upload XML API.
129 filename: gcs filename of form /bucket/filename.
130 options: a dict containing all user specified request headers.
131 e.g. {'content-type': 'foo', 'x-goog-meta-bar': 'bar'}.
134 a token (blobkey) used for continuing upload.
136 ns
= namespace_manager
.get_namespace()
138 namespace_manager
.set_namespace('')
139 common
.validate_file_path(filename
)
140 token
= self
._filename
_to
_blobkey
(filename
)
141 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
143 self
._cleanup
_old
_file
(gcs_file
)
144 new_file
= _AE_GCSFileInfo_(key_name
=token
,
147 new_file
.options
= options
151 namespace_manager
.set_namespace(ns
)
154 def _cleanup_old_file(self
, gcs_file
):
155 """Clean up the old version of a file.
157 The old version may or may not be finalized yet. Either way,
158 when user tries to create a file that already exists, we delete the
162 gcs_file: an instance of _AE_GCSFileInfo_.
165 if gcs_file
.finalized
:
166 blobkey
= gcs_file
.key().name()
167 self
.blob_storage
.DeleteBlob(blobkey
)
169 db
.delete(_AE_GCSPartialFile_
.all().ancestor(gcs_file
))
172 def put_continue_creation(self
, token
, content
, content_range
,
174 _upload_filename
=None):
175 """Continue object upload with PUTs.
177 This implements the resumable upload XML API.
180 token: upload token returned by post_start_creation.
181 content: object content.
182 content_range: a (start, end) tuple specifying the content range of this
183 chunk. Both are inclusive according to XML API.
184 last: True if this is the last chunk of file content.
185 _upload_filename: internal use. Might be removed any time! This is
186 used by blobstore to pass in the upload filename from user.
189 _AE_GCSFileInfo entity for this file if the file is finalized.
192 ValueError: if token is invalid.
194 ns
= namespace_manager
.get_namespace()
196 namespace_manager
.set_namespace('')
197 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
199 raise ValueError('Invalid token')
201 start
, end
= content_range
202 if len(content
) != (end
- start
+ 1):
203 raise ValueError('Invalid content range %d-%d' % content_range
)
204 blobkey
= '%s-%d-%d' % (token
, content_range
[0], content_range
[1])
205 self
.blob_storage
.StoreBlob(blobkey
, StringIO
.StringIO(content
))
206 new_content
= _AE_GCSPartialFile_(parent
=gcs_file
,
208 key_name
='{:020}'.format(start
),
209 partial_content
=blobkey
,
214 return self
._end
_creation
(token
, _upload_filename
)
216 namespace_manager
.set_namespace(ns
)
218 def _end_creation(self
, token
, _upload_filename
):
219 """End object upload.
222 token: upload token returned by post_start_creation.
225 _AE_GCSFileInfo Entity for this file.
228 ValueError: if token is invalid. Or file is corrupted during upload.
230 Save file content to blobstore. Save blobinfo and _AE_GCSFileInfo.
232 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
234 raise ValueError('Invalid token')
236 error_msg
, content
= self
._get
_content
(gcs_file
)
238 raise ValueError(error_msg
)
240 gcs_file
.etag
= hashlib
.md5(content
).hexdigest()
241 gcs_file
.creation
= datetime
.datetime
.utcnow()
242 gcs_file
.size
= len(content
)
246 blob_info
= datastore
.Entity('__BlobInfo__', name
=str(token
), namespace
='')
247 blob_info
['content_type'] = gcs_file
.content_type
248 blob_info
['creation'] = gcs_file
.creation
249 blob_info
['filename'] = _upload_filename
250 blob_info
['md5_hash'] = gcs_file
.etag
251 blob_info
['size'] = gcs_file
.size
252 datastore
.Put(blob_info
)
254 self
.blob_storage
.StoreBlob(token
, StringIO
.StringIO(content
))
256 gcs_file
.finalized
= True
261 def _get_content(self
, gcs_file
):
262 """Aggregate all partial content of the gcs_file.
265 gcs_file: an instance of _AE_GCSFileInfo_.
268 (error_msg, content) tuple. error_msg is set if the file is
269 corrupted during upload. Otherwise content is set to the
270 aggregation of all partial contents.
275 for partial
in (_AE_GCSPartialFile_
.all(namespace
='').ancestor(gcs_file
).
277 start
= int(partial
.key().name())
279 if start
< previous_end
:
280 error_msg
= 'File is corrupted due to missing chunks.'
281 elif start
> previous_end
:
282 error_msg
= 'File is corrupted due to overlapping chunks'
283 previous_end
= partial
.end
284 content
+= self
.blob_storage
.OpenBlob(partial
.partial_content
).read()
285 self
.blob_storage
.DeleteBlob(partial
.partial_content
)
290 return error_msg
, content
297 """Get bucket listing with a GET.
300 bucketpath: gcs bucket path of form '/bucket'
301 prefix: prefix to limit listing.
302 marker: a str after which to start listing.
303 max_keys: max size of listing.
305 See https://developers.google.com/storage/docs/reference-methods#getbucket
309 A list of GCSFileStat sorted by filename.
311 common
.validate_bucket_path(bucketpath
)
312 q
= _AE_GCSFileInfo_
.all(namespace
='')
313 fully_qualified_prefix
= '/'.join([bucketpath
, prefix
])
315 q
.filter('filename >', '/'.join([bucketpath
, marker
]))
317 q
.filter('filename >=', fully_qualified_prefix
)
319 for info
in q
.run(limit
=max_keys
):
320 if not info
.filename
.startswith(fully_qualified_prefix
):
323 info
= db
.get(info
.key())
325 result
.append(common
.GCSFileStat(
326 filename
=info
.filename
,
328 st_ctime
=calendar
.timegm(info
.creation
.utctimetuple()),
332 def get_object(self
, filename
, start
=0, end
=None):
333 """Get file content with a GET.
336 filename: gcs filename of form '/bucket/filename'.
337 start: start offset to request. Inclusive.
338 end: end offset to request. Inclusive.
341 The segment of file content requested.
344 ValueError: if file doesn't exist.
346 common
.validate_file_path(filename
)
347 blobkey
= self
._filename
_to
_blobkey
(filename
)
348 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
349 gcsfileinfo
= db
.get(key
)
350 if not gcsfileinfo
or not gcsfileinfo
.finalized
:
351 raise ValueError('File does not exist.')
352 local_file
= self
.blob_storage
.OpenBlob(blobkey
)
353 local_file
.seek(start
)
355 return local_file
.read(end
- start
+ 1)
357 return local_file
.read()
359 def head_object(self
, filename
):
360 """Get file stat with a HEAD.
363 filename: gcs filename of form '/bucket/filename'
366 A GCSFileStat object containing file stat. None if file doesn't exist.
368 common
.validate_file_path(filename
)
369 blobkey
= self
._filename
_to
_blobkey
(filename
)
370 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
372 if info
and info
.finalized
:
373 metadata
= common
.get_metadata(info
.options
)
374 filestat
= common
.GCSFileStat(
375 filename
=info
.filename
,
378 st_ctime
=calendar
.timegm(info
.creation
.utctimetuple()),
379 content_type
=info
.content_type
,
384 def delete_object(self
, filename
):
385 """Delete file with a DELETE.
388 filename: gcs filename of form '/bucket/filename'
391 True if file is deleted. False if file doesn't exist.
393 common
.validate_file_path(filename
)
394 blobkey
= self
._filename
_to
_blobkey
(filename
)
395 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
396 gcsfileinfo
= db
.get(key
)
400 blobstore_stub
.BlobstoreServiceStub
.DeleteBlob(blobkey
, self
.blob_storage
)