App Engine Python SDK version 1.8.1
[gae.git] / python / google / appengine / ext / cloudstorage / common.py
blobb6b68fb11025f27bb473fd39e699d2290891d2e5
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
31 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
# Names exported by `from <this module> import *`.
__all__ = [
    'CS_XML_NS',
    'CSFileStat',
    'dt_str_to_posix',
    'LOCAL_API_HOST',
    'local_run',
    'get_access_token',
    'get_metadata',
    'GCSFileStat',
    'http_time_to_posix',
    'memory_usage',
    'posix_time_to_http',
    'posix_to_dt_str',
    'set_access_token',
    'validate_options',
    'validate_bucket_path',
    'validate_file_path',
]
61 import calendar
62 import datetime
63 from email import utils as email_utils
64 import logging
65 import os
66 import re
68 try:
69 from google.appengine.api import runtime
70 except ImportError:
71 from google.appengine.api import runtime
# A bucket path is '/' followed by at least three characters drawn from
# lowercase letters, digits, '.', '-' and '_'. The pattern is anchored with
# \Z rather than $ because $ also matches just before a trailing newline,
# which would let '/bucket\n' pass validation.
_GCS_BUCKET_REGEX = re.compile(r'/[a-z0-9\.\-_]{3,}\Z')

# A file path is a bucket path followed by '/' and the object name.
_GCS_FULLPATH_REGEX = re.compile(r'/[a-z0-9\.\-_]{3,}/.*')

# Prefixes an option name must start with to be accepted by validate_options.
_GCS_OPTIONS = ('x-goog-acl',
                'x-goog-meta-')

# NOTE(review): presumably the XML namespace of GCS bucket listings - confirm.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'

# NOTE(review): presumably the host name used to address the local GCS stub -
# confirm against the callers.
LOCAL_API_HOST = 'gcs-magicstring.appspot.com'

# Shared access token; written by set_access_token and read by
# get_access_token. The empty string means no token has been set.
_access_token = ''
def set_access_token(access_token):
    """Store the access token shared by all Google Cloud Storage calls.

    While a token is set, the library always tries to talk to the real
    Google Cloud Storage using that token, even on dev appserver. Tokens can
    expire; renewing them is the caller's responsibility.

    While no token is set, the library requests and refreshes a token on its
    own on appserver, and talks to a Google Cloud Storage stub on dev
    appserver.

    Args:
      access_token: the token string. One way to obtain it is to run
        'gsutil -d ls' and copy the str that follows 'Bearer'.
    """
    # Rebind the module-level variable rather than creating a local one.
    global _access_token
    _access_token = access_token
def get_access_token():
    """Return the access token last stored by set_access_token.

    Returns:
      The module-level shared token.
    """
    return _access_token
class GCSFileStat(object):
    """Container for GCS file stat."""

    def __init__(self,
                 filename,
                 st_size,
                 etag,
                 st_ctime,
                 content_type=None,
                 metadata=None):
        """Initialize.

        Args:
          filename: a Google Cloud Storage filename of form
            '/bucket/filename'.
          st_size: file size in bytes. long compatible.
          etag: hex digest of the md5 hash of the file's content. str.
            One pair of surrounding double quotes, if present, is removed.
            An empty string is accepted and stored unchanged.
          st_ctime: posix file creation time. float compatible.
          content_type: content type. str.
          metadata: a str->str dict of user specified metadata from the
            x-goog-meta header, e.g. {'x-goog-meta-foo': 'foo'}.
        """
        self.filename = filename
        self.st_size = long(st_size)
        self.st_ctime = float(st_ctime)
        # startswith/endswith instead of etag[0]/etag[-1]: indexing an empty
        # etag raises IndexError, whereas these simply return False.
        if etag.startswith('"') and etag.endswith('"'):
            etag = etag[1:-1]
        self.etag = etag
        self.content_type = content_type
        self.metadata = metadata

    def __repr__(self):
        """Return a human-readable listing of all stat fields."""
        fields = {
            'filename': self.filename,
            'st_size': self.st_size,
            'st_ctime': self.st_ctime,
            'etag': self.etag,
            'content_type': self.content_type,
            'metadata': self.metadata,
        }
        return (
            '(filename: %(filename)s, st_size: %(st_size)s, '
            'st_ctime: %(st_ctime)s, etag: %(etag)s, '
            'content_type: %(content_type)s, '
            'metadata: %(metadata)s)' % fields)


# Second public name for the same class (both names are listed in __all__).
# NOTE(review): presumably kept for backward compatibility - confirm.
CSFileStat = GCSFileStat
def get_metadata(headers):
    """Get user defined metadata from HTTP response headers.

    HTTP header names are case-insensitive, so the 'x-goog-meta-' prefix is
    matched against the lowercased name. Keys in the result keep the casing
    they had in headers.

    Args:
      headers: a dict-like object of header name -> header value that
        provides iteritems().

    Returns:
      A dict holding only the entries whose name starts with 'x-goog-meta-'.
    """
    return dict((k, v) for k, v in headers.iteritems()
                if k.lower().startswith('x-goog-meta-'))
def validate_bucket_path(path):
    """Check that path names a Google Cloud Storage bucket.

    Args:
      path: a Google Storage bucket path, expected in the form '/bucket'.

    Raises:
      ValueError: if path is invalid.
    """
    _validate_path(path)
    if _GCS_BUCKET_REGEX.match(path) is None:
        message = 'Bucket should have format /bucket but got %s' % path
        raise ValueError(message)
def validate_file_path(path):
    """Check that path names a Google Cloud Storage file.

    Args:
      path: a Google Storage file path, expected in the form
        '/bucket/filename'.

    Raises:
      ValueError: if path is invalid.
    """
    _validate_path(path)
    if _GCS_FULLPATH_REGEX.match(path) is None:
        message = 'Path should have format /bucket/filename but got %s' % path
        raise ValueError(message)
def _validate_path(path):
    """Run the checks common to bucket paths and file paths.

    Args:
      path: a Google Storage path, either '/bucket/filename' or '/bucket'.

    Raises:
      ValueError: if path is falsy (for example empty or None). This check
        runs before the type check.
      TypeError: if path is truthy but not of type basestring.
    """
    if not path:
        raise ValueError('Path is empty')
    if isinstance(path, basestring):
        return
    raise TypeError('Path should be a string but is %s (%s).' %
                    (path.__class__, path))
def validate_options(options):
    """Validate Google Cloud Storage options.

    Args:
      options: a str->basestring dict of options to pass to Google Cloud
        Storage. A falsy value (None or an empty dict) is accepted as is.

    Raises:
      ValueError: if option is not supported.
      TypeError: if option is not of type str or value of an option
        is not of type basestring.
    """
    if not options:
        return

    for k, v in options.iteritems():
        if not isinstance(k, str):
            # Wrap k in a 1-tuple so a tuple-valued key is shown with %r
            # instead of being unpacked as the format arguments.
            raise TypeError('option %r should be a str.' % (k,))
        if not any(k.startswith(valid) for valid in _GCS_OPTIONS):
            raise ValueError('option %s is not supported.' % k)
        if not isinstance(v, basestring):
            # Both format arguments must be passed as one tuple; writing
            # "'...' % v, k" formats with v alone and fails with
            # "not enough arguments for format string".
            raise TypeError(
                'value %r for option %s should be of type basestring.' %
                (v, k))
def http_time_to_posix(http_time):
    """Turn an HTTP date string into posix time.

    The HTTP time format is described at
    http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1

    Args:
      http_time: time in RFC 2616 format. e.g.
        "Mon, 20 Nov 1995 19:12:08 GMT".

    Returns:
      Seconds from unix epoch, or None when http_time is None.
    """
    if http_time is None:
        return None
    parsed = email_utils.parsedate_tz(http_time)
    return email_utils.mktime_tz(parsed)
def posix_time_to_http(posix_time):
    """Convert posix time to HTTP header time format.

    Args:
      posix_time: unix time. 0 (the epoch itself) is a valid value.

    Returns:
      A datetime str in RFC 2616 format, e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT", or None when posix_time is None.
    """
    # Compare against None explicitly: a truthiness test would treat the
    # valid timestamp 0 as "no time given" and silently return None.
    if posix_time is None:
        return None
    return email_utils.formatdate(posix_time, usegmt=True)
# strptime/strftime format of the whole-second part of a GCS datetime str.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
    """format str to posix.

    datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
    e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
    between date and time when they are on the same line.
    Z indicates UTC (zero meridian).

    A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

    This is used to parse LastModified node from GCS's GET bucket XML
    response.

    The fractional seconds are discarded. The fraction and the trailing Z
    are both optional, so '2013-04-12T00:22:27Z' and '2013-04-12T00:22:27'
    are accepted as well.

    Args:
      dt_str: A datetime str.

    Returns:
      Whole seconds from unix epoch. By posix definition, epoch is midnight
      1970/1/1 UTC.

    Raises:
      ValueError: if the whole-second part does not match
        %Y-%m-%dT%H:%M:%S.
    """
    # partition never raises: without a '.', the whole string is returned,
    # whereas unpacking split('.') fails when there is no fraction.
    parsable = dt_str.partition('.')[0]
    # Without a fraction the UTC marker is still attached; strptime cannot
    # parse it with _DT_FORMAT.
    if parsable.endswith('Z'):
        parsable = parsable[:-1]
    dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
    return calendar.timegm(dt.utctimetuple())
def posix_to_dt_str(posix):
    """Reverse of dt_str_to_posix.

    This is used by GCS stub to generate GET bucket XML response.

    Args:
      posix: A float of secs from unix epoch.

    Returns:
      A datetime str in UTC. The fractional part is always written as
      '.000Z'.
    """
    utc_dt = datetime.datetime.utcfromtimestamp(posix)
    return utc_dt.strftime(_DT_FORMAT) + '.000Z'
def local_run():
    """Tell whether the code is running in dev appserver.

    Returns:
      True when the SERVER_SOFTWARE environment variable is unset or starts
      with 'Development', False otherwise.
    """
    server_software = os.environ.get('SERVER_SOFTWARE')
    if server_software is None:
        return True
    return server_software.startswith('Development')
def memory_usage(method):
    """Log memory usage before and after a method.

    Intended for use as a decorator.

    Args:
      method: the callable to wrap.

    Returns:
      A wrapper that logs runtime.memory_usage().current() at info level,
      calls method with the given arguments, logs the memory usage again and
      returns method's result. If method raises, the second log line is
      skipped and the exception propagates.
    """
    # Imported here so the block does not depend on the file's import list.
    import functools

    # functools.wraps keeps method's __name__ and __doc__ on the wrapper so
    # decorated functions stay recognisable in logs and tracebacks.
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        logging.info('Memory before method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        result = method(*args, **kwargs)
        logging.info('Memory after method %s is %s',
                     method.__name__, runtime.memory_usage().current())
        return result
    return wrapper